# ACNet repository layout (from the dump header):
#
# ACNet
# ├── ACComponents
# │   ├── ACChemUtils.py
# │   ├── ACDataset
# │   │   ├── DataPreprocess.py
# │   │   ├── DataUtils.py
# │   │   ├── Dataset.py
# │   │   └── GenerateACDatasets.py
# │   ├── ACModels.py
# │   ├── ACProcessControllers.py
# │   └── ACSplitter.py
# ├── ACNetEnviron.yml
# ├── CMPNNLarge.py  CMPNNMedium.py  CMPNNSmall.py  ChemBERTFew.py
# ├── FPMLPFew.py  FPMLPLarge.py  FPMLPMedium.py  FPMLPMixRandom.py  FPMLPMixTarget.py  FPMLPSmall.py
# ├── GCNLarge.py  GCNMedium.py  GCNMixRandom.py  GCNMixTarget.py  GCNSmall.py
# ├── GINLarge.py  GINMedium.py  GINSmall.py  GROVERFew.py
# ├── GRULarge.py  GRUMedium.py  GRUMixRandom.py  GRUMixTarget.py  GRUSmall.py
# ├── GraphLoGFew.py
# ├── GraphormerLarge.py  GraphormerMedium.py  GraphormerMixRandom.py  GraphormerMixTarget.py  GraphormerSmall.py
# ├── LSTMLarge.py  LSTMMedium.py  LSTMSmall.py  MATFew.py
# ├── Models
# │   ├── BasicGNNs.py
# │   ├── CMPNN
# │   │   ├── CMPNNFeaturizer.py
# │   │   ├── CMPNNModel.py
# │   │   └── nn_utils.py
# │   ├── ClassifierModel.py
# │   └── Graphormer
# │       ├── Graphormer.py  algos.c  algos.cpython-37m-x86_64-linux-gnu.so
# │       ├── algos.cpython-38-x86_64-linux-gnu.so  algos.pyx
# │       ├── build
# │       │   ├── temp.linux-x86_64-3.7/algos.o
# │       │   └── temp.linux-x86_64-3.8/algos.o
# │       ├── collator.py  data.py  setup.py  wrapper.py
# ├── Pretrain8Few.py  PretrainGNNsFew.py
# ├── SGCLarge.py  SGCMedium.py  SGCSmall.py
# ├── SMILESTransformerFew.py
# └── TrainingFramework
#     ├── ChemUtils.py  Dataset.py  Evaluator.py  Featurizer.py  FileUtils.py
#     ├── Initializer.py  Metrics.py  ProcessControllers.py  Scheduler.py
#     ├── Splitter.py  Utils.py
# ├── LICENSE
# └── README.md

# --------------------------------------------------------------------------
# /ACNet/ACComponents/ACChemUtils.py
# --------------------------------------------------------------------------
import rdkit
import rdkit.Chem as Chem
from rdkit.Chem import AllChem  # FIX: was imported twice in the original; kept once
from rdkit import DataStructs
from rdkit.Chem import MACCSkeys
from rdkit.Chem import Draw
import numpy as np
from TrainingFramework.ChemUtils import BasicChecker, GetMol


class ACMolChecker(BasicChecker):
    """Screen dataset items whose SMILES cannot be parsed into an RDKit Mol.

    If ``pair_wise`` is truthy, each item carries two molecules under
    'SMILES1'/'SMILES2' and both must parse; otherwise a single 'SMILES'
    entry is checked.
    """

    def __init__(self, pair_wise=None):
        super(ACMolChecker, self).__init__()
        self.pair_wise = pair_wise

    def check(self, dataset):
        """Return the items of *dataset* whose SMILES parse successfully.

        Discarded items are printed for inspection but not returned.
        """
        origin_dataset = dataset
        checked_dataset = []
        discarded_dataset = []
        for item in origin_dataset:
            if not self.pair_wise:
                smiles = item['SMILES']
                mol = GetMol(smiles)
                if mol:
                    checked_dataset.append(item)
                else:
                    discarded_dataset.append(item)
            else:
                smiles1 = item['SMILES1']
                smiles2 = item['SMILES2']
                mol1 = GetMol(smiles1)
                mol2 = GetMol(smiles2)
                # a pair is kept only when BOTH molecules are valid
                if mol1 and mol2:
                    checked_dataset.append(item)
                else:
                    discarded_dataset.append(item)
        assert len(checked_dataset) + len(discarded_dataset) == len(origin_dataset)
        print("Total num of origin dataset: ", len(origin_dataset))
        print(len(checked_dataset), " molecules have passed check.")
        print(len(discarded_dataset), " molecules have been discarded.")
        print("Discarded molecules:")
        print(discarded_dataset)
        return checked_dataset


class ACAttentiveFPChecker(BasicChecker):
    # Rules proposed in the source code of Attentive FP
    # To screen the samples that not satisfy the rules
    # more rules can be added.
    """Screen samples by Attentive FP rules (max atom count / max degree)."""

    def __init__(self, max_atom_num, max_degree, pair_wise=None):
        super(ACAttentiveFPChecker, self).__init__()
        self.max_atom_num = max_atom_num
        self.max_degree = max_degree
        # scratch flag raised by the check_* helpers; reset after every item
        self.mol_error_flag = 0
        self.pair_wise = pair_wise

    def check(self, dataset):
        """Return items passing the degree/atom-count rules; print statistics."""
        origin_dataset = dataset
        checked_dataset = []
        discarded_dataset = []
        for item in origin_dataset:
            if self.pair_wise:
                smiles1 = item['SMILES1']
                smiles2 = item['SMILES2']
                mol1 = GetMol(smiles1)
                mol2 = GetMol(smiles2)
                if mol1 and mol2:
                    #self.check_single_bonds(mol)
                    self.check_degree(mol1)
                    self.check_degree(mol2)
                    self.check_max_atom_num(mol1)
                    self.check_max_atom_num(mol2)
                    if self.mol_error_flag == 0:
                        checked_dataset.append(item)
                    else:
                        discarded_dataset.append(item)
                        self.mol_error_flag = 0
                else:
                    discarded_dataset.append(item)
                    self.mol_error_flag = 0
            else:
                smiles = item['SMILES']
                mol = GetMol(smiles)
                #check
                if mol:
                    #self.check_single_bonds(mol)
                    self.check_degree(mol)
                    self.check_max_atom_num(mol)
                    if self.mol_error_flag == 0:
                        checked_dataset.append(item)
                    else:
                        discarded_dataset.append(item)
                        self.mol_error_flag = 0
                else:
                    discarded_dataset.append(item)
                    self.mol_error_flag = 0

        assert len(checked_dataset) + len(discarded_dataset) == len(origin_dataset)
        print("Total num of origin dataset: ", len(origin_dataset))
        print(len(checked_dataset), " molecules has passed check.")
        print(len(discarded_dataset), " molecules has been discarded.")
        print("Discarded molecules:")
        print(discarded_dataset)
        return checked_dataset

    def check_degree(self, mol):
        # Flag the molecule if any atom exceeds the allowed degree.
        for atom in mol.GetAtoms():
            if atom.GetDegree() > self.max_degree:
                self.mol_error_flag = 1
                break

    def check_max_atom_num(self, mol):
        # Flag the molecule if it has more atoms than allowed.
        if len(mol.GetAtoms()) > self.max_atom_num:
            self.mol_error_flag = 1

    def check_single_bonds(self, mol):
        # check whether there is at least one single bond in the molecule
        # this check is not used in FraGAT
        self.mol_error_flag = 1
        for bond in mol.GetBonds():
            if bond.GetBondType() == Chem.rdchem.BondType.SINGLE:
                if not bond.IsInRing():
                    self.mol_error_flag = 0
                    break

# --------------------------------------------------------------------------
# /ACNet/ACComponents/ACDataset/DataPreprocess.py
# --------------------------------------------------------------------------
import pandas as pd
import os
import numpy as np
from ACComponents.ACDataset.DataUtils import Config, SaveJson, LoadJson
import random

# Raw MMP-AC input files.
OriginDatasetAddrAll = './data_files/raw_data/all_smiles_target.csv'
OriginDatasetAddrPos = './data_files/raw_data/mmp_ac_s_distinct.csv'
OriginDatasetAddrNeg = './data_files/raw_data/mmp_ac_s_neg_distinct.csv'

# Generated dataset files, split by per-target subset size.
GeneratedDatasetAddrAll = './data_files/generated_datasets/MMP_AC.json'
GeneratedDatasetAddrLarge = './data_files/generated_datasets/MMP_AC_Large.json'
GeneratedDatasetAddrMedium = './data_files/generated_datasets/MMP_AC_Medium.json'
GeneratedDatasetAddrSmall = './data_files/generated_datasets/MMP_AC_Small.json'
GeneratedDatasetAddrFew = './data_files/generated_datasets/MMP_AC_Few.json'

DiscardedDatasetAddr = './data_files/generated_datasets/MMP_AC_Discarded.json'

GeneratedDatasetAddrMixed = './data_files/generated_datasets/MMP_AC_Mixed.json'
GeneratedDatasetAddrMixedScreened = './data_files/generated_datasets/MMP_AC_Mixed_Screened.json'


def ReadACDatafile(AddrPos, AddrNeg):
    """Read positive/negative MMP-cliff CSVs into per-target sample lists.

    Returns a dict mapping target id (str) -> list of samples
    {'SMILES1', 'SMILES2', 'Value'} where Value is '1' (AC) or '0' (non-AC).
    Negatives whose target has no positive sample are discarded.
    """
    dfpos = pd.read_csv(AddrPos)
    dfneg = pd.read_csv(AddrNeg)

    targets = {}

    total_items1 = len(dfpos)
    total_items2 = len(dfneg)

    for i in range(total_items1):
        target = str(dfpos['tid'][i])
        if target not in targets.keys():
            targets.update({target: []})

        SMILES1 = dfpos['c1'][i]
        SMILES2 = dfpos['c2'][i]
        targets[target].append({'SMILES1': SMILES1, 'SMILES2': SMILES2, 'Value': '1'})

    discard_cnt = 0
    valid_neg_cnt = 0
    for i in range(total_items2):
        target = str(dfneg['tid'][i])
        if target in targets.keys():
            SMILES1 = dfneg['c1'][i]
            SMILES2 = dfneg['c2'][i]
            targets[target].append({'SMILES1': SMILES1, 'SMILES2': SMILES2, 'Value': '0'})
            valid_neg_cnt += 1
        else:  # idiom: was a second redundant `if target not in targets.keys()`
            discard_cnt += 1

    print(f"Total positive count: {total_items1}")
    print(f"Total negative count: {total_items2}")
    print(f"Valid negative count: {valid_neg_cnt}")
    print(f"Discarded negative count: {discard_cnt}")

    return targets


def RandomScreenNeg(dataset, config):
    """Down-sample negatives per target so Pos/Neg >= config.pn_rate_threshold.

    Sampling is seeded with config.random_sample_negative_seed for
    reproducibility. Returns (screened_dataset, discarded_dataset).
    """
    screened_dataset = {}
    discarded_dataset = {}

    org_tot_cnt = 0
    allowed_ratio = config.pn_rate_threshold
    for target in dataset.keys():
        print(f"Checking tid:{target}")
        subset = dataset[target]
        org_tot_cnt += len(subset)
        pos_set = []
        neg_set = []
        for item in subset:
            if item['Value'] == '1':
                pos_set.append(item)
            else:
                neg_set.append(item)
        pos_cnt = len(pos_set)
        neg_cnt = len(neg_set)
        # FIX: guard against neg_cnt == 0 (all-positive subset), which used to
        # raise ZeroDivisionError. An all-positive subset is trivially allowed.
        ratio = (pos_cnt / neg_cnt) if neg_cnt else float('inf')
        print(f"Pos/Neg ratio: {ratio}.")

        if ratio > allowed_ratio:
            print(f"Allowed.")
            screened_dataset.update({target: subset})
        else:
            print(f"Screening...")
            screened_subset = pos_set.copy()
            # largest negative count that still satisfies the ratio
            max_sample_num = int(pos_cnt / allowed_ratio)
            print(f"Pos cnt: {pos_cnt}.")
            random.seed(config.random_sample_negative_seed)
            random.shuffle(neg_set)
            chosen_neg = neg_set[:max_sample_num]
            print(f"Randomly chosen: {len(chosen_neg)}")
            discarded_subset = neg_set[max_sample_num:]
            print(f"Discard: {len(discarded_subset)}")
            screened_subset.extend(chosen_neg)
            print(f"subset after screening: {len(screened_subset)}")
            print(f"ratio after screening: {len(pos_set) / len(chosen_neg)}.")
            screened_dataset.update({target: screened_subset})
            discarded_dataset.update({target: discarded_subset})

    print(f"Dataset after screening: {len(screened_dataset)}")
    tot_cnt = 0
    dis_cnt = 0
    for key in screened_dataset.keys():
        subset = screened_dataset[key]
        tot_cnt += len(subset)
    for key in discarded_dataset.keys():
        subset = discarded_dataset[key]
        dis_cnt += len(subset)
    print(f"Number of samples reserved:{tot_cnt}")
    print(f"Number of samples discarded:{dis_cnt}")
    # every original sample is either reserved or discarded
    assert (tot_cnt + dis_cnt) == org_tot_cnt

    return screened_dataset, discarded_dataset


def SubsetNumDistribution(dataset):
    """Report per-target subset sizes.

    Returns (total sample count, list of subset sizes, list of target ids)
    with the two lists index-aligned.
    """
    cnt = 0
    cnt_distribution = []
    cnt_distribution_tid = []
    for idx, tid in enumerate(dataset):
        item = dataset[(str(tid))]
        cnt_distribution_tid.append(tid)
        cnt_distribution.append(len(item))
        cnt += len(item)

    print(f"Total number of samples in the dataset: {cnt}")
    print(f"Size of all Subsets in the dataset: {cnt_distribution}")
    print(f"The tid of all subsets in the dataset: {cnt_distribution_tid}")
    print(f"The maximum size of subsets: {max(cnt_distribution)}")
    print(f"The minimum size of subsets: {min(cnt_distribution)}")

    return cnt, cnt_distribution, cnt_distribution_tid


def SplitSubsetsByCnt(dataset, cnt_distribution, cnt_distribution_tid, config):
    """Partition subsets into Large/Medium/Small/Few by size thresholds and
    save each partition to its JSON file.

    Bucket boundaries (half-open): Large > large_thres >= Medium >
    medium_thres >= Small > small_thres >= Few > 1.
    """
    cnt_distribution = np.array(cnt_distribution)

    large_thres = config.large_thres
    medium_thres = config.medium_thres
    small_thres = config.small_thres

    cnt_subset_large = np.where(cnt_distribution > large_thres)[0]
    cnt_subset_medium = np.where((cnt_distribution <= large_thres) & (cnt_distribution > medium_thres))[0]
    cnt_subset_small = np.where((cnt_distribution <= medium_thres) & (cnt_distribution > small_thres))[0]
    cnt_subset_few = np.where((cnt_distribution <= small_thres) & (cnt_distribution > 1))[0]

    print(f"The number of subsets in Large set is: {len(cnt_subset_large)}")
    print(f"The number of subsets in Medium set is: {len(cnt_subset_medium)}")
    print(f"The number of subsets in Small set is: {len(cnt_subset_small)}")
    print(f"The number of subsets in Few set is: {len(cnt_subset_few)}")

    subset_large = {}
    subset_medium = {}
    subset_small = {}
    subset_few = {}

    for i in range(len(cnt_subset_large)):
        loc = cnt_subset_large[i]
        tid = cnt_distribution_tid[loc]
        item = dataset[tid]
        subset_large.update({tid: item})
    # PERF: save once per partition, after its loop; the original flattened
    # text re-wrote the file on every iteration. Final file content identical.
    SaveJson(GeneratedDatasetAddrLarge, subset_large)

    for i in range(len(cnt_subset_medium)):
        loc = cnt_subset_medium[i]
        tid = cnt_distribution_tid[loc]
        item = dataset[tid]
        subset_medium.update({tid: item})
    SaveJson(GeneratedDatasetAddrMedium, subset_medium)

    for i in range(len(cnt_subset_small)):
        loc = cnt_subset_small[i]
        tid = cnt_distribution_tid[loc]
        item = dataset[tid]
        subset_small.update({tid: item})
    SaveJson(GeneratedDatasetAddrSmall, subset_small)

    for i in range(len(cnt_subset_few)):
        loc = cnt_subset_few[i]
        tid = cnt_distribution_tid[loc]
        item = dataset[tid]
        subset_few.update({tid: item})
    SaveJson(GeneratedDatasetAddrFew, subset_few)


def ScreenFewPosSubsets(dataset, config):
    """Drop subsets with fewer than config.few_pos_threshold positive samples.

    Returns (screened_dataset, discarded_subsets).
    """
    screened_dataset = dataset.copy()
    discarded_subsets = {}
    few_pos_threshold = config.few_pos_threshold

    for idx, tid in enumerate(dataset):
        print(f"Checking subset of target {tid}")
        item = dataset[str(tid)]
        subset_num = len(item)
        print(f"Total num of samples of this target: {subset_num}")
        pos_cnt = 0
204 | for i in range(subset_num): 205 | sample = item[i] 206 | if sample['Value'] == '1': 207 | pos_cnt += 1 208 | 209 | print(f"Total positive sample num {pos_cnt}") 210 | if pos_cnt < few_pos_threshold: 211 | print(f"Discard this subset.") 212 | screened_dataset.pop(str(tid)) 213 | discarded_subsets.update({str(tid): item}) 214 | 215 | assert len(screened_dataset) + len(discarded_subsets) == len(dataset) 216 | return screened_dataset, discarded_subsets 217 | 218 | def ScreenImbalancedSubsets(dataset, config): 219 | screened_dataset = dataset.copy() 220 | discarded_subsets = {} 221 | pn_rate_threshold = config.pn_rate_threshold 222 | 223 | for idx, tid in enumerate(dataset): 224 | print(f"Checking subset of target {tid}") 225 | item = dataset[str(tid)] 226 | subset_size = len(item) 227 | print(f"Total num of samples of this target: {subset_size}") 228 | 229 | pos_cnt = 0 230 | neg_cnt = 0 231 | for i in range(subset_size): 232 | sample = item[i] 233 | if sample['Value'] == '1': 234 | pos_cnt += 1 235 | elif sample['Value'] == '0': 236 | neg_cnt += 1 237 | else: 238 | raise ValueError( 239 | f'Wrong Value of target {tid} and sample {sample} with idx {i}.' 
240 | ) 241 | 242 | rate = pos_cnt / neg_cnt 243 | print(f"Positive / Negative rate is: {rate}") 244 | 245 | if rate < pn_rate_threshold: 246 | print(f"Discard this subset.") 247 | screened_dataset.pop(str(tid)) 248 | discarded_subsets.update({str(tid): item}) 249 | 250 | assert len(screened_dataset) + len(discarded_subsets) == len(dataset) 251 | return screened_dataset, discarded_subsets 252 | 253 | def MixAllSubsets(dataset): 254 | mixed_dataset = {'All':[]} 255 | 256 | # total_targets_num = len(mixed_dataset) 257 | for idx, tid in enumerate(dataset): 258 | item = dataset[str(tid)] 259 | subset_size = len(item) 260 | 261 | for i in range(subset_size): 262 | sample = item[i] 263 | sample.update({'Target': tid}) 264 | mixed_dataset['All'].append(sample) 265 | 266 | return mixed_dataset 267 | 268 | def CheckConflictSamples(dataset): 269 | total_num = len(dataset) 270 | print(f"Total number of samples in mixed dataset is {total_num}.") 271 | 272 | MolPairDict = {} 273 | conflict_cnt = 0 274 | for item in dataset: 275 | smiles1 = item['SMILES1'] 276 | smiles2 = item['SMILES2'] 277 | molpair = smiles1 + '?' + smiles2 278 | molpair_rev = smiles2 + '?' 
+ smiles1 279 | if (molpair not in MolPairDict.keys()) & (molpair_rev not in MolPairDict.keys()): 280 | MolPairDict.update({molpair: item}) 281 | else: 282 | if molpair in MolPairDict.keys(): 283 | conflict_molpair = molpair 284 | elif molpair_rev in MolPairDict.keys(): 285 | conflict_molpair = molpair_rev 286 | previous_value = MolPairDict[conflict_molpair]['Value'] 287 | current_value = item['Value'] 288 | if previous_value != current_value: 289 | print(f"Confilict encountered!") 290 | conflict_cnt += 1 291 | print(f"Previous conflict sample: {conflict_molpair} : {MolPairDict[conflict_molpair]}.") 292 | print(f"Current sample: {item}.") 293 | print(f"Total conflict sample number is {conflict_cnt}") 294 | 295 | def ScreenConflictSamples(dataset): 296 | total_num = len(dataset) 297 | print(f"Total number of samples in the mixed dataset is {total_num}.") 298 | 299 | MolPairDict = {} 300 | ScreenedDataset = [] 301 | MolPairIndexDict = {} 302 | ToBeScreenedMolPairList = [] 303 | discarded_cnt = 0 304 | repeated_cnt = 0 305 | for item in dataset: 306 | smiles1 = item['SMILES1'] 307 | smiles2 = item['SMILES2'] 308 | molpair = smiles1 + '?' + smiles2 309 | molpair_rev = smiles2 + '?' + smiles1 310 | 311 | if (molpair not in MolPairDict.keys()) & (molpair_rev not in MolPairDict.keys()): 312 | MolPairDict.update({molpair: item}) 313 | ScreenedDataset.append(item) 314 | MolPairIndexDict.update({molpair: len(ScreenedDataset)}) 315 | 316 | else: 317 | repeated_cnt += 1 318 | if molpair in MolPairDict.keys(): 319 | conflict_molpair = molpair 320 | elif molpair_rev in MolPairDict.keys(): 321 | conflict_molpair = molpair_rev 322 | previous_value = MolPairDict[conflict_molpair]['Value'] 323 | current_value = item['Value'] 324 | 325 | if previous_value != current_value: 326 | # previous_index = MolPairIndexDict[conflict_molpair] 327 | # previous_item = ScreenedDataset[previous_index] 328 | # previous_molpair = previous_item['SMILES1'] + '?' 
+ previous_item['SMILES2'] 329 | # assert previous_molpair == conflict_molpair 330 | # ScreenedDataset.pop(previous_index) 331 | if MolPairDict[conflict_molpair] not in ToBeScreenedMolPairList: 332 | ToBeScreenedMolPairList.append(MolPairDict[conflict_molpair]) 333 | discarded_cnt += 1 334 | 335 | for item in ToBeScreenedMolPairList: 336 | try: 337 | ScreenedDataset.remove(item) 338 | except: 339 | print(f"{item} have been removed before.") 340 | 341 | print(f"Total repeated sample number is {repeated_cnt}.") 342 | print(f"Total discarded sample number is {discarded_cnt}.") 343 | print(f"Total to be screened sample number is {len(ToBeScreenedMolPairList)}.") 344 | print(f"Remained sample number is {len(ScreenedDataset)}.") 345 | 346 | return ScreenedDataset 347 | 348 | 349 | #################################### 350 | 351 | 352 | def ACDatasetPreprocess(config): 353 | if not os.path.exists(GeneratedDatasetAddrAll): 354 | dataset = ReadACDatafile(OriginDatasetAddrPos, OriginDatasetAddrNeg) 355 | discarded_dataset = {} 356 | if config.discard_few_pos: 357 | screened_dataset, discarded_dataset1 = ScreenFewPosSubsets(dataset, config) 358 | discarded_dataset.update(discarded_dataset1) 359 | if config.random_sample_negative: 360 | screened_dataset, discarded_dataset2 = RandomScreenNeg(screened_dataset, config) 361 | discarded_dataset.update(discarded_dataset2) 362 | if config.discard_extreme_imbalance: 363 | screened_dataset, discarded_dataset3 = ScreenImbalancedSubsets(screened_dataset, config) 364 | discarded_dataset.update(discarded_dataset2) 365 | 366 | SaveJson(GeneratedDatasetAddrAll, screened_dataset) 367 | SaveJson(DiscardedDatasetAddr, discarded_dataset) 368 | 369 | dataset = screened_dataset 370 | 371 | else: 372 | dataset = LoadJson(GeneratedDatasetAddrAll) 373 | 374 | print(f'Total targets(subsets) of the dataset: {len(dataset)}') 375 | 376 | cnt, cnt_distribution, cnt_distribution_tid = SubsetNumDistribution(dataset) 377 | 378 | SplitSubsetsByCnt(dataset, 
cnt_distribution, cnt_distribution_tid, config) 379 | 380 | if config.mixed: 381 | mixed_dataset = MixAllSubsets(dataset) 382 | print(f"Total number of samples in the mixed dataset is {len(mixed_dataset['All'])}") 383 | SaveJson(GeneratedDatasetAddrMixed, mixed_dataset) 384 | CheckConflictSamples(mixed_dataset['All']) 385 | screened_mixed_dataset = {'All': []} 386 | screened_mixed_dataset['All'] = ScreenConflictSamples(mixed_dataset['All']) 387 | SaveJson(GeneratedDatasetAddrMixedScreened, screened_mixed_dataset) 388 | 389 | 390 | 391 | -------------------------------------------------------------------------------- /ACNet/ACComponents/ACDataset/DataUtils.py: -------------------------------------------------------------------------------- 1 | import json 2 | import re 3 | import pandas as pd 4 | 5 | 6 | class Config(object): 7 | def __init__(self): 8 | super(Config, self).__init__() 9 | self.mixed = True 10 | self.random_sample_negative = False 11 | self.random_sample_negative_seed = 8 12 | self.discard_extreme_imbalance = False # Discard the subsets that are extremely imbalanced. Default False. 13 | self.pn_rate_threshold = 0.2 # The threshold of Pos/Neg for extremely imbalanced subsets. Default 0.1. 14 | self.discard_few_pos = True # Discard the subsets that have only few positive samples. Default True. 
        self.few_pos_threshold = 10  # The threshold to identify few positive
        self.large_thres = 20000
        self.medium_thres = 1000
        self.small_thres = 100


def SaveJson(Addr, object):
    # Serialize *object* as JSON to path *Addr* (overwrites any existing file).
    with open(Addr, 'w') as f:
        json.dump(object, f)

def LoadJson(Addr):
    # Load and return the JSON content stored at *Addr*.
    with open(Addr, 'r') as f:
        content = json.load(f)
    return content

# --------------------------------------------------------------------------
# /ACNet/ACComponents/ACDataset/GenerateACDatasets.py
# --------------------------------------------------------------------------
# Script entry point: generate every AC dataset variant with default Config.
import ACComponents.ACDataset.DataPreprocess as DP
import ACComponents.ACDataset.DataUtils as DU

cuf_config = DU.Config()
DP.ACDatasetPreprocess(cuf_config)

# --------------------------------------------------------------------------
# /ACNet/ACComponents/ACModels.py
# --------------------------------------------------------------------------
import torch as t
import torch.nn as nn
from sklearn import svm
from Models.CMPNN.CMPNNModel import *
from Models.BasicGNNs import *
from Models.Graphormer.Graphormer import Graphormer
from Models.ClassifierModel import DNN


class ACPredMLP(nn.Module):
    """DNN classifier over the concatenated features of a molecule pair.

    The pair feature is cat(Input1, Input2), so the classifier input size is
    twice the per-molecule feature size (fingerprint bits or raw features).
    """
    def __init__(self, opt):
        super(ACPredMLP, self).__init__()
        self.opt = opt
        # todo(zqzhang): updated in ACv8
        if self.opt.args['Feature'] == 'FP':
            self.input_size = self.opt.args['nBits']
        elif self.opt.args['Feature'] == 'Raw':
            self.input_size = self.opt.args['RawFeatureSize']
        self.Classifier = DNN(
                input_size = 2 * self.input_size,
                output_size = self.opt.args['OutputSize'],
                layer_sizes = self.opt.args['DNNLayers'],
                opt = self.opt
        )

    def forward(self, Input):
        # Input is a (Input1, Input2) tuple; both tensors are moved to the
        # configured CUDA device (or CPU) before concatenation.
        Input1, Input2 = Input
        Input1 = Input1.to(t.device(f"cuda:{self.opt.args['CUDA_VISIBLE_DEVICES']}" if t.cuda.is_available() else 'cpu'))
        Input2 = Input2.to(t.device(f"cuda:{self.opt.args['CUDA_VISIBLE_DEVICES']}" if t.cuda.is_available() else 'cpu'))
        # print("Input1:")
        # print(Input1)
        # print(Input1.size())
        # print('Input2:')
        # print(Input2)
        # print(Input2.size())
        # [batch_size, nBits]
        PairwiseMolFeature = t.cat([Input1, Input2], dim=1)
        # print("PairwiseFeature:")
        # print(PairwiseMolFeature)
        # print(PairwiseMolFeature.size())
        prediction = self.Classifier(PairwiseMolFeature)
        # print("Prediction:")
        # print(prediction)
        # print(prediction.size())

        return prediction

class ACPredLSTM(nn.Module):
    """Bi-LSTM encoder over token embeddings of each SMILES, pair features
    concatenated and classified by a DNN."""
    def __init__(self, opt):
        super(ACPredLSTM, self).__init__()
        self.opt = opt
        # last dictionary index is reserved as the padding token
        self.WordEmbed = nn.Embedding(self.opt.args['MaxDictLength'],
                                      self.opt.args['FPSize'],
                                      padding_idx = self.opt.args['MaxDictLength']-1)
        self.MolFeatureExtractor = nn.LSTM(input_size = self.opt.args['FPSize'],
                                           hidden_size = self.opt.args['FPSize'],
                                           num_layers = self.opt.args['LSTMLayers'],
                                           batch_first = True,
                                           bidirectional = True

                                           )
        self.Classifier = DNN(
                input_size = 2*self.opt.args['FPSize'],
                output_size = self.opt.args['OutputSize'],
                layer_sizes = self.opt.args['DNNLayers'],
                opt = self.opt
        )

    def forward(self, Input):
        Input1, Input2 = Input
        Input1 = Input1.to(t.device(f"cuda:{self.opt.args['CUDA_VISIBLE_DEVICES']}" if t.cuda.is_available() else 'cpu'))
        Input2 = Input2.to(t.device(f"cuda:{self.opt.args['CUDA_VISIBLE_DEVICES']}" if t.cuda.is_available() else 'cpu'))
        Embed1 = self.WordEmbed(Input1)
        Embed2 = self.WordEmbed(Input2)
        # use the final hidden state h_n as the molecule feature
        _, (MolFeature1,_) = self.MolFeatureExtractor(Embed1)
        _, (MolFeature2,_) = self.MolFeatureExtractor(Embed2)
        # print(MolFeature1)
        # print(MolFeature1.size())
        # MolFeature: [ LSTMLayer*Bi, Batch_size, FP_size]
        MolFeature1 = MolFeature1.permute(1,0,2)
        MolFeature2 = MolFeature2.permute(1,0,2)
        # MolFeature: [Batch_size, LSMTLayer*Bi, FP_size]
        # sum over layer/direction states to get one vector per molecule
        MolFeature1 = MolFeature1.sum(dim=1)
        MolFeature2 = MolFeature2.sum(dim=1)
        # MolFeature: [Batch_size, FP_size]
        PairwiseMolFeature = t.cat([MolFeature1,MolFeature2],dim=1)
        prediction = self.Classifier(PairwiseMolFeature)
        return prediction

class ACPredGRU(nn.Module):
    """Bi-GRU variant of ACPredLSTM (GRU returns only h_n, no cell state)."""
    def __init__(self, opt):
        super(ACPredGRU, self).__init__()
        self.opt = opt
        self.WordEmbed = nn.Embedding(self.opt.args['MaxDictLength'],
                                      self.opt.args['FPSize'],
                                      padding_idx = self.opt.args['MaxDictLength'] - 1)
        self.MolFeatureExtractor = nn.GRU(input_size = self.opt.args['FPSize'],
                                          hidden_size = self.opt.args['FPSize'],
                                          num_layers = self.opt.args['GRULayers'],
                                          batch_first = True,
                                          bidirectional = True)
        self.Classifier = DNN(
                input_size = 2 * self.opt.args['FPSize'],
                output_size = self.opt.args['OutputSize'],
                layer_sizes = self.opt.args['DNNLayers'],
                opt = self.opt
        )

    def forward(self, Input):
        Input1, Input2 = Input
        Input1 = Input1.to(t.device(f"cuda:{self.opt.args['CUDA_VISIBLE_DEVICES']}" if t.cuda.is_available() else 'cpu'))
        Input2 = Input2.to(t.device(f"cuda:{self.opt.args['CUDA_VISIBLE_DEVICES']}" if t.cuda.is_available() else 'cpu'))
        Embed1 = self.WordEmbed(Input1)
        Embed2 = self.WordEmbed(Input2)
        _, MolFeature1 = self.MolFeatureExtractor(Embed1)
        _, MolFeature2 = self.MolFeatureExtractor(Embed2)
        # print(MolFeature1)
        # print(MolFeature1.size())
        # MolFeature: [ GRULayer*Bi, Batch_size, FP_size]
        MolFeature1 = MolFeature1.permute(1, 0, 2)
        MolFeature2 = MolFeature2.permute(1, 0, 2)
        # MolFeature: [Batch_size, GRULayer*Bi, FP_size]
        MolFeature1 = MolFeature1.sum(dim = 1)
        MolFeature2 = MolFeature2.sum(dim = 1)
        # MolFeature: [Batch_size, FP_size]
        PairwiseMolFeature = t.cat([MolFeature1, MolFeature2], dim = 1)
        prediction = self.Classifier(PairwiseMolFeature)
        return prediction

class ACPredCMPNN(nn.Module):
    """CMPNN graph encoder per molecule; pair features concatenated into a DNN."""
    def __init__(self, opt):
        super(ACPredCMPNN, self).__init__()
        self.opt = opt
        self.MolFeatureExtractor = CMPNNModel(
                self.opt.args['dataset_type']=='classification',
                self.opt.args['dataset_type']=='multiclass',
                opt = self.opt)
        self.Classifier = DNN(
                input_size = 2 * self.opt.args['FPSize'],
                output_size = self.opt.args['OutputSize'],
                layer_sizes = self.opt.args['DNNLayers'],
                opt = self.opt
        )

    def forward(self, Input):
        Input1, Input2 = Input
        MolFeature1 = self.MolFeatureExtractor(Input1)
        MolFeature2 = self.MolFeatureExtractor(Input2)
        # print(f"size of Mol1 and Mol2: {MolFeature1.size()}")
        PairwiseMolFeature = t.cat([MolFeature1,MolFeature2],dim=1)
        # print(f'size of PairwiseMolFeature: {PairwiseMolFeature.size()}')
        prediction = self.Classifier(PairwiseMolFeature)

        return prediction

class ACPredGCN(nn.Module):
    """PyG GCN encoder; a batch packs both molecules of each pair, so the
    batch vector is rebuilt per molecule and pair embeddings are re-paired
    by reshaping (assumes the two molecules of a pair are consecutive in the
    batch — inferred from decompose_mol_pair; confirm against the loader)."""
    def __init__(self, opt):
        super(ACPredGCN, self).__init__()
        self.opt = opt
        if not self.opt.args['PyG']:
            print(f"PyG arg should be {True}")
            raise ValueError
        self.MolFeatureExtractor = PyGGCN(self.opt, FeatureExtractor = True)
        self.Classifier = DNN(
                input_size = 2 *self.opt.args['FPSize'],
                output_size = self.opt.args['OutputSize'],
                layer_sizes = self.opt.args['DNNLayers'],
                opt = self.opt
        )

    def forward(self, Input):
        self.reset_batch(Input)
        Input = Input.to(t.device(f"cuda:{self.opt.args['CUDA_VISIBLE_DEVICES']}" if t.cuda.is_available() else 'cpu'))
        # print(f"Input.batch: {Input.batch}")
        MolEmbeddings = self.MolFeatureExtractor(Input)
        MolEmbeddings = self.decompose_mol_pair(MolEmbeddings)

        prediction = self.Classifier(MolEmbeddings)
        return prediction

    def reset_batch(self, Input):
        # Rebuild Input.batch so every molecule (not every pair) gets its own
        # graph index: index i is repeated atom_num[i] times.
        batch = Input.batch
        atom_nums = Input.atom_num
        bond_nums = Input.bond_num
        MolNum = len(atom_nums)
        # print(f"batch: {batch}")
        # print(f"atom_nums: {atom_nums}")
        # print(f"MolNum: {MolNum}")
        # print(f"len batch: {len(batch)}")
        # print(f"sum atom_nums: {t.sum(atom_nums)}")
        assert len(batch) == t.sum(atom_nums)

        # reset batch by atom num
        mol_cnt = 0
        mol_batch = t.Tensor([])
        for i in range(MolNum):
            tmp = t.Tensor([mol_cnt])
            tmp = tmp.repeat(atom_nums[i].item())
            assert len(tmp) == atom_nums[i]
            mol_batch = t.cat([mol_batch, tmp]).long()
            mol_cnt += 1
        Input.batch = mol_batch

    def decompose_mol_pair(self, MolEmbeddings):
        # Reshape [2N, D] per-molecule embeddings into [N, 2D] pair features
        # (consecutive rows form one pair).
        # print(f"MolEmbedding size: {MolEmbeddings.size()}")
        mol_num = MolEmbeddings.size()[0]
        EmbLength = MolEmbeddings.size()[1]
        assert mol_num % 2 == 0
        return MolEmbeddings.view(int(mol_num/2), int(EmbLength*2))

class ACPredGIN(nn.Module):
    """Same pairing scheme as ACPredGCN with a PyG GIN encoder."""
    def __init__(self, opt):
        super(ACPredGIN, self).__init__()
        self.opt = opt
        if not self.opt.args['PyG']:
            print(f"PyG arg should be {True}")
            raise ValueError
        self.MolFeatureExtractor = PyGGIN(self.opt, FeatureExtractor = True)
        self.Classifier = DNN(
                input_size = 2 *self.opt.args['FPSize'],
                output_size = self.opt.args['OutputSize'],
                layer_sizes = self.opt.args['DNNLayers'],
                opt = self.opt
        )

    def forward(self, Input):
        self.reset_batch(Input)
        Input = Input.to(t.device(f"cuda:{self.opt.args['CUDA_VISIBLE_DEVICES']}" if t.cuda.is_available() else 'cpu'))
        # print(f"Input.batch: {Input.batch}")
        MolEmbeddings = self.MolFeatureExtractor(Input)
        MolEmbeddings = self.decompose_mol_pair(MolEmbeddings)

        prediction = self.Classifier(MolEmbeddings)
        return prediction

    def reset_batch(self, Input):
        # Rebuild Input.batch so every molecule gets its own graph index
        # (duplicated from ACPredGCN).
        batch = Input.batch
        atom_nums = Input.atom_num
        bond_nums = Input.bond_num
        MolNum = len(atom_nums)
        # print(f"batch: {batch}")
        # print(f"atom_nums: {atom_nums}")
        # print(f"MolNum: {MolNum}")
        # print(f"len batch: {len(batch)}")
        # print(f"sum atom_nums: {t.sum(atom_nums)}")
        assert len(batch) == t.sum(atom_nums)

        # reset batch by atom num
        mol_cnt = 0
        mol_batch = t.Tensor([])
        for i in range(MolNum):
            tmp = t.Tensor([mol_cnt])
            tmp = tmp.repeat(atom_nums[i].item())
            assert len(tmp) == atom_nums[i]
            mol_batch = t.cat([mol_batch, tmp]).long()
            mol_cnt += 1
        Input.batch = mol_batch

    def decompose_mol_pair(self, MolEmbeddings):
        # [2N, D] -> [N, 2D]; consecutive rows form one pair.
        # print(f"MolEmbedding size: {MolEmbeddings.size()}")
        mol_num = MolEmbeddings.size()[0]
        EmbLength = MolEmbeddings.size()[1]
        assert mol_num % 2 == 0
        return MolEmbeddings.view(int(mol_num/2), int(EmbLength*2))

class ACPredSGC(nn.Module):
    """Same pairing scheme as ACPredGCN with a PyG SGC encoder."""
    def __init__(self, opt):
        super(ACPredSGC, self).__init__()
        self.opt = opt
        if not self.opt.args['PyG']:
            print(f"PyG arg should be {True}")
            raise ValueError
        self.MolFeatureExtractor = PyGSGC(self.opt, FeatureExtractor = True)
        self.Classifier = DNN(
                input_size = 2 *self.opt.args['FPSize'],
                output_size = self.opt.args['OutputSize'],
                layer_sizes = self.opt.args['DNNLayers'],
                opt = self.opt
        )

    def forward(self, Input):
        self.reset_batch(Input)
        Input = Input.to(t.device(f"cuda:{self.opt.args['CUDA_VISIBLE_DEVICES']}" if t.cuda.is_available() else 'cpu'))
        # print(f"Input.batch: {Input.batch}")
        MolEmbeddings = self.MolFeatureExtractor(Input)
        MolEmbeddings = self.decompose_mol_pair(MolEmbeddings)

        prediction = self.Classifier(MolEmbeddings)
        return prediction

    def reset_batch(self, Input):
        # Rebuild Input.batch so every molecule gets its own graph index
        # (duplicated from ACPredGCN).
        batch = Input.batch
        atom_nums = Input.atom_num
        bond_nums = Input.bond_num
        MolNum = len(atom_nums)
        # print(f"batch: {batch}")
        # print(f"atom_nums: {atom_nums}")
        # print(f"MolNum: {MolNum}")
        # print(f"len batch: {len(batch)}")
        # print(f"sum atom_nums: {t.sum(atom_nums)}")
        assert len(batch) == t.sum(atom_nums)

        # reset batch by atom num
        mol_cnt = 0
        mol_batch = t.Tensor([])
        for i in range(MolNum):
            tmp = t.Tensor([mol_cnt])
            tmp = tmp.repeat(atom_nums[i].item())
            assert len(tmp) == atom_nums[i]
            mol_batch = t.cat([mol_batch, tmp]).long()
            mol_cnt += 1
        Input.batch = mol_batch

    def decompose_mol_pair(self, MolEmbeddings):
        # [2N, D] -> [N, 2D]; consecutive rows form one pair.
        # print(f"MolEmbedding size: {MolEmbeddings.size()}")
        mol_num = MolEmbeddings.size()[0]
        EmbLength = MolEmbeddings.size()[1]
        assert mol_num % 2 == 0
        return MolEmbeddings.view(int(mol_num/2), int(EmbLength*2))

class ACPredGraphormer(nn.Module):
    # NOTE(review): this definition continues beyond the end of this chunk of
    # the dump; the remainder (rest of __init__, forward, ...) is truncated.
    def __init__(self, opt):
        super(ACPredGraphormer, self).__init__()
        self.opt = opt
        self.MolFeatureExtractor = Graphormer(
                num_encoder_layers = self.opt.args['num_encoder_layers'],
                num_attention_heads = self.opt.args['num_attention_heads'],
                embedding_dim = self.opt.args['embedding_dim'],
                dropout_rate = self.opt.args['dropout_rate'],
                intput_dropout_rate = self.opt.args['intput_dropout_rate'],
                ffn_dim = self.opt.args['ffn_dim'],
                edge_type = self.opt.args['edge_type'],
                multi_hop_max_dist = self.opt.args['multi_hop_max_dist'],
                attention_dropout_rate = self.opt.args['attention_dropout_rate'],
                flag = self.opt.args['flag'],
                opt = self.opt,
                mode = 'Extractor'
        )
        self.Classifier = DNN(
                input_size = 2 * self.opt.args['embedding_dim'],
                output_size = self.opt.args['OutputSize'],
                layer_sizes = self.opt.args['DNNLayers'],
                opt = self.opt
from TrainingFramework.Splitter import *


class TargetSplitter(BasicSplitter):
    """Split an AC dataset by assay target id.

    All samples of one target end up in the same subset, so train/valid/test
    contain disjoint targets (out-of-domain evaluation). Subset sizes are
    matched to SplitRate by rejection-sampling whole targets.
    """

    def __init__(self):
        super(TargetSplitter, self).__init__()

    def split(self, dataset, opt):
        """Return (sets, idxs): 2-way for a 1-element SplitRate, 3-way for 2."""
        rate = opt.args['SplitRate']
        validseed = opt.args['SplitValidSeed']
        testseed = opt.args['SplitTestSeed']
        total_num = len(dataset)
        if total_num == 1:
            # Dataset may arrive wrapped in a one-element container; unwrap.
            dataset = dataset[0]
            total_num = len(dataset)

        tarid2size, tarid2sample = self.GetTargetidList(dataset)
        # Materialize as a list: random.sample() requires a sequence, and a
        # dict view raises TypeError on Python 3.11+ (deprecated since 3.9).
        tarids = list(tarid2size.keys())

        # calculate the splitting thres
        if len(rate) == 1:
            assert rate[0] < 1
            train_num = int(total_num * rate[0])
            valid_num = total_num - train_num
        elif len(rate) == 2:
            assert rate[0] + rate[1] < 1
            train_num = int(total_num * rate[0])
            valid_num = int(total_num * rate[1])
            test_num = total_num - train_num - valid_num
        else:
            print("Wrong splitting rate")
            raise RuntimeError

        if len(rate) == 1:
            sample_size = int(len(tarids) * (1-rate[0]))
            validtargets, chosen_cnt = self.BinaryClassSample(tarid2size, tarids, sample_size, valid_num, validseed)
            validset, valididx = self.Target2Samples(validtargets, tarid2sample)
            assert len(validset) == chosen_cnt
            traintargets = self.excludedtargets(validtargets, tarids)
            trainset, trainidx = self.Target2Samples(traintargets, tarid2sample)
            assert len(validset) + len(trainset) == total_num
            return (trainset, validset), (trainidx, valididx)
        elif len(rate) == 2:
            sample_size = int(len(tarids) * (1-rate[0]-rate[1]))
            testtargets, chosen_cnt = self.BinaryClassSample(tarid2size, tarids, sample_size, test_num, testseed)
            testset, testidx = self.Target2Samples(testtargets, tarid2sample)
            assert len(testset) == chosen_cnt
            remained_tarids = self.excludedtargets(testtargets, tarids)
            sample_size = int(len(tarids) * rate[1])
            validtargets, chosen_cnt = self.BinaryClassSample(tarid2size, remained_tarids, sample_size, valid_num, validseed)
            validset, valididx = self.Target2Samples(validtargets, tarid2sample)
            assert len(validset) == chosen_cnt
            traintargets = self.excludedtargets(validtargets, remained_tarids)
            trainset, trainidx = self.Target2Samples(traintargets, tarid2sample)
            assert len(validset)+len(testset)+len(trainset) == total_num
            return (trainset, validset, testset), (trainidx, valididx, testidx)

    def BinaryClassSample(self, tarid2size, tarids, sample_size, optimal_count, seed):
        """Rejection-sample `sample_size` targets until their total sample
        count falls within +-error_rate of `optimal_count`.

        Every 5000 failed tries the tolerance and the number of sampled
        targets are relaxed so the loop terminates. Returns (targets, count).
        """
        count = 0
        tried_times = 0
        error_rate = 0.1

        while (count < optimal_count * (1-error_rate)) or (count > optimal_count * (1+error_rate)):
            tried_times += 1

            if tried_times % 5000 == 0:
                print("modify error rate.")
                error_rate += 0.05
                print("modify sample target number.")
                sample_size = int(sample_size * 1.1)
                assert sample_size < len(tarids)

            # Advance the seed each try so re-running reproduces the result.
            seed += 1
            random.seed(seed)
            chosen_targets = random.sample(tarids, sample_size)
            count = sum([tarid2size[target] for target in chosen_targets])

        print(f"Sample num: {count}")
        print(f"Tried times: {tried_times}")
        print(f"Available seed: {seed}")

        return chosen_targets, count

    def Target2Samples(self, chosen_targets, tarid2sample):
        """Flatten the chosen targets' samples; also return their 'idx' list."""
        # Renamed from `set`/`id` to avoid shadowing the builtins.
        samples = []
        for targetid in chosen_targets:
            targetset = tarid2sample[targetid]
            samples.extend(targetset)
        idx = [item['idx'] for item in samples]
        return samples, idx

    def excludedtargets(self, chosen_targets, tarids):
        """Return the targets of `tarids` that are NOT in `chosen_targets`."""
        excluded_targets = []
        for target in tarids:
            if target not in chosen_targets:
                excluded_targets.append(target)
        return excluded_targets

    def GetTargetidList(self, dataset):
        """Group dataset items by their 'Target' key.

        Returns (tarid2size, tarid2sample): per-target sample counts and
        per-target sample lists.
        """
        tarid2size = {}
        tarid2sample = {}
        for item in dataset:
            tarid = item['Target']
            if tarid not in tarid2size.keys():
                tarid2size.update({tarid: 0})
                tarid2sample.update({tarid: []})
            tarid2size[tarid] += 1
            tarid2sample[tarid].append(item)
        return tarid2size, tarid2sample


def verification(sets, opt):
    """Assert that the split subsets share no target ids, then print stats."""
    rate = opt.args['SplitRate']
    if len(rate) == 1:
        trainset, validset = sets
        testset = None
    elif len(rate) == 2:
        trainset, validset, testset = sets

    train_targets = []
    valid_targets = []
    test_targets = []

    for item in trainset:
        target = item['Target']
        if target not in train_targets:
            train_targets.append(target)

    for item in validset:
        target = item['Target']
        if target not in valid_targets:
            valid_targets.append(target)

    if testset:
        for item in testset:
            target = item['Target']
            if target not in test_targets:
                test_targets.append(target)

    # verify train and valid
    for target in train_targets:
        assert target not in valid_targets

    for target in valid_targets:
        assert target not in train_targets

    # verify train and test
    if testset:
        for target in train_targets:
            assert target not in test_targets
        for target in test_targets:
            assert target not in train_targets

    # verify valid and test (trivially true when there is no test set,
    # since test_targets is then empty)
    for target in valid_targets:
        assert target not in test_targets
    for target in test_targets:
        assert target not in valid_targets

    print(f"Verification passed.")
    print(f"trainset target num: {len(train_targets)}")
    print(f"validset target num: {len(valid_targets)}")
    print(f"testset target num: {len(test_targets)}")
- libuuid=1.0.3=h7f8727e_2 51 | - libuv=1.40.0=h7b6447c_0 52 | - libxcb=1.14=h7b6447c_0 53 | - libxml2=2.9.10=h68273f3_2 54 | - littleutils=0.2.2=py_0 55 | - lz4-c=1.9.3=h9c3ff4c_1 56 | - markupsafe=2.0.1=py38h497a2fe_0 57 | - matplotlib-base=3.3.4=py38h0efea84_0 58 | - mkl=2021.4.0=h06a4308_640 59 | - mkl-service=2.4.0=py38h497a2fe_0 60 | - mkl_fft=1.3.1=py38hd3c417c_0 61 | - mkl_random=1.2.2=py38h1abd341_0 62 | - ncurses=6.3=h7f8727e_2 63 | - nettle=3.6=he412f7d_0 64 | - networkx=2.5.1=pyhd8ed1ab_0 65 | - numpy=1.21.2=py38h20f2e39_0 66 | - numpy-base=1.21.2=py38h79a1101_0 67 | - ogb=1.3.3=pyhd8ed1ab_0 68 | - olefile=0.46=pyh9f0ad1d_1 69 | - openh264=2.1.1=h780b84a_0 70 | - openssl=1.1.1n=h7f8727e_0 71 | - outdated=0.2.1=pyhd8ed1ab_0 72 | - packaging=21.3=pyhd8ed1ab_0 73 | - pandas=1.2.3=py38h51da96c_0 74 | - patsy=0.5.2=pyhd8ed1ab_0 75 | - pcre=8.45=h9c3ff4c_0 76 | - pillow=6.2.1=py38h6b7be26_0 77 | - pip=21.2.4=py38h06a4308_0 78 | - pixman=0.40.0=h36c2ea0_0 79 | - pycairo=1.19.1=py38h708ec4a_0 80 | - pycparser=2.21=pyhd8ed1ab_0 81 | - pyg=2.0.4=py38_torch_1.11.0_cu113 82 | - pyopenssl=22.0.0=pyhd8ed1ab_0 83 | - pyparsing=3.0.7=pyhd8ed1ab_0 84 | - pysocks=1.7.1=py38h578d9bd_5 85 | - python=3.8.13=h12debd9_0 86 | - python-dateutil=2.8.2=pyhd8ed1ab_0 87 | - python-louvain=0.15=pyhd8ed1ab_1 88 | - python_abi=3.8=2_cp38 89 | - pytorch=1.11.0=py3.8_cuda11.3_cudnn8.2.0_0 90 | - pytorch-cluster=1.6.0=py38_torch_1.11.0_cu113 91 | - pytorch-mutex=1.0=cuda 92 | - pytorch-scatter=2.0.9=py38_torch_1.11.0_cu113 93 | - pytorch-sparse=0.6.13=py38_torch_1.11.0_cu113 94 | - pytorch-spline-conv=1.2.1=py38_torch_1.11.0_cu113 95 | - pytz=2022.1=pyhd8ed1ab_0 96 | - pyyaml=5.4.1=py38h497a2fe_0 97 | - rdkit=2020.09.5=py38h2bca085_0 98 | - readline=8.1.2=h7f8727e_1 99 | - reportlab=3.5.68=py38hadf75a6_0 100 | - requests=2.27.1=pyhd8ed1ab_0 101 | - scikit-learn=1.0.2=py38h51133e4_1 102 | - scipy=1.7.3=py38hc147768_0 103 | - seaborn=0.11.2=hd8ed1ab_0 104 | - 
seaborn-base=0.11.2=pyhd8ed1ab_0 105 | - setuptools=58.0.4=py38h06a4308_0 106 | - six=1.16.0=pyh6c4a22f_0 107 | - sqlalchemy=1.3.23=py38h497a2fe_0 108 | - sqlite=3.38.2=hc218d9a_0 109 | - statsmodels=0.13.2=py38h7f8727e_0 110 | - threadpoolctl=3.1.0=pyh8a188c0_0 111 | - tk=8.6.11=h1ccaba5_0 112 | - torchaudio=0.11.0=py38_cu113 113 | - torchvision=0.12.0=py38_cu113 114 | - tornado=6.1=py38h497a2fe_1 115 | - tqdm=4.63.1=pyhd8ed1ab_0 116 | - typing_extensions=4.1.1=pyha770c72_0 117 | - urllib3=1.26.9=pyhd8ed1ab_0 118 | - wheel=0.37.1=pyhd3eb1b0_0 119 | - x264=1!161.3030=h7f98852_1 120 | - xz=5.2.5=h7b6447c_0 121 | - yacs=0.1.8=pyhd8ed1ab_0 122 | - yaml=0.2.5=h516909a_0 123 | - zlib=1.2.11=h7f8727e_4 124 | - zstd=1.4.9=ha95c52a_0 125 | - pip: 126 | - class-resolver==0.3.8 127 | prefix: /opt/conda/envs/MolGraphEnv-1.11 128 | -------------------------------------------------------------------------------- /ACNet/CMPNNLarge.py: -------------------------------------------------------------------------------- 1 | from ACComponents.ACProcessControllers import * 2 | 3 | ExpOptions = { 4 | 'Search': 'greedy', 5 | 'SeedPerOpt': 3, 6 | 'SubsetsNum': 3, 7 | 'OnlyEval': False, 8 | } 9 | 10 | BasicParamList = { 11 | 'ExpName': 'ACLarge', 12 | 'MainMetric': 'AUC', 13 | 'DataPath': './ACComponents/ACDataset/data_files/generated_datasets/MMP_AC_Large.json', 14 | 'RootPath': './TestExp/Large/CMPNN/', 15 | 'CUDA_VISIBLE_DEVICES': '2', 16 | 'TaskNum': 1, 17 | 'ClassNum': 2, 18 | 'OutputSize': 2, 19 | 'Feature': 'CMPNN', 20 | 'Model': 'CMPNN', 21 | 22 | 'OnlySpecific': True, 23 | 'Weight': True, 24 | 'AC': True, 25 | 'PyG': False, 26 | 27 | 'ValidRate': 40000, 28 | 'PrintRate': 5, 29 | 'UpdateRate': 1, 30 | 'ValidBalance': False, 31 | 'TestBalance': False, 32 | 'SplitRate': [0.8, 0.1], 33 | 'Splitter': 'Random', 34 | 'MaxEpoch': 300, 35 | 'LowerThanMaxLimit': 12, 36 | 'DecreasingLimit': 8, 37 | 38 | # if OnlyEval == True: 39 | 'EvalModelPath': None, 40 | 'EvalDatasetPath': None, 41 | 
'EvalLogAllPreds': None, 42 | 43 | 'Scheduler': 'PolynomialDecayLR', 44 | # 'Scheduler': 'EmptyLRScheduler', 45 | 46 | 47 | # Params for PolynomialDecayLR only 48 | 'WarmupEpoch': 2, 49 | 'LRMaxEpoch':300, 50 | 'EndLR':1e-9, 51 | 'Power':1.0, 52 | # Params for StepLR only 53 | 'LRStep': 30, 54 | 'LRGamma': 0.1, 55 | ########## 56 | 57 | 'WeightIniter': None, 58 | 59 | # Params for NormWeightIniter only 60 | 'InitMean' : 0, 61 | 'InitStd' : 1, 62 | 63 | # Params for CMPNN only 64 | 'dataset_type': 'classification', 65 | 'activation': 'ReLU', 66 | 'ffn_num_layers':3, # useless for AC 67 | 'ffn_hidden_size': 300, # useless for AC 68 | 'no_cache': False, 69 | 'atom_messages': False, 70 | 'CommunicateKernel': 'Add', 71 | 'only_extract_feature': True, # True for AC 72 | 73 | 74 | # Training Params to be adujsted. If the param is not needed to be adjusted, set the value here. 75 | 'SplitValidSeed': 8, 76 | 'SplitTestSeed': 8, 77 | 'BatchSize': 256, 78 | 79 | } 80 | AdjustableParamList = {} 81 | SpecificParamList = { 82 | 'DropRate':[0.2], 83 | 'WeightDecay':[4.5], 84 | 'lr':[3], 85 | 'FPSize': [128], 86 | 'CMPNNLayers': [3], 87 | 'DNNLayers':[[128]], 88 | } 89 | 90 | 91 | expcontroller = ACExperimentProcessController(ExpOptions, [BasicParamList, AdjustableParamList, SpecificParamList]) 92 | 93 | expcontroller.ExperimentStart() 94 | 95 | -------------------------------------------------------------------------------- /ACNet/CMPNNMedium.py: -------------------------------------------------------------------------------- 1 | from ACComponents.ACProcessControllers import * 2 | 3 | ExpOptions = { 4 | 'Search': 'greedy', 5 | 'SeedPerOpt': 3, 6 | 'SubsetsNum': 64, 7 | 'OnlyEval': False, 8 | } 9 | 10 | BasicParamList = { 11 | 'ExpName': 'ACMedium', 12 | 'MainMetric': 'AUC', 13 | 'DataPath': './ACComponents/ACDataset/data_files/generated_datasets/MMP_AC_Medium.json', 14 | 'RootPath': './TestExp/Medium/CMPNN/', 15 | 'CUDA_VISIBLE_DEVICES': '2', 16 | 'TaskNum': 1, 17 | 'ClassNum': 
2, 18 | 'OutputSize': 2, 19 | 'Feature': 'CMPNN', 20 | 'Model': 'CMPNN', 21 | 22 | 'OnlySpecific': True, 23 | 'Weight': True, 24 | 'AC': True, 25 | 'PyG': False, 26 | 27 | 'ValidRate': 40000, 28 | 'PrintRate': 5, 29 | 'UpdateRate': 1, 30 | 'ValidBalance': False, 31 | 'TestBalance': False, 32 | 'SplitRate': [0.8, 0.1], 33 | 'Splitter': 'Random', 34 | 'MaxEpoch': 300, 35 | 'LowerThanMaxLimit': 12, 36 | 'DecreasingLimit': 8, 37 | 38 | # if OnlyEval == True: 39 | 'EvalModelPath': None, 40 | 'EvalDatasetPath': None, 41 | 'EvalLogAllPreds': None, 42 | 43 | 'Scheduler': 'PolynomialDecayLR', 44 | # 'Scheduler': 'EmptyLRScheduler', 45 | 46 | 47 | # Params for PolynomialDecayLR only 48 | 'WarmupEpoch': 2, 49 | 'LRMaxEpoch':300, 50 | 'EndLR':1e-9, 51 | 'Power':1.0, 52 | # Params for StepLR only 53 | 'LRStep': 30, 54 | 'LRGamma': 0.1, 55 | ########## 56 | 57 | 'WeightIniter': None, 58 | 59 | # Params for NormWeightIniter only 60 | 'InitMean' : 0, 61 | 'InitStd' : 1, 62 | 63 | # Params for CMPNN only 64 | 'dataset_type': 'classification', 65 | 'activation': 'ReLU', 66 | 'ffn_num_layers':3, # useless for AC 67 | 'ffn_hidden_size': 300, # useless for AC 68 | 'no_cache': False, 69 | 'atom_messages': False, 70 | 'CommunicateKernel': 'Add', 71 | 'only_extract_feature': True, # True for AC 72 | 73 | 74 | # Training Params to be adujsted. If the param is not needed to be adjusted, set the value here. 
75 | 'SplitValidSeed': 8, 76 | 'SplitTestSeed': 8, 77 | 'BatchSize': 256, 78 | 79 | } 80 | AdjustableParamList = {} 81 | SpecificParamList = { 82 | 'DropRate':[0.2], 83 | 'WeightDecay':[4.5], 84 | 'lr':[3], 85 | 'FPSize': [128], 86 | 'CMPNNLayers': [3], 87 | 'DNNLayers':[[128]], 88 | } 89 | 90 | 91 | expcontroller = ACExperimentProcessController(ExpOptions, [BasicParamList, AdjustableParamList, SpecificParamList]) 92 | 93 | expcontroller.ExperimentStart() 94 | 95 | -------------------------------------------------------------------------------- /ACNet/CMPNNSmall.py: -------------------------------------------------------------------------------- 1 | from ACComponents.ACProcessControllers import * 2 | 3 | ExpOptions = { 4 | 'Search': 'greedy', 5 | 'SeedPerOpt': 3, 6 | 'SubsetsNum': 110, 7 | 'OnlyEval': False, 8 | } 9 | 10 | BasicParamList = { 11 | 'ExpName': 'ACSmall', 12 | 'MainMetric': 'AUC', 13 | 'DataPath': './ACComponents/ACDataset/data_files/generated_datasets/MMP_AC_Small.json', 14 | 'RootPath': './TestExp/Small/CMPNN/', 15 | 'CUDA_VISIBLE_DEVICES': '2', 16 | 'TaskNum': 1, 17 | 'ClassNum': 2, 18 | 'OutputSize': 2, 19 | 'Feature': 'CMPNN', 20 | 'Model': 'CMPNN', 21 | 22 | 'OnlySpecific': True, 23 | 'Weight': True, 24 | 'AC': True, 25 | 'PyG': False, 26 | 27 | 'ValidRate': 40000, 28 | 'PrintRate': 5, 29 | 'UpdateRate': 1, 30 | 'ValidBalance': False, 31 | 'TestBalance': False, 32 | 'SplitRate': [0.8, 0.1], 33 | 'Splitter': 'Random', 34 | 'MaxEpoch': 300, 35 | 'LowerThanMaxLimit': 12, 36 | 'DecreasingLimit': 8, 37 | 38 | # if OnlyEval == True: 39 | 'EvalModelPath': None, 40 | 'EvalDatasetPath': None, 41 | 'EvalLogAllPreds': None, 42 | 43 | 'Scheduler': 'PolynomialDecayLR', 44 | # 'Scheduler': 'EmptyLRScheduler', 45 | 46 | 47 | # Params for PolynomialDecayLR only 48 | 'WarmupEpoch': 2, 49 | 'LRMaxEpoch':300, 50 | 'EndLR':1e-9, 51 | 'Power':1.0, 52 | # Params for StepLR only 53 | 'LRStep': 30, 54 | 'LRGamma': 0.1, 55 | ########## 56 | 57 | 'WeightIniter': None, 58 
| 59 | # Params for NormWeightIniter only 60 | 'InitMean' : 0, 61 | 'InitStd' : 1, 62 | 63 | # Params for CMPNN only 64 | 'dataset_type': 'classification', 65 | 'activation': 'ReLU', 66 | 'ffn_num_layers':3, # useless for AC 67 | 'ffn_hidden_size': 300, # useless for AC 68 | 'no_cache': False, 69 | 'atom_messages': False, 70 | 'CommunicateKernel': 'Add', 71 | 'only_extract_feature': True, # True for AC 72 | 73 | 74 | # Training Params to be adujsted. If the param is not needed to be adjusted, set the value here. 75 | 'SplitValidSeed': 8, 76 | 'SplitTestSeed': 8, 77 | 'BatchSize': 256, 78 | 79 | } 80 | AdjustableParamList = {} 81 | SpecificParamList = { 82 | 'DropRate':[0.2], 83 | 'WeightDecay':[4.5], 84 | 'lr':[3], 85 | 'FPSize': [128], 86 | 'CMPNNLayers': [3], 87 | 'DNNLayers':[[128]], 88 | } 89 | 90 | 91 | expcontroller = ACExperimentProcessController(ExpOptions, [BasicParamList, AdjustableParamList, SpecificParamList]) 92 | 93 | expcontroller.ExperimentStart() 94 | 95 | -------------------------------------------------------------------------------- /ACNet/ChemBERTFew.py: -------------------------------------------------------------------------------- 1 | from ACComponents.ACProcessControllers import * 2 | 3 | ExpOptions = { 4 | 'Search': 'greedy', 5 | 'SeedPerOpt': 3, 6 | 'SubsetsNum': 13, 7 | 'OnlyEval': False, 8 | 'Finetune':True, 9 | } 10 | 11 | BasicParamList = { 12 | 'ExpName': 'ACFew', 13 | 'MainMetric': 'AUC', 14 | 'DataPath': './ACComponents/ACDataset/data_files/MMP_AC_Few_representation/ChemBERT.npz', 15 | 'RootPath': './TestExp/Few/ChemBERT/', 16 | 'CUDA_VISIBLE_DEVICES': '2', 17 | 'TaskNum': 1, 18 | 'ClassNum': 2, 19 | 'OutputSize': 2, 20 | 'Feature': 'Raw', 21 | 'Model': 'MLP', 22 | 23 | # if Feature == Raw 24 | 'RawFeatureSize': 1024, 25 | 26 | 'OnlySpecific': True, 27 | 'Weight': True, 28 | 'AC': True, 29 | 'PyG': False, 30 | 31 | 'ValidRate': 40000, 32 | 'PrintRate': 5, 33 | 'UpdateRate': 1, 34 | 'SplitRate': [0.8, 0.1], 35 | 'Splitter': 
'Random', 36 | 'MaxEpoch': 300, 37 | 'LowerThanMaxLimit': 12, 38 | 'DecreasingLimit': 8, 39 | 40 | # if OnlyEval == True: 41 | 'EvalModelPath': None, 42 | 'EvalDatasetPath': None, 43 | 'EvalLogAllPreds': None, 44 | 45 | 'Scheduler': 'PolynomialDecayLR', 46 | 47 | # Params for PolynomialDecayLR only 48 | 'WarmupEpoch': 2, 49 | 'LRMaxEpoch':300, 50 | 'EndLR':1e-9, 51 | 'Power':1.0, 52 | # Params for StepLR only 53 | 'LRStep': 30, 54 | 'LRGamma': 0.1, 55 | ########## 56 | 57 | 'WeightIniter': None, 58 | 59 | # Params for NormWeightIniter only 60 | 'InitMean' : 0, 61 | 'InitStd' : 1, 62 | 63 | # Training Params to be adujsted. If the param is not needed to be adjusted, set the value here. 64 | 'SplitValidSeed': 8, 65 | 'SplitTestSeed': 8, 66 | 'BatchSize': 8, 67 | 68 | } 69 | AdjustableParamList = {} 70 | SpecificParamList = { 71 | 'DropRate':[0.2], 72 | 'WeightDecay':[5], 73 | 'lr':[4], 74 | 'DNNLayers':[[512, 128, 32]], 75 | } 76 | 77 | 78 | expcontroller = ACExperimentProcessController(ExpOptions, [BasicParamList, AdjustableParamList, SpecificParamList]) 79 | 80 | expcontroller.ExperimentStart() 81 | 82 | -------------------------------------------------------------------------------- /ACNet/FPMLPFew.py: -------------------------------------------------------------------------------- 1 | from ACComponents.ACProcessControllers import * 2 | 3 | ExpOptions = { 4 | 'Search': 'greedy', 5 | 'SeedPerOpt': 3, 6 | 'SubsetsNum': 13, 7 | 'OnlyEval': False, 8 | } 9 | 10 | BasicParamList = { 11 | 'ExpName': 'ACFew', 12 | 'MainMetric': 'AUC', 13 | 'DataPath': './ACComponents/ACDataset/data_files/generated_datasets/MMP_AC_Few.json', 14 | 'RootPath': './TestExp/Few/FPMLP/', 15 | 'CUDA_VISIBLE_DEVICES': '0', 16 | 'TaskNum': 1, 17 | 'ClassNum': 2, 18 | 'OutputSize': 2, 19 | 'Feature': 'FP', 20 | 'Model': 'MLP', 21 | 22 | 'OnlySpecific': True, 23 | 'Weight': True, 24 | 'AC': True, 25 | 'PyG': False, 26 | 27 | 'ValidRate': 40000, 28 | 'PrintRate': 5, 29 | 'UpdateRate': 1, 30 | 
'SplitRate': [0.8, 0.1], 31 | 'Splitter': 'Random', 32 | 'MaxEpoch': 300, 33 | 'LowerThanMaxLimit': 12, 34 | 'DecreasingLimit': 8, 35 | 36 | # if OnlyEval == True: 37 | 'EvalModelPath': None, 38 | 'EvalDatasetPath': None, 39 | 'EvalLogAllPreds': None, 40 | 41 | 'Scheduler': 'PolynomialDecayLR', 42 | 43 | # Params for PolynomialDecayLR only 44 | 'WarmupEpoch': 2, 45 | 'LRMaxEpoch':300, 46 | 'EndLR':1e-9, 47 | 'Power':1.0, 48 | # Params for StepLR only 49 | 'LRStep': 30, 50 | 'LRGamma': 0.1, 51 | ########## 52 | 53 | 'WeightIniter': None, 54 | 55 | # Params for NormWeightIniter only 56 | 'InitMean' : 0, 57 | 'InitStd' : 1, 58 | 59 | 'AtomFeatureSize': 39, 60 | 'BondFeatureSize': 10, 61 | 'MolFP': 'MorganFP', 62 | 'radius': 2, 63 | 'nBits': 1024, 64 | 65 | # Training Params to be adujsted. If the param is not needed to be adjusted, set the value here. 66 | 'SplitValidSeed': 8, 67 | 'SplitTestSeed': 8, 68 | 'BatchSize': 8, 69 | 70 | } 71 | AdjustableParamList = {} 72 | SpecificParamList = { 73 | 'DropRate':[0.2], 74 | 'WeightDecay':[4.5], 75 | 'lr':[3], 76 | 'DNNLayers':[[128]], 77 | } 78 | 79 | 80 | expcontroller = ACExperimentProcessController(ExpOptions, [BasicParamList, AdjustableParamList, SpecificParamList]) 81 | 82 | expcontroller.ExperimentStart() 83 | 84 | -------------------------------------------------------------------------------- /ACNet/FPMLPLarge.py: -------------------------------------------------------------------------------- 1 | from ACComponents.ACProcessControllers import * 2 | 3 | ExpOptions = { 4 | 'Search': 'greedy', 5 | 'SeedPerOpt': 3, 6 | 'SubsetsNum': 3, 7 | 'OnlyEval': False, 8 | } 9 | 10 | BasicParamList = { 11 | 'ExpName': 'ACLarge', 12 | 'MainMetric': 'AUC', 13 | 'DataPath': './ACComponents/ACDataset/data_files/generated_datasets/MMP_AC_Large.json', 14 | 'RootPath': './TestExp/Large/FPMLP/', 15 | 'CUDA_VISIBLE_DEVICES': '0', 16 | 'TaskNum': 1, 17 | 'ClassNum': 2, 18 | 'OutputSize': 2, 19 | 'Feature': 'FP', 20 | 'Model': 'MLP', 21 | 22 
| 'OnlySpecific': True, 23 | 'Weight': True, 24 | 'AC': True, 25 | 'PyG': False, 26 | 27 | 'ValidRate': 40000, 28 | 'PrintRate': 5, 29 | 'UpdateRate': 1, 30 | 'SplitRate': [0.8, 0.1], 31 | 'Splitter': 'Random', 32 | 'MaxEpoch': 300, 33 | 'LowerThanMaxLimit': 12, 34 | 'DecreasingLimit': 8, 35 | 36 | # if OnlyEval == True: 37 | 'EvalModelPath': None, 38 | 'EvalDatasetPath': None, 39 | 'EvalLogAllPreds': None, 40 | 41 | 'Scheduler': 'PolynomialDecayLR', 42 | 43 | # Params for PolynomialDecayLR only 44 | 'WarmupEpoch': 2, 45 | 'LRMaxEpoch':300, 46 | 'EndLR':1e-9, 47 | 'Power':1.0, 48 | # Params for StepLR only 49 | 'LRStep': 30, 50 | 'LRGamma': 0.1, 51 | ########## 52 | 53 | 'WeightIniter': None, 54 | 55 | # Params for NormWeightIniter only 56 | 'InitMean' : 0, 57 | 'InitStd' : 1, 58 | 59 | 'AtomFeatureSize': 39, 60 | 'BondFeatureSize': 10, 61 | 'MolFP': 'MorganFP', 62 | 'radius': 2, 63 | 'nBits': 1024, 64 | 65 | 'SplitValidSeed': 8, 66 | 'SplitTestSeed': 8, 67 | 'BatchSize': 256, 68 | 69 | } 70 | AdjustableParamList = {} 71 | SpecificParamList = { 72 | 'DropRate':[0.4], 73 | 'WeightDecay':[5], 74 | 'lr':[3], 75 | 'DNNLayers':[[512, 128, 32]], 76 | } 77 | 78 | 79 | expcontroller = ACExperimentProcessController(ExpOptions, [BasicParamList, AdjustableParamList, SpecificParamList]) 80 | 81 | expcontroller.ExperimentStart() 82 | 83 | -------------------------------------------------------------------------------- /ACNet/FPMLPMedium.py: -------------------------------------------------------------------------------- 1 | from ACComponents.ACProcessControllers import * 2 | 3 | ExpOptions = { 4 | 'Search': 'greedy', 5 | 'SeedPerOpt': 3, 6 | 'SubsetsNum': 64, 7 | 'OnlyEval': False, 8 | } 9 | 10 | BasicParamList = { 11 | 'ExpName': 'ACMedium', 12 | 'MainMetric': 'AUC', 13 | 'DataPath': './ACComponents/ACDataset/data_files/generated_datasets/MMP_AC_Medium.json', 14 | 'RootPath': './TestExp/Medium/FPMLP/', 15 | 'CUDA_VISIBLE_DEVICES': '0', 16 | 'TaskNum': 1, 17 | 'ClassNum': 2, 18 
| 'OutputSize': 2, 19 | 'Feature': 'FP', 20 | 'Model': 'MLP', 21 | 22 | 'OnlySpecific': True, 23 | 'Weight': True, 24 | 'AC': True, 25 | 'PyG': False, 26 | 27 | 'ValidRate': 40000, 28 | 'PrintRate': 5, 29 | 'UpdateRate': 1, 30 | 'SplitRate': [0.8, 0.1], 31 | 'Splitter': 'Random', 32 | 'MaxEpoch': 300, 33 | 'LowerThanMaxLimit': 12, 34 | 'DecreasingLimit': 8, 35 | 36 | # if OnlyEval == True: 37 | 'EvalModelPath': None, 38 | 'EvalDatasetPath': None, 39 | 'EvalLogAllPreds': None, 40 | 41 | 'Scheduler': 'PolynomialDecayLR', 42 | 43 | # Params for PolynomialDecayLR only 44 | 'WarmupEpoch': 2, 45 | 'LRMaxEpoch':300, 46 | 'EndLR':1e-9, 47 | 'Power':1.0, 48 | # Params for StepLR only 49 | 'LRStep': 30, 50 | 'LRGamma': 0.1, 51 | ########## 52 | 53 | 'WeightIniter': None, 54 | 55 | # Params for NormWeightIniter only 56 | 'InitMean' : 0, 57 | 'InitStd' : 1, 58 | 59 | 'AtomFeatureSize': 39, 60 | 'BondFeatureSize': 10, 61 | 'MolFP': 'MorganFP', 62 | 'radius': 2, 63 | 'nBits': 1024, 64 | 65 | 'SplitValidSeed': 8, 66 | 'SplitTestSeed': 8, 67 | 'BatchSize': 256, 68 | 69 | } 70 | AdjustableParamList = {} 71 | SpecificParamList = { 72 | 'DropRate':[0.4], 73 | 'WeightDecay':[5], 74 | 'lr':[3], 75 | 'DNNLayers':[[512, 128, 32]], 76 | } 77 | 78 | 79 | expcontroller = ACExperimentProcessController(ExpOptions, [BasicParamList, AdjustableParamList, SpecificParamList]) 80 | 81 | expcontroller.ExperimentStart() 82 | 83 | -------------------------------------------------------------------------------- /ACNet/FPMLPMixRandom.py: -------------------------------------------------------------------------------- 1 | from ACComponents.ACProcessControllers import * 2 | 3 | ExpOptions = { 4 | 'Search': 'greedy', 5 | 'SeedPerOpt': 3, 6 | 'SubsetsNum': 1, 7 | 'OnlyEval': False, 8 | } 9 | 10 | BasicParamList = { 11 | 'ExpName': 'ACMixRandom', 12 | 'MainMetric': 'AUC', 13 | 'DataPath': './ACComponents/ACDataset/data_files/generated_datasets/MMP_AC_Mixed_Screened.json', 14 | 'RootPath': 
'./TestExp/Mix/FPMLP/', 15 | 'CUDA_VISIBLE_DEVICES': '0', 16 | 'TaskNum': 1, 17 | 'ClassNum': 2, 18 | 'OutputSize': 2, 19 | 'Feature': 'FP', 20 | 'Model': 'MLP', 21 | 22 | 'OnlySpecific': True, 23 | 'Weight': True, 24 | 'AC': True, 25 | 'PyG': False, 26 | 27 | 'ValidRate': 40000, 28 | 'PrintRate': 5, 29 | 'UpdateRate': 1, 30 | 'SplitRate': [0.8, 0.1], 31 | 'Splitter': 'Random', 32 | 'MaxEpoch': 300, 33 | 'LowerThanMaxLimit': 12, 34 | 'DecreasingLimit': 8, 35 | 36 | # if OnlyEval == True: 37 | 'EvalModelPath': None, 38 | 'EvalDatasetPath': None, 39 | 'EvalLogAllPreds': None, 40 | 41 | 'Scheduler': 'PolynomialDecayLR', 42 | 43 | # Params for PolynomialDecayLR only 44 | 'WarmupEpoch': 2, 45 | 'LRMaxEpoch':300, 46 | 'EndLR':1e-9, 47 | 'Power':1.0, 48 | # Params for StepLR only 49 | 'LRStep': 30, 50 | 'LRGamma': 0.1, 51 | ########## 52 | 53 | 'WeightIniter': None, 54 | 55 | # Params for NormWeightIniter only 56 | 'InitMean' : 0, 57 | 'InitStd' : 1, 58 | 59 | 'AtomFeatureSize': 39, 60 | 'BondFeatureSize': 10, 61 | 'MolFP': 'MorganFP', 62 | 'radius': 2, 63 | 'nBits': 1024, 64 | 65 | # Training Params to be adujsted. If the param is not needed to be adjusted, set the value here. 
66 | 'SplitValidSeed': 8, 67 | 'SplitTestSeed': 8, 68 | 'BatchSize': 256, 69 | 70 | } 71 | AdjustableParamList = {} 72 | SpecificParamList = { 73 | 'DropRate':[0.4], 74 | 'WeightDecay':[4.5], 75 | 'lr':[4], 76 | 'DNNLayers':[[128]], 77 | } 78 | 79 | 80 | expcontroller = ACExperimentProcessController(ExpOptions, [BasicParamList, AdjustableParamList, SpecificParamList]) 81 | 82 | expcontroller.ExperimentStart() 83 | 84 | -------------------------------------------------------------------------------- /ACNet/FPMLPMixTarget.py: -------------------------------------------------------------------------------- 1 | from ACComponents.ACProcessControllers import * 2 | 3 | ExpOptions = { 4 | 'Search': 'greedy', 5 | 'SeedPerOpt': 3, 6 | 'SubsetsNum': 1, 7 | 'OnlyEval': False, 8 | } 9 | 10 | BasicParamList = { 11 | 'ExpName': 'ACMixTarget', 12 | 'MainMetric': 'AUC', 13 | 'DataPath': './ACComponents/ACDataset/data_files/generated_datasets/MMP_AC_Mixed_Screened.json', 14 | 'RootPath': './TestExp/Mix/FPMLP/', 15 | 'CUDA_VISIBLE_DEVICES': '0', 16 | 'TaskNum': 1, 17 | 'ClassNum': 2, 18 | 'OutputSize': 2, 19 | 'Feature': 'FP', 20 | 'Model': 'MLP', 21 | 22 | 'OnlySpecific': True, 23 | 'Weight': True, 24 | 'AC': True, 25 | 'PyG': False, 26 | 27 | 'ValidRate': 40000, 28 | 'PrintRate': 5, 29 | 'UpdateRate': 1, 30 | 'SplitRate': [0.8, 0.1], 31 | 'Splitter': 'TargetRandom', 32 | 'MaxEpoch': 300, 33 | 'LowerThanMaxLimit': 12, 34 | 'DecreasingLimit': 8, 35 | 36 | # if OnlyEval == True: 37 | 'EvalModelPath': None, 38 | 'EvalDatasetPath': None, 39 | 'EvalLogAllPreds': None, 40 | 41 | 'Scheduler': 'PolynomialDecayLR', 42 | 43 | # Params for PolynomialDecayLR only 44 | 'WarmupEpoch': 2, 45 | 'LRMaxEpoch':300, 46 | 'EndLR':1e-9, 47 | 'Power':1.0, 48 | # Params for StepLR only 49 | 'LRStep': 30, 50 | 'LRGamma': 0.1, 51 | ########## 52 | 53 | 'WeightIniter': None, 54 | 55 | # Params for NormWeightIniter only 56 | 'InitMean' : 0, 57 | 'InitStd' : 1, 58 | 59 | 'AtomFeatureSize': 39, 60 | 
'BondFeatureSize': 10, 61 | 'MolFP': 'MorganFP', 62 | 'radius': 2, 63 | 'nBits': 1024, 64 | 65 | # Training Params to be adujsted. If the param is not needed to be adjusted, set the value here. 66 | 'SplitValidSeed': 8, 67 | 'SplitTestSeed': 8, 68 | 'BatchSize': 256, 69 | 70 | } 71 | AdjustableParamList = {} 72 | SpecificParamList = { 73 | 'DropRate':[0.2], 74 | 'WeightDecay':[4.5], 75 | 'lr':[3], 76 | 'DNNLayers':[[128]], 77 | } 78 | 79 | 80 | expcontroller = ACExperimentProcessController(ExpOptions, [BasicParamList, AdjustableParamList, SpecificParamList]) 81 | 82 | expcontroller.ExperimentStart() 83 | 84 | -------------------------------------------------------------------------------- /ACNet/FPMLPSmall.py: -------------------------------------------------------------------------------- 1 | from ACComponents.ACProcessControllers import * 2 | 3 | ExpOptions = { 4 | 'Search': 'greedy', 5 | 'SeedPerOpt': 3, 6 | 'SubsetsNum': 110, 7 | 'OnlyEval': False, 8 | } 9 | 10 | BasicParamList = { 11 | 'ExpName': 'ACSmall', 12 | 'MainMetric': 'AUC', 13 | 'DataPath': './ACComponents/ACDataset/data_files/generated_datasets/MMP_AC_Small.json', 14 | 'RootPath': './TestExp/Small/FPMLP/', 15 | 'CUDA_VISIBLE_DEVICES': '0', 16 | 'TaskNum': 1, 17 | 'ClassNum': 2, 18 | 'OutputSize': 2, 19 | 'Feature': 'FP', 20 | 'Model': 'MLP', 21 | 22 | 'OnlySpecific': True, 23 | 'Weight': True, 24 | 'AC': True, 25 | 'PyG': False, 26 | 27 | 'ValidRate': 40000, 28 | 'PrintRate': 5, 29 | 'UpdateRate': 1, 30 | 'SplitRate': [0.8, 0.1], 31 | 'Splitter': 'Random', 32 | 'MaxEpoch': 300, 33 | 'LowerThanMaxLimit': 12, 34 | 'DecreasingLimit': 8, 35 | 36 | # if OnlyEval == True: 37 | 'EvalModelPath': None, 38 | 'EvalDatasetPath': None, 39 | 'EvalLogAllPreds': None, 40 | 41 | 'Scheduler': 'PolynomialDecayLR', 42 | 43 | # Params for PolynomialDecayLR only 44 | 'WarmupEpoch': 2, 45 | 'LRMaxEpoch':300, 46 | 'EndLR':1e-9, 47 | 'Power':1.0, 48 | # Params for StepLR only 49 | 'LRStep': 30, 50 | 'LRGamma': 0.1, 51 | 
########## 52 | 53 | 'WeightIniter': None, 54 | 55 | # Params for NormWeightIniter only 56 | 'InitMean' : 0, 57 | 'InitStd' : 1, 58 | 59 | 'AtomFeatureSize': 39, 60 | 'BondFeatureSize': 10, 61 | 'MolFP': 'MorganFP', 62 | 'radius': 2, 63 | 'nBits': 1024, 64 | 65 | 'SplitValidSeed': 8, 66 | 'SplitTestSeed': 8, 67 | 'BatchSize': 32, 68 | 69 | } 70 | AdjustableParamList = {} 71 | SpecificParamList = { 72 | 'DropRate':[0.4], 73 | 'WeightDecay':[5], 74 | 'lr':[3], 75 | 'DNNLayers':[[256, 64]], 76 | } 77 | 78 | 79 | expcontroller = ACExperimentProcessController(ExpOptions, [BasicParamList, AdjustableParamList, SpecificParamList]) 80 | 81 | expcontroller.ExperimentStart() 82 | 83 | -------------------------------------------------------------------------------- /ACNet/GCNLarge.py: -------------------------------------------------------------------------------- 1 | from ACComponents.ACProcessControllers import * 2 | 3 | ExpOptions = { 4 | 'Search': 'greedy', 5 | 'SeedPerOpt': 3, 6 | 'SubsetsNum': 3, 7 | 'OnlyEval': False, 8 | } 9 | 10 | BasicParamList = { 11 | 'ExpName': 'ACLarge', 12 | 'MainMetric': 'AUC', 13 | 'DataPath': './ACComponents/ACDataset/data_files/generated_datasets/MMP_AC_Large.json', 14 | 'RootPath': './TestExp/Large/GCN/', 15 | 'CUDA_VISIBLE_DEVICES': '1', 16 | 'TaskNum': 1, 17 | 'ClassNum': 2, 18 | 'OutputSize': 2, 19 | 'Feature': 'PyGGCN', 20 | 'Model': 'PyGGCN', 21 | 22 | 'OnlySpecific': True, 23 | 'Weight': True, 24 | 'AC': True, 25 | 'PyG': True, 26 | 27 | 'ValidRate': 4000, 28 | 'PrintRate': 5, 29 | 'UpdateRate': 1, 30 | 'SplitRate': [0.8, 0.1], 31 | 'Splitter': 'Random', 32 | 'MaxEpoch': 300, 33 | 'LowerThanMaxLimit': 30, 34 | 'DecreasingLimit': 12, 35 | 36 | # if OnlyEval == True: 37 | 'EvalModelPath': None, 38 | 'EvalDatasetPath': None, 39 | 'EvalLogAllPreds': None, 40 | 41 | 'Scheduler': 'PolynomialDecayLR', 42 | 43 | # Params for PolynomialDecayLR only 44 | 'WarmupEpoch': 2, 45 | 'LRMaxEpoch':300, 46 | 'EndLR':1e-9, 47 | 'Power':1.0, 48 | # Params 
for StepLR only 49 | 'LRStep': 30, 50 | 'LRGamma': 0.1, 51 | ########## 52 | 53 | 'WeightIniter': 'XavierNorm', 54 | 55 | # Params for NormWeightIniter only 56 | 'InitMean' : 0, 57 | 'InitStd' : 1, 58 | 59 | 'AtomFeatureSize': 39, 60 | 'BondFeatureSize': 10, 61 | 62 | 'GCNReadout': 'Add', 63 | 64 | 65 | 'SplitValidSeed': 8, 66 | 'SplitTestSeed': 8, 67 | 'BatchSize': 200, 68 | 69 | } 70 | AdjustableParamList = {} 71 | SpecificParamList = { 72 | 'DropRate':[0.4], 73 | 'WeightDecay':[4.5], 74 | 'lr':[3], 75 | 'GCNInputSize': [64], 76 | 'GCNHiddenSize': [128], 77 | 'GCNLayers': [3], 78 | 'FPSize':[64], 79 | 'DNNLayers':[[32]], 80 | } 81 | 82 | expcontroller = ACExperimentProcessController(ExpOptions, [BasicParamList, AdjustableParamList, SpecificParamList]) 83 | 84 | expcontroller.ExperimentStart() 85 | -------------------------------------------------------------------------------- /ACNet/GCNMedium.py: -------------------------------------------------------------------------------- 1 | from ACComponents.ACProcessControllers import * 2 | 3 | ExpOptions = { 4 | 'Search': 'greedy', 5 | 'SeedPerOpt': 3, 6 | 'SubsetsNum': 64, 7 | 'OnlyEval': False, 8 | } 9 | 10 | BasicParamList = { 11 | 'ExpName': 'ACMedium', 12 | 'MainMetric': 'AUC', 13 | 'DataPath': './ACComponents/ACDataset/data_files/generated_datasets/MMP_AC_Medium.json', 14 | 'RootPath': './TestExp/Medium/GCN/', 15 | 'CUDA_VISIBLE_DEVICES': '1', 16 | 'TaskNum': 1, 17 | 'ClassNum': 2, 18 | 'OutputSize': 2, 19 | 'Feature': 'PyGGCN', 20 | 'Model': 'PyGGCN', 21 | 22 | 'OnlySpecific': True, 23 | 'Weight': True, 24 | 'AC': True, 25 | 'PyG': True, 26 | 27 | 'ValidRate': 4000, 28 | 'PrintRate': 5, 29 | 'UpdateRate': 1, 30 | 'SplitRate': [0.8, 0.1], 31 | 'Splitter': 'Random', 32 | 'MaxEpoch': 300, 33 | 'LowerThanMaxLimit': 30, 34 | 'DecreasingLimit': 12, 35 | 36 | # if OnlyEval == True: 37 | 'EvalModelPath': None, 38 | 'EvalDatasetPath': None, 39 | 'EvalLogAllPreds': None, 40 | 41 | 'Scheduler': 'PolynomialDecayLR', 42 | 43 | 
# Params for PolynomialDecayLR only 44 | 'WarmupEpoch': 2, 45 | 'LRMaxEpoch':300, 46 | 'EndLR':1e-9, 47 | 'Power':1.0, 48 | # Params for StepLR only 49 | 'LRStep': 30, 50 | 'LRGamma': 0.1, 51 | ########## 52 | 53 | 'WeightIniter': 'XavierNorm', 54 | 55 | # Params for NormWeightIniter only 56 | 'InitMean' : 0, 57 | 'InitStd' : 1, 58 | 59 | 'AtomFeatureSize': 39, 60 | 'BondFeatureSize': 10, 61 | 62 | 'GCNReadout': 'Add', 63 | 64 | 65 | 'SplitValidSeed': 8, 66 | 'SplitTestSeed': 8, 67 | 'BatchSize': 200, 68 | 69 | } 70 | AdjustableParamList = {} 71 | SpecificParamList = { 72 | 'DropRate':[0.2], 73 | 'WeightDecay':[4.5], 74 | 'lr':[3], 75 | 'GCNInputSize': [64], 76 | 'GCNHiddenSize': [128], 77 | 'GCNLayers': [3], 78 | 'FPSize':[64], 79 | 'DNNLayers':[[]], 80 | } 81 | 82 | expcontroller = ACExperimentProcessController(ExpOptions, [BasicParamList, AdjustableParamList, SpecificParamList]) 83 | 84 | expcontroller.ExperimentStart() 85 | -------------------------------------------------------------------------------- /ACNet/GCNMixRandom.py: -------------------------------------------------------------------------------- 1 | from ACComponents.ACProcessControllers import * 2 | 3 | ExpOptions = { 4 | 'Search': 'greedy', 5 | 'SeedPerOpt': 3, 6 | 'SubsetsNum': 1, 7 | 'OnlyEval': False, 8 | } 9 | 10 | BasicParamList = { 11 | 'ExpName': 'ACMixRandom', 12 | 'MainMetric': 'AUC', 13 | 'DataPath': './ACComponents/ACDataset/data_files/generated_datasets/MMP_AC_Mixed_Screened.json', 14 | 'RootPath': './TestExp/Mix/GCN/', 15 | 'CUDA_VISIBLE_DEVICES': '1', 16 | 'TaskNum': 1, 17 | 'ClassNum': 2, 18 | 'OutputSize': 2, 19 | 'Feature': 'PyGGCN', 20 | 'Model': 'PyGGCN', 21 | 22 | 'OnlySpecific': True, 23 | 'Weight': True, 24 | 'AC': True, 25 | 'PyG': True, 26 | 27 | 'ValidRate': 4000, 28 | 'PrintRate': 5, 29 | 'UpdateRate': 1, 30 | 'SplitRate': [0.8, 0.1], 31 | 'Splitter': 'Random', 32 | 'MaxEpoch': 300, 33 | 'LowerThanMaxLimit': 30, 34 | 'DecreasingLimit': 12, 35 | 36 | # if OnlyEval == True: 
37 | 'EvalModelPath': None, 38 | 'EvalDatasetPath': None, 39 | 'EvalLogAllPreds': None, 40 | 41 | 'Scheduler': 'PolynomialDecayLR', 42 | 43 | # Params for PolynomialDecayLR only 44 | 'WarmupEpoch': 2, 45 | 'LRMaxEpoch':300, 46 | 'EndLR':1e-9, 47 | 'Power':1.0, 48 | # Params for StepLR only 49 | 'LRStep': 30, 50 | 'LRGamma': 0.1, 51 | ########## 52 | 53 | 'WeightIniter': 'XavierNorm', 54 | 55 | # Params for NormWeightIniter only 56 | 'InitMean' : 0, 57 | 'InitStd' : 1, 58 | 59 | 'AtomFeatureSize': 39, 60 | 'BondFeatureSize': 10, 61 | 62 | 'GCNReadout': 'Add', 63 | 64 | 65 | 'SplitValidSeed': 8, 66 | 'SplitTestSeed': 8, 67 | 'BatchSize': 200, 68 | 69 | } 70 | AdjustableParamList = {} 71 | SpecificParamList = { 72 | 'DropRate':[0.2], 73 | 'WeightDecay':[5], 74 | 'lr':[4], 75 | 'GCNInputSize': [128], 76 | 'GCNHiddenSize': [512], 77 | 'GCNLayers': [2], 78 | 'FPSize':[256], 79 | 'DNNLayers':[[128, 32]], 80 | } 81 | 82 | expcontroller = ACExperimentProcessController(ExpOptions, [BasicParamList, AdjustableParamList, SpecificParamList]) 83 | 84 | expcontroller.ExperimentStart() 85 | -------------------------------------------------------------------------------- /ACNet/GCNMixTarget.py: -------------------------------------------------------------------------------- 1 | from ACComponents.ACProcessControllers import * 2 | 3 | ExpOptions = { 4 | 'Search': 'greedy', 5 | 'SeedPerOpt': 3, 6 | 'SubsetsNum': 1, 7 | 'OnlyEval': False, 8 | } 9 | 10 | BasicParamList = { 11 | 'ExpName': 'ACMixTarget', 12 | 'MainMetric': 'AUC', 13 | 'DataPath': './ACComponents/ACDataset/data_files/generated_datasets/MMP_AC_Mixed_Screened.json', 14 | 'RootPath': './TestExp/Mix/GCN/', 15 | 'CUDA_VISIBLE_DEVICES': '1', 16 | 'TaskNum': 1, 17 | 'ClassNum': 2, 18 | 'OutputSize': 2, 19 | 'Feature': 'PyGGCN', 20 | 'Model': 'PyGGCN', 21 | 22 | 'OnlySpecific': True, 23 | 'Weight': True, 24 | 'AC': True, 25 | 'PyG': True, 26 | 27 | 'ValidRate': 4000, 28 | 'PrintRate': 5, 29 | 'UpdateRate': 1, 30 | 'SplitRate': 
[0.8, 0.1], 31 | 'Splitter': 'TargetRandom', 32 | 'MaxEpoch': 300, 33 | 'LowerThanMaxLimit': 30, 34 | 'DecreasingLimit': 12, 35 | 36 | # if OnlyEval == True: 37 | 'EvalModelPath': None, 38 | 'EvalDatasetPath': None, 39 | 'EvalLogAllPreds': None, 40 | 41 | 'Scheduler': 'PolynomialDecayLR', 42 | 43 | # Params for PolynomialDecayLR only 44 | 'WarmupEpoch': 2, 45 | 'LRMaxEpoch':300, 46 | 'EndLR':1e-9, 47 | 'Power':1.0, 48 | # Params for StepLR only 49 | 'LRStep': 30, 50 | 'LRGamma': 0.1, 51 | ########## 52 | 53 | 'WeightIniter': 'XavierNorm', 54 | 55 | # Params for NormWeightIniter only 56 | 'InitMean' : 0, 57 | 'InitStd' : 1, 58 | 59 | 'AtomFeatureSize': 39, 60 | 'BondFeatureSize': 10, 61 | 62 | 'GCNReadout': 'Add', 63 | 64 | 65 | 'SplitValidSeed': 8, 66 | 'SplitTestSeed': 8, 67 | 'BatchSize': 200, 68 | 69 | } 70 | AdjustableParamList = {} 71 | SpecificParamList = { 72 | 'DropRate':[0.2], 73 | 'WeightDecay':[5], 74 | 'lr':[4], 75 | 'GCNInputSize': [128], 76 | 'GCNHiddenSize': [256], 77 | 'GCNLayers': [2], 78 | 'FPSize':[128], 79 | 'DNNLayers':[[64]], 80 | } 81 | 82 | expcontroller = ACExperimentProcessController(ExpOptions, [BasicParamList, AdjustableParamList, SpecificParamList]) 83 | 84 | expcontroller.ExperimentStart() 85 | -------------------------------------------------------------------------------- /ACNet/GCNSmall.py: -------------------------------------------------------------------------------- 1 | from ACComponents.ACProcessControllers import * 2 | 3 | ExpOptions = { 4 | 'Search': 'greedy', 5 | 'SeedPerOpt': 3, 6 | 'SubsetsNum': 110, 7 | 'OnlyEval': False, 8 | } 9 | 10 | BasicParamList = { 11 | 'ExpName': 'ACSmall', 12 | 'MainMetric': 'AUC', 13 | 'DataPath': './ACComponents/ACDataset/data_files/generated_datasets/MMP_AC_Small.json', 14 | 'RootPath': './TestExp/Small/GCN/', 15 | 'CUDA_VISIBLE_DEVICES': '1', 16 | 'TaskNum': 1, 17 | 'ClassNum': 2, 18 | 'OutputSize': 2, 19 | 'Feature': 'PyGGCN', 20 | 'Model': 'PyGGCN', 21 | 22 | 'OnlySpecific': True, 23 | 
'Weight': True, 24 | 'AC': True, 25 | 'PyG': True, 26 | 27 | 'ValidRate': 4000, 28 | 'PrintRate': 5, 29 | 'UpdateRate': 1, 30 | 'SplitRate': [0.8, 0.1], 31 | 'Splitter': 'Random', 32 | 'MaxEpoch': 300, 33 | 'LowerThanMaxLimit': 30, 34 | 'DecreasingLimit': 12, 35 | 36 | # if OnlyEval == True: 37 | 'EvalModelPath': None, 38 | 'EvalDatasetPath': None, 39 | 'EvalLogAllPreds': None, 40 | 41 | 'Scheduler': 'PolynomialDecayLR', 42 | 43 | # Params for PolynomialDecayLR only 44 | 'WarmupEpoch': 2, 45 | 'LRMaxEpoch':300, 46 | 'EndLR':1e-9, 47 | 'Power':1.0, 48 | # Params for StepLR only 49 | 'LRStep': 30, 50 | 'LRGamma': 0.1, 51 | ########## 52 | 53 | 'WeightIniter': 'XavierNorm', 54 | 55 | # Params for NormWeightIniter only 56 | 'InitMean' : 0, 57 | 'InitStd' : 1, 58 | 59 | 'AtomFeatureSize': 39, 60 | 'BondFeatureSize': 10, 61 | 62 | 'GCNReadout': 'Add', 63 | 64 | 65 | 'SplitValidSeed': 8, 66 | 'SplitTestSeed': 8, 67 | 'BatchSize': 32, 68 | 69 | } 70 | AdjustableParamList = {} 71 | SpecificParamList = { 72 | 'DropRate':[0.2], 73 | 'WeightDecay':[4.5], 74 | 'lr':[3], 75 | 'GCNInputSize': [64], 76 | 'GCNHiddenSize': [128], 77 | 'GCNLayers': [3], 78 | 'FPSize':[64], 79 | 'DNNLayers':[[]], 80 | } 81 | 82 | expcontroller = ACExperimentProcessController(ExpOptions, [BasicParamList, AdjustableParamList, SpecificParamList]) 83 | 84 | expcontroller.ExperimentStart() 85 | -------------------------------------------------------------------------------- /ACNet/GINLarge.py: -------------------------------------------------------------------------------- 1 | from ACComponents.ACProcessControllers import * 2 | 3 | ExpOptions = { 4 | 'Search': 'greedy', 5 | 'SeedPerOpt': 3, 6 | 'SubsetsNum': 3, 7 | 'OnlyEval': False, 8 | } 9 | 10 | BasicParamList = { 11 | 'ExpName': 'ACLarge', 12 | 'MainMetric': 'AUC', 13 | 'DataPath': './ACComponents/ACDataset/data_files/generated_datasets/MMP_AC_Large.json', 14 | 'RootPath': './TestExp/Large/GIN/', 15 | 'CUDA_VISIBLE_DEVICES': '1', 16 | 'TaskNum': 1, 17 
| 'ClassNum': 2, 18 | 'OutputSize': 2, 19 | 'Feature': 'PyGGIN', 20 | 'Model': 'PyGGIN', 21 | 22 | 'OnlySpecific': True, 23 | 'Weight': True, 24 | 'AC': True, 25 | 'PyG': True, 26 | 27 | 'ValidRate': 4000, 28 | 'PrintRate': 5, 29 | 'UpdateRate': 1, 30 | 'SplitRate': [0.8, 0.1], 31 | 'Splitter': 'Random', 32 | 'MaxEpoch': 300, 33 | 'LowerThanMaxLimit': 30, 34 | 'DecreasingLimit': 12, 35 | 36 | # if OnlyEval == True: 37 | 'EvalModelPath': None, 38 | 'EvalDatasetPath': None, 39 | 'EvalLogAllPreds': None, 40 | 41 | 'Scheduler': 'PolynomialDecayLR', 42 | 43 | # Params for PolynomialDecayLR only 44 | 'WarmupEpoch': 2, 45 | 'LRMaxEpoch':300, 46 | 'EndLR':1e-9, 47 | 'Power':1.0, 48 | # Params for StepLR only 49 | 'LRStep': 30, 50 | 'LRGamma': 0.1, 51 | ########## 52 | 53 | 'WeightIniter': 'XavierNorm', 54 | 55 | # Params for NormWeightIniter only 56 | 'InitMean' : 0, 57 | 'InitStd' : 1, 58 | 59 | 'AtomFeatureSize': 39, 60 | 'BondFeatureSize': 10, 61 | 62 | 'GCNReadout': 'Add', 63 | 64 | 65 | 'SplitValidSeed': 8, 66 | 'SplitTestSeed': 8, 67 | 'BatchSize': 200, 68 | 69 | } 70 | AdjustableParamList = {} 71 | SpecificParamList = { 72 | 'DropRate':[0.2], 73 | 'WeightDecay':[4.5], 74 | 'lr':[3], 75 | 'GINInputSize': [64], 76 | 'GINHiddenSize': [64], 77 | 'GINLayers': [3], 78 | 'GINEps': [0], 79 | 'FPSize':[32], 80 | 'DNNLayers':[[]], 81 | } 82 | 83 | expcontroller = ACExperimentProcessController(ExpOptions, [BasicParamList, AdjustableParamList, SpecificParamList]) 84 | 85 | expcontroller.ExperimentStart() 86 | -------------------------------------------------------------------------------- /ACNet/GINMedium.py: -------------------------------------------------------------------------------- 1 | from ACComponents.ACProcessControllers import * 2 | 3 | ExpOptions = { 4 | 'Search': 'greedy', 5 | 'SeedPerOpt': 3, 6 | 'SubsetsNum': 64, 7 | 'OnlyEval': False, 8 | } 9 | 10 | BasicParamList = { 11 | 'ExpName': 'ACMedium', 12 | 'MainMetric': 'AUC', 13 | 'DataPath': 
'./ACComponents/ACDataset/data_files/generated_datasets/MMP_AC_Medium.json', 14 | 'RootPath': './TestExp/Medium/GIN/', 15 | 'CUDA_VISIBLE_DEVICES': '1', 16 | 'TaskNum': 1, 17 | 'ClassNum': 2, 18 | 'OutputSize': 2, 19 | 'Feature': 'PyGGIN', 20 | 'Model': 'PyGGIN', 21 | 22 | 'OnlySpecific': True, 23 | 'Weight': True, 24 | 'AC': True, 25 | 'PyG': True, 26 | 27 | 'ValidRate': 4000, 28 | 'PrintRate': 5, 29 | 'UpdateRate': 1, 30 | 'SplitRate': [0.8, 0.1], 31 | 'Splitter': 'Random', 32 | 'MaxEpoch': 300, 33 | 'LowerThanMaxLimit': 30, 34 | 'DecreasingLimit': 12, 35 | 36 | # if OnlyEval == True: 37 | 'EvalModelPath': None, 38 | 'EvalDatasetPath': None, 39 | 'EvalLogAllPreds': None, 40 | 41 | 'Scheduler': 'PolynomialDecayLR', 42 | 43 | # Params for PolynomialDecayLR only 44 | 'WarmupEpoch': 2, 45 | 'LRMaxEpoch':300, 46 | 'EndLR':1e-9, 47 | 'Power':1.0, 48 | # Params for StepLR only 49 | 'LRStep': 30, 50 | 'LRGamma': 0.1, 51 | ########## 52 | 53 | 'WeightIniter': 'XavierNorm', 54 | 55 | # Params for NormWeightIniter only 56 | 'InitMean' : 0, 57 | 'InitStd' : 1, 58 | 59 | 'AtomFeatureSize': 39, 60 | 'BondFeatureSize': 10, 61 | 62 | 'GCNReadout': 'Add', 63 | 64 | 65 | 'SplitValidSeed': 8, 66 | 'SplitTestSeed': 8, 67 | 'BatchSize': 200, 68 | 69 | } 70 | AdjustableParamList = {} 71 | SpecificParamList = { 72 | 'DropRate':[0.2], 73 | 'WeightDecay':[4.5], 74 | 'lr':[3], 75 | 'GINInputSize': [64], 76 | 'GINHiddenSize': [128], 77 | 'GINLayers': [3], 78 | 'GINEps': [0], 79 | 'FPSize':[64], 80 | 'DNNLayers':[[]], 81 | } 82 | 83 | expcontroller = ACExperimentProcessController(ExpOptions, [BasicParamList, AdjustableParamList, SpecificParamList]) 84 | 85 | expcontroller.ExperimentStart() 86 | -------------------------------------------------------------------------------- /ACNet/GINSmall.py: -------------------------------------------------------------------------------- 1 | from ACComponents.ACProcessControllers import * 2 | 3 | ExpOptions = { 4 | 'Search': 'greedy', 5 | 'SeedPerOpt': 3, 
6 | 'SubsetsNum': 110, 7 | 'OnlyEval': False, 8 | } 9 | 10 | BasicParamList = { 11 | 'ExpName': 'ACSmall', 12 | 'MainMetric': 'AUC', 13 | 'DataPath': './ACComponents/ACDataset/data_files/generated_datasets/MMP_AC_Small.json', 14 | 'RootPath': './TestExp/Small/GIN/', 15 | 'CUDA_VISIBLE_DEVICES': '1', 16 | 'TaskNum': 1, 17 | 'ClassNum': 2, 18 | 'OutputSize': 2, 19 | 'Feature': 'PyGGIN', 20 | 'Model': 'PyGGIN', 21 | 22 | 'OnlySpecific': True, 23 | 'Weight': True, 24 | 'AC': True, 25 | 'PyG': True, 26 | 27 | 'ValidRate': 4000, 28 | 'PrintRate': 5, 29 | 'UpdateRate': 1, 30 | 'SplitRate': [0.8, 0.1], 31 | 'Splitter': 'Random', 32 | 'MaxEpoch': 300, 33 | 'LowerThanMaxLimit': 30, 34 | 'DecreasingLimit': 12, 35 | 36 | # if OnlyEval == True: 37 | 'EvalModelPath': None, 38 | 'EvalDatasetPath': None, 39 | 'EvalLogAllPreds': None, 40 | 41 | 'Scheduler': 'PolynomialDecayLR', 42 | 43 | # Params for PolynomialDecayLR only 44 | 'WarmupEpoch': 2, 45 | 'LRMaxEpoch':300, 46 | 'EndLR':1e-9, 47 | 'Power':1.0, 48 | # Params for StepLR only 49 | 'LRStep': 30, 50 | 'LRGamma': 0.1, 51 | ########## 52 | 53 | 'WeightIniter': 'XavierNorm', 54 | 55 | # Params for NormWeightIniter only 56 | 'InitMean' : 0, 57 | 'InitStd' : 1, 58 | 59 | 'AtomFeatureSize': 39, 60 | 'BondFeatureSize': 10, 61 | 62 | 'GCNReadout': 'Add', 63 | 64 | 65 | 'SplitValidSeed': 8, 66 | 'SplitTestSeed': 8, 67 | 'BatchSize': 32, 68 | 69 | } 70 | AdjustableParamList = {} 71 | SpecificParamList = { 72 | 'DropRate':[0.2], 73 | 'WeightDecay':[4.5], 74 | 'lr':[3], 75 | 'GINInputSize': [64], 76 | 'GINHiddenSize': [64], 77 | 'GINLayers': [3], 78 | 'GINEps': [0], 79 | 'FPSize':[32], 80 | 'DNNLayers':[[]], 81 | } 82 | 83 | expcontroller = ACExperimentProcessController(ExpOptions, [BasicParamList, AdjustableParamList, SpecificParamList]) 84 | 85 | expcontroller.ExperimentStart() 86 | -------------------------------------------------------------------------------- /ACNet/GROVERFew.py: 
-------------------------------------------------------------------------------- 1 | from ACComponents.ACProcessControllers import * 2 | 3 | ExpOptions = { 4 | 'Search': 'greedy', 5 | 'SeedPerOpt': 3, 6 | 'SubsetsNum': 13, 7 | 'OnlyEval': False, 8 | 'Finetune':True, 9 | } 10 | 11 | BasicParamList = { 12 | 'ExpName': 'ACFew', 13 | 'MainMetric': 'AUC', 14 | 'DataPath': './ACComponents/ACDataset/data_files/MMP_AC_Few_representation/GROVER.npz', 15 | 'RootPath': './TestExp/Few/GROVER/', 16 | 'CUDA_VISIBLE_DEVICES': '2', 17 | 'TaskNum': 1, 18 | 'ClassNum': 2, 19 | 'OutputSize': 2, 20 | 'Feature': 'Raw', 21 | 'Model': 'MLP', 22 | 23 | # if Feature == Raw 24 | 'RawFeatureSize': 3400, 25 | 26 | 'OnlySpecific': True, 27 | 'Weight': True, 28 | 'AC': True, 29 | 'PyG': False, 30 | 31 | 'ValidRate': 40000, 32 | 'PrintRate': 5, 33 | 'UpdateRate': 1, 34 | 'ValidBalance': False, 35 | 'TestBalance': False, 36 | 'SplitRate': [0.8, 0.1], 37 | 'Splitter': 'Random', 38 | 'MaxEpoch': 300, 39 | 'LowerThanMaxLimit': 12, 40 | 'DecreasingLimit': 8, 41 | 42 | # if OnlyEval == True: 43 | 'EvalModelPath': None, 44 | 'EvalDatasetPath': None, 45 | 'EvalLogAllPreds': None, 46 | 47 | 'Scheduler': 'PolynomialDecayLR', 48 | 49 | # Params for PolynomialDecayLR only 50 | 'WarmupEpoch': 2, 51 | 'LRMaxEpoch':300, 52 | 'EndLR':1e-9, 53 | 'Power':1.0, 54 | # Params for StepLR only 55 | 'LRStep': 30, 56 | 'LRGamma': 0.1, 57 | ########## 58 | 59 | 'WeightIniter': None, 60 | 61 | # Params for NormWeightIniter only 62 | 'InitMean' : 0, 63 | 'InitStd' : 1, 64 | 65 | # Training Params to be adujsted. If the param is not needed to be adjusted, set the value here. 
66 | 'SplitValidSeed': 8, 67 | 'SplitTestSeed': 8, 68 | 'BatchSize': 8, 69 | 70 | } 71 | AdjustableParamList = {} 72 | SpecificParamList = { 73 | 'DropRate':[0.2], 74 | 'WeightDecay':[5], 75 | 'lr':[4], 76 | 'DNNLayers':[[256,64]], 77 | } 78 | 79 | 80 | expcontroller = ACExperimentProcessController(ExpOptions, [BasicParamList, AdjustableParamList, SpecificParamList]) 81 | 82 | expcontroller.ExperimentStart() 83 | 84 | -------------------------------------------------------------------------------- /ACNet/GRULarge.py: -------------------------------------------------------------------------------- 1 | from ACComponents.ACProcessControllers import * 2 | 3 | ExpOptions = { 4 | 'Search': 'greedy', 5 | 'SeedPerOpt': 3, 6 | 'SubsetsNum': 3, 7 | 'OnlyEval': False, 8 | } 9 | 10 | BasicParamList = { 11 | 'ExpName': 'ACLarge', 12 | 'MainMetric': 'AUC', 13 | 'DataPath': './ACComponents/ACDataset/data_files/generated_datasets/MMP_AC_Large.json', 14 | 'RootPath': './TestExp/Large/GRU/', 15 | 'CUDA_VISIBLE_DEVICES': '0', 16 | 'TaskNum': 1, 17 | 'ClassNum': 2, 18 | 'OutputSize': 2, 19 | 'Feature': 'SMILES', 20 | 'Model': 'GRU', 21 | 22 | 'OnlySpecific': True, 23 | 'Weight': True, 24 | 'AC': True, 25 | 'PyG': False, 26 | 27 | 'ValidRate': 40000, 28 | 'PrintRate': 5, 29 | 'UpdateRate': 1, 30 | 'SplitRate': [0.8, 0.1], 31 | 'Splitter': 'Random', 32 | 'MaxEpoch': 300, 33 | 'LowerThanMaxLimit': 12, 34 | 'DecreasingLimit': 8, 35 | 36 | # if OnlyEval == True: 37 | 'EvalModelPath': None, 38 | 'EvalDatasetPath': None, 39 | 'EvalLogAllPreds': None, 40 | 41 | 'Scheduler': 'PolynomialDecayLR', 42 | 43 | # Params for PolynomialDecayLR only 44 | 'WarmupEpoch': 2, 45 | 'LRMaxEpoch':300, 46 | 'EndLR':1e-9, 47 | 'Power':1.0, 48 | # Params for StepLR only 49 | 'LRStep': 30, 50 | 'LRGamma': 0.1, 51 | ########## 52 | 53 | 'WeightIniter': None, 54 | 55 | # Params for NormWeightIniter only 56 | 'InitMean' : 0, 57 | 'InitStd' : 1, 58 | 59 | 'SplitValidSeed': 8, 60 | 'SplitTestSeed': 8, 61 | 
'BatchSize': 256, 62 | 63 | } 64 | AdjustableParamList = {} 65 | SpecificParamList = { 66 | 'DropRate':[0.2], 67 | 'WeightDecay':[5], 68 | 'lr':[4], 69 | 'GRULayers':[3], 70 | 'FPSize':[256], 71 | 'DNNLayers':[[512, 128, 32]], 72 | } 73 | 74 | 75 | expcontroller = ACExperimentProcessController(ExpOptions, [BasicParamList, AdjustableParamList, SpecificParamList]) 76 | 77 | expcontroller.ExperimentStart() 78 | 79 | -------------------------------------------------------------------------------- /ACNet/GRUMedium.py: -------------------------------------------------------------------------------- 1 | from ACComponents.ACProcessControllers import * 2 | 3 | ExpOptions = { 4 | 'Search': 'greedy', 5 | 'SeedPerOpt': 3, 6 | 'SubsetsNum': 64, 7 | 'OnlyEval': False, 8 | } 9 | 10 | BasicParamList = { 11 | 'ExpName': 'ACMedium', 12 | 'MainMetric': 'AUC', 13 | 'DataPath': './ACComponents/ACDataset/data_files/generated_datasets/MMP_AC_Medium.json', 14 | 'RootPath': './TestExp/Medium/GRU/', 15 | 'CUDA_VISIBLE_DEVICES': '0', 16 | 'TaskNum': 1, 17 | 'ClassNum': 2, 18 | 'OutputSize': 2, 19 | 'Feature': 'SMILES', 20 | 'Model': 'GRU', 21 | 22 | 'OnlySpecific': True, 23 | 'Weight': True, 24 | 'AC': True, 25 | 'PyG': False, 26 | 27 | 'ValidRate': 40000, 28 | 'PrintRate': 5, 29 | 'UpdateRate': 1, 30 | 'SplitRate': [0.8, 0.1], 31 | 'Splitter': 'Random', 32 | 'MaxEpoch': 300, 33 | 'LowerThanMaxLimit': 12, 34 | 'DecreasingLimit': 8, 35 | 36 | # if OnlyEval == True: 37 | 'EvalModelPath': None, 38 | 'EvalDatasetPath': None, 39 | 'EvalLogAllPreds': None, 40 | 41 | 'Scheduler': 'PolynomialDecayLR', 42 | 43 | # Params for PolynomialDecayLR only 44 | 'WarmupEpoch': 2, 45 | 'LRMaxEpoch':300, 46 | 'EndLR':1e-9, 47 | 'Power':1.0, 48 | # Params for StepLR only 49 | 'LRStep': 30, 50 | 'LRGamma': 0.1, 51 | ########## 52 | 53 | 'WeightIniter': None, 54 | 55 | # Params for NormWeightIniter only 56 | 'InitMean' : 0, 57 | 'InitStd' : 1, 58 | 59 | 'SplitValidSeed': 8, 60 | 'SplitTestSeed': 8, 61 | 
'BatchSize': 256, 62 | 63 | } 64 | AdjustableParamList = {} 65 | SpecificParamList = { 66 | 'DropRate':[0.2], 67 | 'WeightDecay':[4.5], 68 | 'lr':[3], 69 | 'GRULayers':[2], 70 | 'FPSize':[64], 71 | 'DNNLayers':[[128]], 72 | } 73 | 74 | 75 | expcontroller = ACExperimentProcessController(ExpOptions, [BasicParamList, AdjustableParamList, SpecificParamList]) 76 | 77 | expcontroller.ExperimentStart() 78 | 79 | -------------------------------------------------------------------------------- /ACNet/GRUMixRandom.py: -------------------------------------------------------------------------------- 1 | from ACComponents.ACProcessControllers import * 2 | 3 | ExpOptions = { 4 | 'Search': 'greedy', 5 | 'SeedPerOpt': 3, 6 | 'SubsetsNum': 1, 7 | 'OnlyEval': False, 8 | } 9 | 10 | BasicParamList = { 11 | 'ExpName': 'ACMixRandom', 12 | 'MainMetric': 'AUC', 13 | 'DataPath': './ACComponents/ACDataset/data_files/generated_datasets/MMP_AC_Mixed_Screened.json', 14 | 'RootPath': './TestExp/Mix/GRU/', 15 | 'CUDA_VISIBLE_DEVICES': '0', 16 | 'TaskNum': 1, 17 | 'ClassNum': 2, 18 | 'OutputSize': 2, 19 | 'Feature': 'SMILES', 20 | 'Model': 'GRU', 21 | 22 | 'OnlySpecific': True, 23 | 'Weight': True, 24 | 'AC': True, 25 | 'PyG': False, 26 | 27 | 'ValidRate': 40000, 28 | 'PrintRate': 5, 29 | 'UpdateRate': 1, 30 | 'SplitRate': [0.8, 0.1], 31 | 'Splitter': 'Random', 32 | 'MaxEpoch': 300, 33 | 'LowerThanMaxLimit': 12, 34 | 'DecreasingLimit': 8, 35 | 36 | # if OnlyEval == True: 37 | 'EvalModelPath': None, 38 | 'EvalDatasetPath': None, 39 | 'EvalLogAllPreds': None, 40 | 41 | 'Scheduler': 'PolynomialDecayLR', 42 | 43 | # Params for PolynomialDecayLR only 44 | 'WarmupEpoch': 2, 45 | 'LRMaxEpoch':300, 46 | 'EndLR':1e-9, 47 | 'Power':1.0, 48 | # Params for StepLR only 49 | 'LRStep': 30, 50 | 'LRGamma': 0.1, 51 | ########## 52 | 53 | 'WeightIniter': None, 54 | 55 | # Params for NormWeightIniter only 56 | 'InitMean' : 0, 57 | 'InitStd' : 1, 58 | 59 | 60 | # Training Params to be adujsted. 
If the param is not needed to be adjusted, set the value here. 61 | 'SplitValidSeed': 8, 62 | 'SplitTestSeed': 8, 63 | 'BatchSize': 256, 64 | 65 | } 66 | AdjustableParamList = {} 67 | SpecificParamList = { 68 | 'DropRate':[0.2], 69 | 'WeightDecay':[4.5], 70 | 'lr':[3], 71 | 'GRULayers':[2], 72 | 'FPSize':[64], 73 | 'DNNLayers':[[128]], 74 | } 75 | 76 | 77 | expcontroller = ACExperimentProcessController(ExpOptions, [BasicParamList, AdjustableParamList, SpecificParamList]) 78 | 79 | expcontroller.ExperimentStart() 80 | 81 | -------------------------------------------------------------------------------- /ACNet/GRUMixTarget.py: -------------------------------------------------------------------------------- 1 | from ACComponents.ACProcessControllers import * 2 | 3 | ExpOptions = { 4 | 'Search': 'greedy', 5 | 'SeedPerOpt': 3, 6 | 'SubsetsNum': 1, 7 | 'OnlyEval': False, 8 | } 9 | 10 | BasicParamList = { 11 | 'ExpName': 'ACMixTarget', 12 | 'MainMetric': 'AUC', 13 | 'DataPath': './ACComponents/ACDataset/data_files/generated_datasets/MMP_AC_Mixed_Screened.json', 14 | 'RootPath': './TestExp/Mix/GRU/', 15 | 'CUDA_VISIBLE_DEVICES': '0', 16 | 'TaskNum': 1, 17 | 'ClassNum': 2, 18 | 'OutputSize': 2, 19 | 'Feature': 'SMILES', 20 | 'Model': 'GRU', 21 | 22 | 'OnlySpecific': True, 23 | 'Weight': True, 24 | 'AC': True, 25 | 'PyG': False, 26 | 27 | 'ValidRate': 40000, 28 | 'PrintRate': 5, 29 | 'UpdateRate': 1, 30 | 'SplitRate': [0.8, 0.1], 31 | 'Splitter': 'TargetRandom', 32 | 'MaxEpoch': 300, 33 | 'LowerThanMaxLimit': 12, 34 | 'DecreasingLimit': 8, 35 | 36 | # if OnlyEval == True: 37 | 'EvalModelPath': None, 38 | 'EvalDatasetPath': None, 39 | 'EvalLogAllPreds': None, 40 | 41 | 'Scheduler': 'PolynomialDecayLR', 42 | 43 | # Params for PolynomialDecayLR only 44 | 'WarmupEpoch': 2, 45 | 'LRMaxEpoch':300, 46 | 'EndLR':1e-9, 47 | 'Power':1.0, 48 | # Params for StepLR only 49 | 'LRStep': 30, 50 | 'LRGamma': 0.1, 51 | ########## 52 | 53 | 'WeightIniter': None, 54 | 55 | # Params for 
NormWeightIniter only 56 | 'InitMean' : 0, 57 | 'InitStd' : 1, 58 | 59 | 60 | # Training Params to be adujsted. If the param is not needed to be adjusted, set the value here. 61 | 'SplitValidSeed': 8, 62 | 'SplitTestSeed': 8, 63 | 'BatchSize': 256, 64 | 65 | } 66 | AdjustableParamList = {} 67 | SpecificParamList = { 68 | 'DropRate':[0.2], 69 | 'WeightDecay':[5], 70 | 'lr':[4], 71 | 'GRULayers':[3], 72 | 'FPSize':[128], 73 | 'DNNLayers':[[256,64]], 74 | } 75 | 76 | 77 | expcontroller = ACExperimentProcessController(ExpOptions, [BasicParamList, AdjustableParamList, SpecificParamList]) 78 | 79 | expcontroller.ExperimentStart() 80 | 81 | -------------------------------------------------------------------------------- /ACNet/GRUSmall.py: -------------------------------------------------------------------------------- 1 | from ACComponents.ACProcessControllers import * 2 | 3 | ExpOptions = { 4 | 'Search': 'greedy', 5 | 'SeedPerOpt': 3, 6 | 'SubsetsNum': 110, 7 | 'OnlyEval': False, 8 | } 9 | 10 | BasicParamList = { 11 | 'ExpName': 'ACSmall', 12 | 'MainMetric': 'AUC', 13 | 'DataPath': './ACComponents/ACDataset/data_files/generated_datasets/MMP_AC_Small.json', 14 | 'RootPath': './TestExp/Small/GRU/', 15 | 'CUDA_VISIBLE_DEVICES': '0', 16 | 'TaskNum': 1, 17 | 'ClassNum': 2, 18 | 'OutputSize': 2, 19 | 'Feature': 'SMILES', 20 | 'Model': 'GRU', 21 | 22 | 'OnlySpecific': True, 23 | 'Weight': True, 24 | 'AC': True, 25 | 'PyG': False, 26 | 27 | 'ValidRate': 40000, 28 | 'PrintRate': 5, 29 | 'UpdateRate': 1, 30 | 'SplitRate': [0.8, 0.1], 31 | 'Splitter': 'Random', 32 | 'MaxEpoch': 300, 33 | 'LowerThanMaxLimit': 12, 34 | 'DecreasingLimit': 8, 35 | 36 | # if OnlyEval == True: 37 | 'EvalModelPath': None, 38 | 'EvalDatasetPath': None, 39 | 'EvalLogAllPreds': None, 40 | 41 | 'Scheduler': 'PolynomialDecayLR', 42 | 43 | # Params for PolynomialDecayLR only 44 | 'WarmupEpoch': 2, 45 | 'LRMaxEpoch':300, 46 | 'EndLR':1e-9, 47 | 'Power':1.0, 48 | # Params for StepLR only 49 | 'LRStep': 30, 50 | 
'LRGamma': 0.1, 51 | ########## 52 | 53 | 'WeightIniter': None, 54 | 55 | # Params for NormWeightIniter only 56 | 'InitMean' : 0, 57 | 'InitStd' : 1, 58 | 59 | 'SplitValidSeed': 8, 60 | 'SplitTestSeed': 8, 61 | 'BatchSize': 32, 62 | 63 | } 64 | AdjustableParamList = {} 65 | SpecificParamList = { 66 | 'DropRate':[0.2], 67 | 'WeightDecay':[4.5], 68 | 'lr':[3], 69 | 'GRULayers':[2], 70 | 'FPSize':[64], 71 | 'DNNLayers':[[128]], 72 | } 73 | 74 | 75 | expcontroller = ACExperimentProcessController(ExpOptions, [BasicParamList, AdjustableParamList, SpecificParamList]) 76 | 77 | expcontroller.ExperimentStart() 78 | 79 | -------------------------------------------------------------------------------- /ACNet/GraphLoGFew.py: -------------------------------------------------------------------------------- 1 | from ACComponents.ACProcessControllers import * 2 | 3 | ExpOptions = { 4 | 'Search': 'greedy', 5 | 'SeedPerOpt': 3, 6 | 'SubsetsNum': 13, 7 | 'OnlyEval': False, 8 | 'Finetune':True, 9 | } 10 | 11 | BasicParamList = { 12 | 'ExpName': 'ACFew', 13 | 'MainMetric': 'AUC', 14 | 'DataPath': './ACComponents/ACDataset/data_files/MMP_AC_Few_representation/GraphLoG.npz', 15 | 'RootPath': './TestExp/Few/GraphLoG/', 16 | 'CUDA_VISIBLE_DEVICES': '2', 17 | 'TaskNum': 1, 18 | 'ClassNum': 2, 19 | 'OutputSize': 2, 20 | 'Feature': 'Raw', 21 | 'Model': 'MLP', 22 | 23 | # if Feature == Raw 24 | 'RawFeatureSize': 300, 25 | 26 | 'OnlySpecific': True, 27 | 'Weight': True, 28 | 'AC': True, 29 | 'PyG': False, 30 | 31 | 'ValidRate': 40000, 32 | 'PrintRate': 5, 33 | 'UpdateRate': 1, 34 | 'ValidBalance': False, 35 | 'TestBalance': False, 36 | 'SplitRate': [0.8, 0.1], 37 | 'Splitter': 'Random', 38 | 'MaxEpoch': 300, 39 | 'LowerThanMaxLimit': 12, 40 | 'DecreasingLimit': 8, 41 | 42 | # if OnlyEval == True: 43 | 'EvalModelPath': None, 44 | 'EvalDatasetPath': None, 45 | 'EvalLogAllPreds': None, 46 | 47 | 'Scheduler': 'PolynomialDecayLR', 48 | 49 | # Params for PolynomialDecayLR only 50 | 'WarmupEpoch': 2, 
51 | 'LRMaxEpoch':300, 52 | 'EndLR':1e-9, 53 | 'Power':1.0, 54 | # Params for StepLR only 55 | 'LRStep': 30, 56 | 'LRGamma': 0.1, 57 | ########## 58 | 59 | 'WeightIniter': None, 60 | 61 | # Params for NormWeightIniter only 62 | 'InitMean' : 0, 63 | 'InitStd' : 1, 64 | 65 | # Training Params to be adujsted. If the param is not needed to be adjusted, set the value here. 66 | 'SplitValidSeed': 8, 67 | 'SplitTestSeed': 8, 68 | 'BatchSize': 8, 69 | 70 | } 71 | AdjustableParamList = {} 72 | SpecificParamList = { 73 | 'DropRate':[0.2], 74 | 'WeightDecay':[5], 75 | 'lr':[4], 76 | 'DNNLayers':[[256,64]], 77 | } 78 | 79 | 80 | expcontroller = ACExperimentProcessController(ExpOptions, [BasicParamList, AdjustableParamList, SpecificParamList]) 81 | 82 | expcontroller.ExperimentStart() 83 | 84 | -------------------------------------------------------------------------------- /ACNet/GraphormerLarge.py: -------------------------------------------------------------------------------- 1 | from ACComponents.ACProcessControllers import * 2 | 3 | ExpOptions = { 4 | 'Search': 'greedy', 5 | 'SeedPerOpt': 3, 6 | 'SubsetsNum': 3, 7 | 'OnlyEval': False, 8 | } 9 | 10 | BasicParamList = { 11 | 'ExpName': 'ACLarge', 12 | 'MainMetric': 'AUC', 13 | 'DataPath': './ACComponents/ACDataset/data_files/generated_datasets/MMP_AC_Large.json', 14 | 'RootPath': './TestExp/Large/Graphormer/', 15 | 'CUDA_VISIBLE_DEVICES': '3', 16 | 'TaskNum': 1, 17 | 'ClassNum': 2, 18 | 'OutputSize': 2, 19 | 'Feature': 'Graphormer', 20 | 'Model': 'Graphormer', 21 | 22 | 'OnlySpecific': True, 23 | 'Weight': True, 24 | 'AC': True, 25 | 'PyG': False, 26 | 27 | 'ValidRate': 4000, 28 | 'PrintRate': 5, 29 | 'UpdateRate': 1, 30 | 'SplitRate': [0.8, 0.1], 31 | 'Splitter': 'Random', 32 | 'MaxEpoch': 300, 33 | 'LowerThanMaxLimit': 30, 34 | 'DecreasingLimit': 12, 35 | 36 | # if OnlyEval == True: 37 | 'EvalModelPath': None, 38 | 'EvalDatasetPath': None, 39 | 'EvalLogAllPreds': None, 40 | 41 | 'Scheduler': 'PolynomialDecayLR', 42 | # 
'Scheduler': 'EmptyLRScheduler', 43 | 44 | 45 | # Params for PolynomialDecayLR only 46 | 'WarmupEpoch': 2, 47 | 'LRMaxEpoch':300, 48 | 'EndLR':1e-9, 49 | 'Power':1.0, 50 | # Params for StepLR only 51 | 'LRStep': 30, 52 | 'LRGamma': 0.1, 53 | ########## 54 | 55 | 'WeightIniter': None, 56 | 57 | # Params for NormWeightIniter only 58 | 'InitMean' : 0, 59 | 'InitStd' : 1, 60 | 61 | 'FeatureCategory': 'BaseOH', 62 | 63 | # Params for Graphormer only 64 | 'num_offset': 16, 65 | 'num_atoms': 16 * 39, # offset * AtomFeatureNum 66 | 'num_in_degree': 16, # length of indegree dictionary 67 | 'num_out_degree': 16, # length of outdegree dictionary 68 | 'num_edges': 16 * 10, # offset * BondFeatureNum 69 | 'num_spatial': 512, # length of SPD dictionary, must be larger than the largest SPD 70 | 'num_edge_dis': 30, # must be larger than multi-hop-max-dist 71 | 'dropout_rate': 0.1, 72 | 'intput_dropout_rate': 0.1, 73 | 'edge_type': 'multi_hop', 74 | 'multi_hop_max_dist': 20, 75 | 'flag': False, 76 | 'spatial_pos_max': 20, 77 | 'max_node': 512, 78 | 79 | # Training Params to be adujsted. If the param is not needed to be adjusted, set the value here. 
80 | 'SplitValidSeed': 8, 81 | 'SplitTestSeed': 8, 82 | 'BatchSize': 32, 83 | 84 | } 85 | AdjustableParamList = {} 86 | SpecificParamList = { 87 | 'DropRate':[0.2], 88 | 'WeightDecay':[5], 89 | 'lr':[4], 90 | 'num_encoder_layers':[8], 91 | 'num_attention_heads':[16], 92 | 'embedding_dim':[128], 93 | 'ffn_dim':[128], 94 | 'attention_dropout_rate':[0.2], 95 | 'DNNLayers':[[64]], 96 | } 97 | 98 | 99 | expcontroller = ACExperimentProcessController(ExpOptions, [BasicParamList, AdjustableParamList, SpecificParamList]) 100 | 101 | expcontroller.ExperimentStart() 102 | 103 | -------------------------------------------------------------------------------- /ACNet/GraphormerMedium.py: -------------------------------------------------------------------------------- 1 | from ACComponents.ACProcessControllers import * 2 | 3 | ExpOptions = { 4 | 'Search': 'greedy', 5 | 'SeedPerOpt': 3, 6 | 'SubsetsNum': 64, 7 | 'OnlyEval': False, 8 | } 9 | 10 | BasicParamList = { 11 | 'ExpName': 'ACMedium', 12 | 'MainMetric': 'AUC', 13 | 'DataPath': './ACComponents/ACDataset/data_files/generated_datasets/MMP_AC_Medium.json', 14 | 'RootPath': './TestExp/Medium/Graphormer/', 15 | 'CUDA_VISIBLE_DEVICES': '3', 16 | 'TaskNum': 1, 17 | 'ClassNum': 2, 18 | 'OutputSize': 2, 19 | 'Feature': 'Graphormer', 20 | 'Model': 'Graphormer', 21 | 22 | 'OnlySpecific': True, 23 | 'Weight': True, 24 | 'AC': True, 25 | 'PyG': False, 26 | 27 | 'ValidRate': 4000, 28 | 'PrintRate': 5, 29 | 'UpdateRate': 1, 30 | 'SplitRate': [0.8, 0.1], 31 | 'Splitter': 'Random', 32 | 'MaxEpoch': 300, 33 | 'LowerThanMaxLimit': 30, 34 | 'DecreasingLimit': 12, 35 | 36 | # if OnlyEval == True: 37 | 'EvalModelPath': None, 38 | 'EvalDatasetPath': None, 39 | 'EvalLogAllPreds': None, 40 | 41 | 'Scheduler': 'PolynomialDecayLR', 42 | # 'Scheduler': 'EmptyLRScheduler', 43 | 44 | 45 | # Params for PolynomialDecayLR only 46 | 'WarmupEpoch': 2, 47 | 'LRMaxEpoch':300, 48 | 'EndLR':1e-9, 49 | 'Power':1.0, 50 | # Params for StepLR only 51 | 'LRStep': 30, 
52 | 'LRGamma': 0.1, 53 | ########## 54 | 55 | 'WeightIniter': None, 56 | 57 | # Params for NormWeightIniter only 58 | 'InitMean' : 0, 59 | 'InitStd' : 1, 60 | 61 | 'FeatureCategory': 'BaseOH', 62 | 63 | # Params for Graphormer only 64 | 'num_offset': 16, 65 | 'num_atoms': 16 * 39, # offset * AtomFeatureNum 66 | 'num_in_degree': 16, # length of indegree dictionary 67 | 'num_out_degree': 16, # length of outdegree dictionary 68 | 'num_edges': 16 * 10, # offset * BondFeatureNum 69 | 'num_spatial': 512, # length of SPD dictionary, must be larger than the largest SPD 70 | 'num_edge_dis': 30, # must be larger than multi-hop-max-dist 71 | 'dropout_rate': 0.1, 72 | 'intput_dropout_rate': 0.1, 73 | 'edge_type': 'multi_hop', 74 | 'multi_hop_max_dist': 20, 75 | 'flag': False, 76 | 'spatial_pos_max': 20, 77 | 'max_node': 512, 78 | 79 | # Training Params to be adujsted. If the param is not needed to be adjusted, set the value here. 80 | 'SplitValidSeed': 8, 81 | 'SplitTestSeed': 8, 82 | 'BatchSize': 32, 83 | 84 | } 85 | AdjustableParamList = {} 86 | SpecificParamList = { 87 | 'DropRate':[0.2], 88 | 'WeightDecay':[4.5], 89 | 'lr':[3], 90 | 'num_encoder_layers':[4], 91 | 'num_attention_heads':[8], 92 | 'embedding_dim':[32], 93 | 'ffn_dim':[32], 94 | 'attention_dropout_rate':[0.1], 95 | 'DNNLayers':[[]], 96 | } 97 | 98 | 99 | expcontroller = ACExperimentProcessController(ExpOptions, [BasicParamList, AdjustableParamList, SpecificParamList]) 100 | 101 | expcontroller.ExperimentStart() 102 | 103 | -------------------------------------------------------------------------------- /ACNet/GraphormerMixRandom.py: -------------------------------------------------------------------------------- 1 | from ACComponents.ACProcessControllers import * 2 | 3 | ExpOptions = { 4 | 'Search': 'greedy', 5 | 'SeedPerOpt': 3, 6 | 'SubsetsNum': 1, 7 | 'OnlyEval': False, 8 | } 9 | 10 | BasicParamList = { 11 | 'ExpName': 'ACMixRandom', 12 | 'MainMetric': 'AUC', 13 | 'DataPath': 
'./ACComponents/ACDataset/data_files/generated_datasets/MMP_AC_Mixed_Screened.json', 14 | 'RootPath': './TestExp/Mix/Graphormer/', 15 | 'CUDA_VISIBLE_DEVICES': '3', 16 | 'TaskNum': 1, 17 | 'ClassNum': 2, 18 | 'OutputSize': 2, 19 | 'Feature': 'Graphormer', 20 | 'Model': 'Graphormer', 21 | 22 | 'OnlySpecific': True, 23 | 'Weight': True, 24 | 'AC': True, 25 | 'PyG': False, 26 | 27 | 'ValidRate': 40000, 28 | 'PrintRate': 5, 29 | 'UpdateRate': 1, 30 | 'SplitRate': [0.8, 0.1], 31 | 'Splitter': 'Random', 32 | 'MaxEpoch': 300, 33 | 'LowerThanMaxLimit': 12, 34 | 'DecreasingLimit': 8, 35 | 36 | # if OnlyEval == True: 37 | 'EvalModelPath': None, 38 | 'EvalDatasetPath': None, 39 | 'EvalLogAllPreds': None, 40 | 41 | 'Scheduler': 'PolynomialDecayLR', 42 | # 'Scheduler': 'EmptyLRScheduler', 43 | 44 | 45 | # Params for PolynomialDecayLR only 46 | 'WarmupEpoch': 2, 47 | 'LRMaxEpoch':300, 48 | 'EndLR':1e-9, 49 | 'Power':1.0, 50 | # Params for StepLR only 51 | 'LRStep': 30, 52 | 'LRGamma': 0.1, 53 | ########## 54 | 55 | 'WeightIniter': None, 56 | 57 | # Params for NormWeightIniter only 58 | 'InitMean' : 0, 59 | 'InitStd' : 1, 60 | 61 | 'FeatureCategory': 'BaseED', 62 | 63 | # Params for Graphormer only 64 | 'num_offset': 16, 65 | 'num_atoms': 16 * 8, # offset * AtomFeatureNum 66 | 'num_in_degree': 16, # length of indegree dictionary 67 | 'num_out_degree': 16, # length of outdegree dictionary 68 | 'num_edges': 16 * 4, # offset * BondFeatureNum 69 | 'num_spatial': 512, # length of SPD dictionary, must be larger than the largest SPD 70 | 'num_edge_dis': 30, # must be larger than multi-hop-max-dist 71 | 'dropout_rate': 0.1, 72 | 'intput_dropout_rate': 0.1, 73 | 'edge_type': 'multi_hop', 74 | 'multi_hop_max_dist': 20, 75 | 'flag': False, 76 | 'spatial_pos_max': 20, 77 | 'max_node': 512, 78 | 79 | # Training Params to be adujsted. If the param is not needed to be adjusted, set the value here. 
80 | 'SplitValidSeed': 8, 81 | 'SplitTestSeed': 8, 82 | 'BatchSize': 32, 83 | 84 | } 85 | AdjustableParamList = {} 86 | SpecificParamList = { 87 | 'DropRate':[0.4], 88 | 'WeightDecay':[5], 89 | 'lr':[4], 90 | 'num_encoder_layers':[10], 91 | 'num_attention_heads':[32], 92 | 'embedding_dim':[256], 93 | 'ffn_dim':[256], 94 | 'attention_dropout_rate':[0.4], 95 | 'DNNLayers':[[64, 16]], 96 | } 97 | 98 | 99 | expcontroller = ACExperimentProcessController(ExpOptions, [BasicParamList, AdjustableParamList, SpecificParamList]) 100 | 101 | expcontroller.ExperimentStart() 102 | 103 | -------------------------------------------------------------------------------- /ACNet/GraphormerMixTarget.py: -------------------------------------------------------------------------------- 1 | from ACComponents.ACProcessControllers import * 2 | 3 | ExpOptions = { 4 | 'Search': 'greedy', 5 | 'SeedPerOpt': 3, 6 | 'SubsetsNum': 1, 7 | 'OnlyEval': False, 8 | } 9 | 10 | BasicParamList = { 11 | 'ExpName': 'ACMixTarget', 12 | 'MainMetric': 'AUC', 13 | 'DataPath': './ACComponents/ACDataset/data_files/generated_datasets/MMP_AC_Mixed_Screened.json', 14 | 'RootPath': './TestExp/Mix/Graphormer/', 15 | 'CUDA_VISIBLE_DEVICES': '3', 16 | 'TaskNum': 1, 17 | 'ClassNum': 2, 18 | 'OutputSize': 2, 19 | 'Feature': 'Graphormer', 20 | 'Model': 'Graphormer', 21 | 22 | 'OnlySpecific': True, 23 | 'Weight': True, 24 | 'AC': True, 25 | 'PyG': False, 26 | 27 | 'ValidRate': 40000, 28 | 'PrintRate': 5, 29 | 'UpdateRate': 1, 30 | 'SplitRate': [0.8, 0.1], 31 | 'Splitter': 'TargetRandom', 32 | 'MaxEpoch': 300, 33 | 'LowerThanMaxLimit': 12, 34 | 'DecreasingLimit': 8, 35 | 36 | # if OnlyEval == True: 37 | 'EvalModelPath': None, 38 | 'EvalDatasetPath': None, 39 | 'EvalLogAllPreds': None, 40 | 41 | 'Scheduler': 'PolynomialDecayLR', 42 | # 'Scheduler': 'EmptyLRScheduler', 43 | 44 | 45 | # Params for PolynomialDecayLR only 46 | 'WarmupEpoch': 2, 47 | 'LRMaxEpoch':300, 48 | 'EndLR':1e-9, 49 | 'Power':1.0, 50 | # Params for StepLR 
only 51 | 'LRStep': 30, 52 | 'LRGamma': 0.1, 53 | ########## 54 | 55 | 'WeightIniter': None, 56 | 57 | # Params for NormWeightIniter only 58 | 'InitMean' : 0, 59 | 'InitStd' : 1, 60 | 61 | 'FeatureCategory': 'BaseED', 62 | 63 | # Params for Graphormer only 64 | 'num_offset': 16, 65 | 'num_atoms': 16 * 8, # offset * AtomFeatureNum 66 | 'num_in_degree': 16, # length of indegree dictionary 67 | 'num_out_degree': 16, # length of outdegree dictionary 68 | 'num_edges': 16 * 4, # offset * BondFeatureNum 69 | 'num_spatial': 512, # length of SPD dictionary, must be larger than the largest SPD 70 | 'num_edge_dis': 30, # must be larger than multi-hop-max-dist 71 | 'dropout_rate': 0.1, 72 | 'intput_dropout_rate': 0.1, 73 | 'edge_type': 'multi_hop', 74 | 'multi_hop_max_dist': 20, 75 | 'flag': False, 76 | 'spatial_pos_max': 20, 77 | 'max_node': 512, 78 | 79 | # Training Params to be adujsted. If the param is not needed to be adjusted, set the value here. 80 | 'SplitValidSeed': 8, 81 | 'SplitTestSeed': 8, 82 | 'BatchSize': 32, 83 | 84 | } 85 | AdjustableParamList = {} 86 | SpecificParamList = { 87 | 'DropRate':[0.4], 88 | 'WeightDecay':[5], 89 | 'lr':[4], 90 | 'num_encoder_layers':[10], 91 | 'num_attention_heads':[32], 92 | 'embedding_dim':[256], 93 | 'ffn_dim':[256], 94 | 'attention_dropout_rate':[0.4], 95 | 'DNNLayers':[[64, 16]], 96 | } 97 | 98 | 99 | expcontroller = ACExperimentProcessController(ExpOptions, [BasicParamList, AdjustableParamList, SpecificParamList]) 100 | 101 | expcontroller.ExperimentStart() 102 | 103 | -------------------------------------------------------------------------------- /ACNet/GraphormerSmall.py: -------------------------------------------------------------------------------- 1 | from ACComponents.ACProcessControllers import * 2 | 3 | ExpOptions = { 4 | 'Search': 'greedy', 5 | 'SeedPerOpt': 3, 6 | 'SubsetsNum': 110, 7 | 'OnlyEval': False, 8 | } 9 | 10 | BasicParamList = { 11 | 'ExpName': 'ACSmall', 12 | 'MainMetric': 'AUC', 13 | 'DataPath': 
'./ACComponents/ACDataset/data_files/generated_datasets/MMP_AC_Small.json', 14 | 'RootPath': './TestExp/Small/Graphormer/', 15 | 'CUDA_VISIBLE_DEVICES': '3', 16 | 'TaskNum': 1, 17 | 'ClassNum': 2, 18 | 'OutputSize': 2, 19 | 'Feature': 'Graphormer', 20 | 'Model': 'Graphormer', 21 | 22 | 'OnlySpecific': True, 23 | 'Weight': True, 24 | 'AC': True, 25 | 'PyG': False, 26 | 27 | 'ValidRate': 4000, 28 | 'PrintRate': 5, 29 | 'UpdateRate': 1, 30 | 'SplitRate': [0.8, 0.1], 31 | 'Splitter': 'Random', 32 | 'MaxEpoch': 300, 33 | 'LowerThanMaxLimit': 30, 34 | 'DecreasingLimit': 12, 35 | 36 | # if OnlyEval == True: 37 | 'EvalModelPath': None, 38 | 'EvalDatasetPath': None, 39 | 'EvalLogAllPreds': None, 40 | 41 | 'Scheduler': 'PolynomialDecayLR', 42 | # 'Scheduler': 'EmptyLRScheduler', 43 | 44 | 45 | # Params for PolynomialDecayLR only 46 | 'WarmupEpoch': 2, 47 | 'LRMaxEpoch':300, 48 | 'EndLR':1e-9, 49 | 'Power':1.0, 50 | # Params for StepLR only 51 | 'LRStep': 30, 52 | 'LRGamma': 0.1, 53 | ########## 54 | 55 | 'WeightIniter': None, 56 | 57 | # Params for NormWeightIniter only 58 | 'InitMean' : 0, 59 | 'InitStd' : 1, 60 | 61 | 'FeatureCategory': 'BaseOH', 62 | 63 | # Params for Graphormer only 64 | 'num_offset': 16, 65 | 'num_atoms': 16 * 39, # offset * AtomFeatureNum 66 | 'num_in_degree': 16, # length of indegree dictionary 67 | 'num_out_degree': 16, # length of outdegree dictionary 68 | 'num_edges': 16 * 10, # offset * BondFeatureNum 69 | 'num_spatial': 512, # length of SPD dictionary, must be larger than the largest SPD 70 | 'num_edge_dis': 30, # must be larger than multi-hop-max-dist 71 | 'dropout_rate': 0.1, 72 | 'intput_dropout_rate': 0.1, 73 | 'edge_type': 'multi_hop', 74 | 'multi_hop_max_dist': 20, 75 | 'flag': False, 76 | 'spatial_pos_max': 20, 77 | 'max_node': 512, 78 | 79 | # Training Params to be adujsted. If the param is not needed to be adjusted, set the value here. 
80 | 'SplitValidSeed': 8, 81 | 'SplitTestSeed': 8, 82 | 'BatchSize': 32, 83 | 84 | } 85 | AdjustableParamList = {} 86 | SpecificParamList = { 87 | 'DropRate':[0.2], 88 | 'WeightDecay':[5], 89 | 'lr':[4], 90 | 'num_encoder_layers':[8], 91 | 'num_attention_heads':[16], 92 | 'embedding_dim':[128], 93 | 'ffn_dim':[128], 94 | 'attention_dropout_rate':[0.2], 95 | 'DNNLayers':[[64]], 96 | } 97 | 98 | 99 | expcontroller = ACExperimentProcessController(ExpOptions, [BasicParamList, AdjustableParamList, SpecificParamList]) 100 | 101 | expcontroller.ExperimentStart() 102 | 103 | -------------------------------------------------------------------------------- /ACNet/LSTMLarge.py: -------------------------------------------------------------------------------- 1 | from ACComponents.ACProcessControllers import * 2 | 3 | ExpOptions = { 4 | 'Search': 'greedy', 5 | 'SeedPerOpt': 3, 6 | 'SubsetsNum': 3, 7 | 'OnlyEval': False, 8 | } 9 | 10 | BasicParamList = { 11 | 'ExpName': 'ACLarge', 12 | 'MainMetric': 'AUC', 13 | 'DataPath': './ACComponents/ACDataset/data_files/generated_datasets/MMP_AC_Large.json', 14 | 'RootPath': './TestExp/Large/LSTM/', 15 | 'CUDA_VISIBLE_DEVICES': '0', 16 | 'TaskNum': 1, 17 | 'ClassNum': 2, 18 | 'OutputSize': 2, 19 | 'Feature': 'SMILES', 20 | 'Model': 'LSTM', 21 | 22 | 'OnlySpecific': True, 23 | 'Weight': True, 24 | 'AC': True, 25 | 'PyG': False, 26 | 27 | 'ValidRate': 40000, 28 | 'PrintRate': 5, 29 | 'UpdateRate': 1, 30 | 'SplitRate': [0.8, 0.1], 31 | 'Splitter': 'Random', 32 | 'MaxEpoch': 300, 33 | 'LowerThanMaxLimit': 12, 34 | 'DecreasingLimit': 8, 35 | 36 | # if OnlyEval == True: 37 | 'EvalModelPath': None, 38 | 'EvalDatasetPath': None, 39 | 'EvalLogAllPreds': None, 40 | 41 | 'Scheduler': 'PolynomialDecayLR', 42 | 43 | # Params for PolynomialDecayLR only 44 | 'WarmupEpoch': 2, 45 | 'LRMaxEpoch':300, 46 | 'EndLR':1e-9, 47 | 'Power':1.0, 48 | # Params for StepLR only 49 | 'LRStep': 30, 50 | 'LRGamma': 0.1, 51 | ########## 52 | 53 | 'WeightIniter': None, 54 
| 55 | # Params for NormWeightIniter only 56 | 'InitMean' : 0, 57 | 'InitStd' : 1, 58 | 59 | 'AtomFeatureSize': 39, 60 | 'BondFeatureSize': 10, 61 | 'MolFP': 'MorganFP', 62 | 'radius': 2, 63 | 'nBits': 1024, 64 | 65 | 'SplitValidSeed': 8, 66 | 'SplitTestSeed': 8, 67 | 'BatchSize': 256, 68 | 69 | } 70 | AdjustableParamList = {} 71 | SpecificParamList = { 72 | 'DropRate':[0.4], 73 | 'WeightDecay':[5], 74 | 'lr':[3], 75 | 'LSTMLayers': [3], 76 | 'FPSize':[512], 77 | 'DNNLayers':[[512, 128, 32]], 78 | } 79 | 80 | 81 | expcontroller = ACExperimentProcessController(ExpOptions, [BasicParamList, AdjustableParamList, SpecificParamList]) 82 | 83 | expcontroller.ExperimentStart() 84 | 85 | -------------------------------------------------------------------------------- /ACNet/LSTMMedium.py: -------------------------------------------------------------------------------- 1 | from ACComponents.ACProcessControllers import * 2 | 3 | ExpOptions = { 4 | 'Search': 'greedy', 5 | 'SeedPerOpt': 3, 6 | 'SubsetsNum': 64, 7 | 'OnlyEval': False, 8 | } 9 | 10 | BasicParamList = { 11 | 'ExpName': 'ACMedium', 12 | 'MainMetric': 'AUC', 13 | 'DataPath': './ACComponents/ACDataset/data_files/generated_datasets/MMP_AC_Medium.json', 14 | 'RootPath': './TestExp/Medium/LSTM/', 15 | 'CUDA_VISIBLE_DEVICES': '0', 16 | 'TaskNum': 1, 17 | 'ClassNum': 2, 18 | 'OutputSize': 2, 19 | 'Feature': 'SMILES', 20 | 'Model': 'LSTM', 21 | 22 | 'OnlySpecific': True, 23 | 'Weight': True, 24 | 'AC': True, 25 | 'PyG': False, 26 | 27 | 'ValidRate': 40000, 28 | 'PrintRate': 5, 29 | 'UpdateRate': 1, 30 | 'SplitRate': [0.8, 0.1], 31 | 'Splitter': 'Random', 32 | 'MaxEpoch': 300, 33 | 'LowerThanMaxLimit': 12, 34 | 'DecreasingLimit': 8, 35 | 36 | # if OnlyEval == True: 37 | 'EvalModelPath': None, 38 | 'EvalDatasetPath': None, 39 | 'EvalLogAllPreds': None, 40 | 41 | 'Scheduler': 'PolynomialDecayLR', 42 | 43 | # Params for PolynomialDecayLR only 44 | 'WarmupEpoch': 2, 45 | 'LRMaxEpoch':300, 46 | 'EndLR':1e-9, 47 | 'Power':1.0, 48 
| # Params for StepLR only 49 | 'LRStep': 30, 50 | 'LRGamma': 0.1, 51 | ########## 52 | 53 | 'WeightIniter': None, 54 | 55 | # Params for NormWeightIniter only 56 | 'InitMean' : 0, 57 | 'InitStd' : 1, 58 | 59 | 'AtomFeatureSize': 39, 60 | 'BondFeatureSize': 10, 61 | 'MolFP': 'MorganFP', 62 | 'radius': 2, 63 | 'nBits': 1024, 64 | 65 | 'SplitValidSeed': 8, 66 | 'SplitTestSeed': 8, 67 | 'BatchSize': 256, 68 | 69 | } 70 | AdjustableParamList = {} 71 | SpecificParamList = { 72 | 'DropRate':[0.2], 73 | 'WeightDecay':[4.5], 74 | 'lr':[3], 75 | 'LSTMLayers': [2], 76 | 'FPSize':[64], 77 | 'DNNLayers':[[]], 78 | } 79 | 80 | 81 | expcontroller = ACExperimentProcessController(ExpOptions, [BasicParamList, AdjustableParamList, SpecificParamList]) 82 | 83 | expcontroller.ExperimentStart() 84 | 85 | -------------------------------------------------------------------------------- /ACNet/LSTMSmall.py: -------------------------------------------------------------------------------- 1 | from ACComponents.ACProcessControllers import * 2 | 3 | ExpOptions = { 4 | 'Search': 'greedy', 5 | 'SeedPerOpt': 3, 6 | 'SubsetsNum': 110, 7 | 'OnlyEval': False, 8 | } 9 | 10 | BasicParamList = { 11 | 'ExpName': 'ACSmall', 12 | 'MainMetric': 'AUC', 13 | 'DataPath': './ACComponents/ACDataset/data_files/generated_datasets/MMP_AC_Small.json', 14 | 'RootPath': './TestExp/Small/LSTM/', 15 | 'CUDA_VISIBLE_DEVICES': '0', 16 | 'TaskNum': 1, 17 | 'ClassNum': 2, 18 | 'OutputSize': 2, 19 | 'Feature': 'SMILES', 20 | 'Model': 'LSTM', 21 | 22 | 'OnlySpecific': True, 23 | 'Weight': True, 24 | 'AC': True, 25 | 'PyG': False, 26 | 27 | 'ValidRate': 40000, 28 | 'PrintRate': 5, 29 | 'UpdateRate': 1, 30 | 'SplitRate': [0.8, 0.1], 31 | 'Splitter': 'Random', 32 | 'MaxEpoch': 300, 33 | 'LowerThanMaxLimit': 12, 34 | 'DecreasingLimit': 8, 35 | 36 | # if OnlyEval == True: 37 | 'EvalModelPath': None, 38 | 'EvalDatasetPath': None, 39 | 'EvalLogAllPreds': None, 40 | 41 | 'Scheduler': 'PolynomialDecayLR', 42 | 43 | # Params for 
PolynomialDecayLR only 44 | 'WarmupEpoch': 2, 45 | 'LRMaxEpoch':300, 46 | 'EndLR':1e-9, 47 | 'Power':1.0, 48 | # Params for StepLR only 49 | 'LRStep': 30, 50 | 'LRGamma': 0.1, 51 | ########## 52 | 53 | 'WeightIniter': None, 54 | 55 | # Params for NormWeightIniter only 56 | 'InitMean' : 0, 57 | 'InitStd' : 1, 58 | 59 | 'AtomFeatureSize': 39, 60 | 'BondFeatureSize': 10, 61 | 'MolFP': 'MorganFP', 62 | 'radius': 2, 63 | 'nBits': 1024, 64 | 65 | 'SplitValidSeed': 8, 66 | 'SplitTestSeed': 8, 67 | 'BatchSize': 32, 68 | 69 | } 70 | AdjustableParamList = {} 71 | SpecificParamList = { 72 | 'DropRate':[0.2], 73 | 'WeightDecay':[4.5], 74 | 'lr':[3], 75 | 'LSTMLayers': [2], 76 | 'FPSize':[128], 77 | 'DNNLayers':[[128]], 78 | } 79 | 80 | 81 | expcontroller = ACExperimentProcessController(ExpOptions, [BasicParamList, AdjustableParamList, SpecificParamList]) 82 | 83 | expcontroller.ExperimentStart() 84 | 85 | -------------------------------------------------------------------------------- /ACNet/MATFew.py: -------------------------------------------------------------------------------- 1 | from ACComponents.ACProcessControllers import * 2 | 3 | ExpOptions = { 4 | 'Search': 'greedy', 5 | 'SeedPerOpt': 3, 6 | 'SubsetsNum': 13, 7 | 'OnlyEval': False, 8 | 'Finetune':True, 9 | } 10 | 11 | BasicParamList = { 12 | 'ExpName': 'ACFew', 13 | 'MainMetric': 'AUC', 14 | 'DataPath': './ACComponents/ACDataset/data_files/MMP_AC_Few_representation/MAT.npz', 15 | 'RootPath': './TestExp/Few/MAT/', 16 | 'CUDA_VISIBLE_DEVICES': '2', 17 | 'TaskNum': 1, 18 | 'ClassNum': 2, 19 | 'OutputSize': 2, 20 | 'Feature': 'Raw', 21 | 'Model': 'MLP', 22 | 23 | # if Feature == Raw 24 | 'RawFeatureSize': 1024, 25 | 26 | 'OnlySpecific': True, 27 | 'Weight': True, 28 | 'AC': True, 29 | 'PyG': False, 30 | 31 | 'ValidRate': 40000, 32 | 'PrintRate': 5, 33 | 'UpdateRate': 1, 34 | 'SplitRate': [0.8, 0.1], 35 | 'Splitter': 'Random', 36 | 'MaxEpoch': 300, 37 | 'LowerThanMaxLimit': 12, 38 | 'DecreasingLimit': 8, 39 | 40 | # if 
OnlyEval == True: 41 | 'EvalModelPath': None, 42 | 'EvalDatasetPath': None, 43 | 'EvalLogAllPreds': None, 44 | 45 | 'Scheduler': 'PolynomialDecayLR', 46 | 47 | # Params for PolynomialDecayLR only 48 | 'WarmupEpoch': 2, 49 | 'LRMaxEpoch':300, 50 | 'EndLR':1e-9, 51 | 'Power':1.0, 52 | # Params for StepLR only 53 | 'LRStep': 30, 54 | 'LRGamma': 0.1, 55 | ########## 56 | 57 | 'WeightIniter': None, 58 | 59 | # Params for NormWeightIniter only 60 | 'InitMean' : 0, 61 | 'InitStd' : 1, 62 | 63 | 64 | # Training Params to be adujsted. If the param is not needed to be adjusted, set the value here. 65 | 'SplitValidSeed': 8, 66 | 'SplitTestSeed': 8, 67 | 'BatchSize': 8, 68 | 69 | } 70 | AdjustableParamList = {} 71 | SpecificParamList = { 72 | 'DropRate':[0.2], 73 | 'WeightDecay':[5], 74 | 'lr':[4], 75 | 'DNNLayers':[[512, 128, 32]], 76 | } 77 | 78 | 79 | expcontroller = ACExperimentProcessController(ExpOptions, [BasicParamList, AdjustableParamList, SpecificParamList]) 80 | 81 | expcontroller.ExperimentStart() 82 | 83 | -------------------------------------------------------------------------------- /ACNet/Models/BasicGNNs.py: -------------------------------------------------------------------------------- 1 | import torch as t 2 | import torch.nn as nn 3 | from torch_geometric.nn import GCN, global_add_pool, global_mean_pool, global_max_pool, MLP, GIN, SGConv, MessagePassing 4 | from torch_geometric.nn.models.basic_gnn import BasicGNN 5 | 6 | 7 | 8 | class PyGGCN(nn.Module): 9 | def __init__(self, opt, FeatureExtractor = False): 10 | super(PyGGCN, self).__init__() 11 | self.opt = opt 12 | self.node_feat_size = opt.args['AtomFeatureSize'] 13 | self.in_channel = opt.args['GCNInputSize'] 14 | self.hidden_channel = opt.args['GCNHiddenSize'] 15 | self.out_channel = opt.args['FPSize'] 16 | self.num_layers = opt.args['GCNLayers'] 17 | self.MLPChannels = opt.args['DNNLayers'] 18 | self.MLPOutputSize = opt.args['OutputSize'] 19 | self.dropout = opt.args['DropRate'] 20 | 
self.FeatureExtractor = FeatureExtractor 21 | 22 | self.MLPChannels = [self.out_channel] + self.MLPChannels + [self.MLPOutputSize] 23 | 24 | self.GCN = GCN(in_channels = self.in_channel, 25 | hidden_channels = self.hidden_channel, 26 | out_channels = self.out_channel, 27 | num_layers = self.num_layers, 28 | dropout = self.dropout) 29 | self.NodeFeatEmbed = MLP([self.node_feat_size, self.in_channel], dropout = self.dropout) 30 | if not self.FeatureExtractor: 31 | self.TaskLayer = MLP(self.MLPChannels, dropout = self.dropout) 32 | 33 | self.ReadoutList = { 34 | 'Add': global_add_pool, 35 | 'Mean': global_mean_pool, 36 | 'Max': global_max_pool 37 | } 38 | self.readout = self.ReadoutList[opt.args['GCNReadout']] 39 | 40 | def forward(self, Input): 41 | # Input: Batch data of PyG 42 | Input = Input.to(t.device(f"cuda:{self.opt.args['CUDA_VISIBLE_DEVICES']}" if t.cuda.is_available() else 'cpu')) 43 | x = self.NodeFeatEmbed(Input.x) 44 | x = self.GCN(x, Input.edge_index) 45 | x = self.readout(x, Input.batch) 46 | if not self.FeatureExtractor: 47 | x = self.TaskLayer(x) 48 | return x 49 | 50 | class PyGGIN(nn.Module): 51 | def __init__(self, opt, FeatureExtractor = False): 52 | super(PyGGIN, self).__init__() 53 | self.opt = opt 54 | self.node_feat_size = opt.args['AtomFeatureSize'] 55 | self.in_channel = opt.args['GINInputSize'] 56 | self.hidden_channel = opt.args['GINHiddenSize'] 57 | self.out_channel = opt.args['FPSize'] 58 | self.eps = opt.args['GINEps'] 59 | self.num_layers = opt.args['GINLayers'] 60 | self.MLPChannels = opt.args['DNNLayers'] 61 | self.MLPOutputSize = opt.args['OutputSize'] 62 | self.dropout = opt.args['DropRate'] 63 | self.FeatureExtractor = FeatureExtractor 64 | 65 | self.MLPChannels = [self.out_channel] + self.MLPChannels + [self.MLPOutputSize] 66 | 67 | self.GIN = GIN(in_channels = self.in_channel, 68 | hidden_channels = self.hidden_channel, 69 | out_channels = self.out_channel, 70 | num_layers = self.num_layers, 71 | dropout = self.dropout, 72 | 
eps = self.eps) 73 | self.NodeFeatEmbed = MLP([self.node_feat_size, self.in_channel], dropout = self.dropout) 74 | if not self.FeatureExtractor: 75 | self.TaskLayer = MLP(self.MLPChannels, dropout = self.dropout) 76 | 77 | self.ReadoutList = { 78 | 'Add': global_add_pool, 79 | 'Mean': global_mean_pool, 80 | 'Max': global_max_pool, 81 | } 82 | self.readout = self.ReadoutList[opt.args['GINReadout']] 83 | 84 | def forward(self, Input): 85 | # Input: Batch data of PyG 86 | Input = Input.to(t.device(f"cuda:{self.opt.args['CUDA_VISIBLE_DEVICES']}" if t.cuda.is_available() else 'cpu')) 87 | x = self.NodeFeatEmbed(Input.x) 88 | x = self.GIN(x, Input.edge_index) 89 | x = self.readout(x, Input.batch) 90 | if not self.FeatureExtractor: 91 | x = self.TaskLayer(x) 92 | return x 93 | 94 | 95 | class SGC(BasicGNN): 96 | def init_conv(self, in_channels: int, out_channels: int, **kwargs) -> MessagePassing: 97 | return SGConv(in_channels, out_channels, **kwargs) 98 | 99 | class PyGSGC(nn.Module): 100 | def __init__(self, opt, FeatureExtractor = False): 101 | super(PyGSGC, self).__init__() 102 | self.opt = opt 103 | self.node_feat_size = opt.args['AtomFeatureSize'] 104 | self.in_channel = opt.args['SGCInputSize'] 105 | self.hidden_channel = opt.args['SGCHiddenSize'] 106 | self.out_channel = opt.args['FPSize'] 107 | self.K = opt.args['SGCK'] 108 | self.num_layers = opt.args['SGCLayers'] 109 | self.MLPChannels = opt.args['DNNLayers'] 110 | self.MLPOutputSize = opt.args['OutputSize'] 111 | self.dropout = opt.args['DropRate'] 112 | self.FeatureExtractor = FeatureExtractor 113 | 114 | 115 | self.MLPChannels = [self.out_channel] + self.MLPChannels + [self.MLPOutputSize] 116 | 117 | self.SGC = SGC(in_channels = self.in_channel, 118 | hidden_channels = self.hidden_channel, 119 | out_channels = self.out_channel, 120 | num_layers = self.num_layers, 121 | dropout = self.dropout, 122 | K = self.K) 123 | self.NodeFeatEmbed = MLP([self.node_feat_size, self.in_channel], dropout = self.dropout) 124 
| self.TaskLayer = MLP(self.MLPChannels, dropout = self.dropout) 125 | 126 | self.ReadoutList = { 127 | 'Add': global_add_pool, 128 | 'Mean': global_mean_pool, 129 | 'Max': global_max_pool 130 | } 131 | self.readout = self.ReadoutList[opt.args['SGCReadout']] 132 | 133 | def forward(self, Input): 134 | # Input: Batch data of PyG 135 | Input = Input.to(t.device(f"cuda:{self.opt.args['CUDA_VISIBLE_DEVICES']}" if t.cuda.is_available() else 'cpu')) 136 | x = self.NodeFeatEmbed(Input.x) 137 | x = self.SGC(x, Input.edge_index) 138 | x = self.readout(x, Input.batch) 139 | if not self.FeatureExtractor: 140 | x = self.TaskLayer(x) 141 | return x -------------------------------------------------------------------------------- /ACNet/Models/CMPNN/CMPNNModel.py: -------------------------------------------------------------------------------- 1 | import torch as t 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from Models.CMPNN.nn_utils import get_activation_function, index_select_ND 5 | from Models.CMPNN.CMPNNFeaturizer import mol2graph,get_atom_fdim, get_bond_fdim 6 | import math 7 | 8 | class CommunicateKernel(nn.Module): 9 | def __init__(self, opt): 10 | super(CommunicateKernel, self).__init__() 11 | self.opt = opt 12 | self.kernel = self.opt.args['CommunicateKernel'] 13 | 14 | # if self.kernel == 'MultilayerPerception': 15 | # self.linear = nn.Linear() 16 | 17 | def forward(self, hidden, agg_message): 18 | # hidden: h^{k-1} (v) 19 | # agg_message: m^k (v) 20 | 21 | if self.kernel == 'Add': 22 | return hidden + agg_message 23 | # elif self.opt.args['CommunicateKernel'] == 'MultilayerPerception': 24 | 25 | class MPNLayer(nn.Module): 26 | def __init__(self, opt): 27 | super(MPNLayer, self).__init__() 28 | self.opt = opt 29 | self.hidden_size = self.opt.args['FPSize'] 30 | self.W_bond = nn.Linear(self.hidden_size, self.hidden_size) 31 | self.dropout_layer = nn.Dropout(p=self.opt.args['DropRate']) 32 | self.act_func = get_activation_function(opt) 33 | 
class BatchGRU(nn.Module):
    # Bidirectional GRU run per molecule over the padded atom sequences of a batched graph.
    def __init__(self, hidden_size):
        super(BatchGRU, self).__init__()
        self.hidden_size = hidden_size
        self.gru = nn.GRU(self.hidden_size, self.hidden_size, batch_first = True,
                          bidirectional = True)
        self.bias = nn.Parameter(t.Tensor(self.hidden_size))
        self.bias.data.uniform_(-1.0 / math.sqrt(self.hidden_size),
                                1.0 / math.sqrt(self.hidden_size))

    def forward(self, node, a_scope):
        # node: features of every atom in the batched big graph, [num_atoms, hidden_size].
        # a_scope: (start, size) pairs telling which rows of `node` belong to each molecule.
        hidden = node
        message = F.relu(node + self.bias)  # node features shifted by the bias, then ReLU-activated
        MAX_atom_len = max([a_size for a_start, a_size in a_scope])  # largest atom count in the batch
        # padding
        message_lst = []
        hidden_lst = []

        for i, (a_start, a_size) in enumerate(a_scope):
            if a_size == 0:
                assert 0
            cur_message = message.narrow(0, a_start, a_size)
            # Tensor.narrow(dim, start, length) takes a slice of `length` rows starting at `start`.
            cur_hidden = hidden.narrow(0, a_start, a_size)
            # hidden is the per-node feature after the K message-passing layers;
            # message is the same feature after the bias + activation above.

            hidden_lst.append(cur_hidden.max(0)[0].unsqueeze(0).unsqueeze(0))
            # cur_hidden is [a_size, hidden_size]; max over atoms gives [hidden_size];
            # two unsqueeze(0) calls make it [1, 1, hidden_size] before appending.

            cur_message = t.nn.ZeroPad2d((0, 0, 0, MAX_atom_len - cur_message.shape[0]))(cur_message)
            # Plain zero padding: every molecule is padded to the same length,
            # from [a_size, hidden_size] to [MAX_atom_len, hidden_size].
            message_lst.append(cur_message.unsqueeze(0))
            # appended as [1, MAX_atom_len, hidden_size]

        message_lst = t.cat(message_lst, 0)
        hidden_lst = t.cat(hidden_lst, 1)
        # Both lists (length batch_size) become tensors:
        # message_lst is [batch_size, MAX_atom_len, hidden_size];
        # hidden_lst is [1, batch_size, hidden_size].
        hidden_lst = hidden_lst.repeat(2, 1, 1)  # [2, batch_size, hidden_size]
        cur_message, cur_hidden = self.gru(message_lst, hidden_lst)
        # GRU input is message_lst and h_0 is hidden_lst: the node embeddings produced by the
        # K MPN layers act as the GRU's initial hidden state, while the padded atom sequences
        # are the serialized input features.
        # The GRU has one layer, but since it is bidirectional h_0 must be repeated twice.

        # NOTE(review): the GRU therefore mixes information across the atoms of a molecule
        # taken in plain index order; the sequence carries no topology (it is not a
        # "per-layer" update either), so the benefit of this GRU is debatable.

        # unpadding
        cur_message_unpadding = []
        for i, (a_start, a_size) in enumerate(a_scope):
            cur_message_unpadding.append(cur_message[i, :a_size].view(-1, 2 * self.hidden_size))
        cur_message_unpadding = t.cat(cur_message_unpadding, 0)

        message = t.cat([t.cat([message.narrow(0, 0, 1), message.narrow(0, 0, 1)], 1),
                         cur_message_unpadding], 0)
        # Row 0 of the batched graph is the padding atom; its feature is duplicated (to match
        # the 2*hidden_size width) and re-prepended so downstream row indices stay aligned.
        return message
input_atom = self.W_i_atom(f_atoms) 158 | #print(input_atom.size()) 159 | input_atom = self.act_func(input_atom) 160 | message_atom = input_atom.clone() 161 | 162 | #print(f_bonds) 163 | #print(f_bonds.size()) 164 | #print(self.W_i_bond) 165 | input_bond = self.W_i_bond(f_bonds) 166 | input_bond = self.act_func(input_bond) 167 | message_bond = input_bond.clone() 168 | 169 | # Message Passing 170 | for layer in self.MPNLayers: 171 | message_atom, message_bond = layer(message_atom, message_bond, a2b,b2a,b2revb,input_bond) 172 | 173 | agg_message = index_select_ND(message_bond, a2b) 174 | agg_message = self.MessageBooster(agg_message) 175 | 176 | agg_message = self.lr(t.cat([agg_message, message_atom, input_atom], 1)) 177 | 178 | agg_message = self.gru(agg_message, a_scope) 179 | 180 | atom_hiddens = self.act_func(self.W_o(agg_message)) 181 | atom_hiddens = self.dropout_layer(atom_hiddens) 182 | 183 | # Readout 184 | mol_vecs = [] 185 | for i, (a_start, a_size) in enumerate(a_scope): 186 | if a_size == 0: 187 | assert 0 188 | cur_hiddens = atom_hiddens.narrow(0, a_start, a_size) 189 | mol_vecs.append(cur_hiddens.mean(0)) 190 | mol_vecs = t.stack(mol_vecs, dim=0) 191 | 192 | return mol_vecs 193 | 194 | def MessageBooster(self, agg_message): 195 | return agg_message.sum(dim=1) * agg_message.max(dim=1)[0] 196 | 197 | def _unpack_inputs(self, input): 198 | f_atoms, f_bonds, a2b, b2a, b2revb, a_scope, b_scope, bonds = input.get_components() 199 | #print(f_bonds) 200 | #print(f_bonds.size()) 201 | #print(self.opt.args['CUDA_VISIBLE_DEVICES']) 202 | f_atoms, f_bonds, a2b, b2a, b2revb = ( 203 | f_atoms.to(t.device(f"cuda:{self.opt.args['CUDA_VISIBLE_DEVICES']}" if t.cuda.is_available() else 'cpu')), 204 | f_bonds.to(t.device(f"cuda:{self.opt.args['CUDA_VISIBLE_DEVICES']}" if t.cuda.is_available() else 'cpu')), 205 | a2b.to(t.device(f"cuda:{self.opt.args['CUDA_VISIBLE_DEVICES']}" if t.cuda.is_available() else 'cpu')), 206 | 
class CMPNNModel(nn.Module):
    # A CMPNN Model includes a message passing network following by a FCN.

    def __init__(self, classification: bool, multiclass: bool, opt):
        """Build the CMPNN graph encoder plus a feed-forward task head.

        :param classification: if True, a sigmoid is applied to the output at eval time.
        :param multiclass: if True, the output is reshaped to [batch, -1, ClassNum]
            and softmaxed at eval time.
        :param opt: experiment option object; hyper-parameters are read from opt.args.
        """
        super(CMPNNModel, self).__init__()

        self.classification = classification
        if self.classification:
            self.sigmoid = nn.Sigmoid()
        self.multiclass = multiclass
        if self.multiclass:
            self.multiclass_softmax = nn.Softmax(dim=2)
        assert not (self.classification and self.multiclass)

        self.opt = opt
        self.hidden_size = opt.args['FPSize']
        self.num_classes = opt.args['ClassNum']
        self.dataset_type = opt.args['dataset_type']
        self.output_size = opt.args['OutputSize']
        self.ffn_hidden_size = opt.args['ffn_hidden_size']

        self.only_extract_feature = opt.args['only_extract_feature']

        # BUGFIX: the original read
        #     if self.dataset_type == 'multicalss': self.multiclass == True
        # i.e. a misspelled dataset type AND a no-op comparison instead of an
        # assignment, so multiclass mode could never be enabled this way.
        if self.dataset_type == 'multiclass':
            self.multiclass = True

        self.create_encoder()
        self.create_ffn()

    def create_encoder(self):
        # The graph encoder is a communicative message passing network (MPN).
        self.encoder = MPN(self.opt)

    def create_ffn(self):
        # Build the feed-forward task head on top of the FP-sized graph embedding.
        first_linear_dim = self.hidden_size * 1

        dropout = nn.Dropout(self.opt.args['DropRate'])
        activation = get_activation_function(self.opt)

        # Create FNN Layers
        if self.opt.args['ffn_num_layers'] == 1:
            ffn = [
                dropout,
                nn.Linear(first_linear_dim, self.output_size)
            ]
        else:
            ffn = [
                dropout,
                nn.Linear(first_linear_dim, self.ffn_hidden_size)
            ]
            for _ in range(self.opt.args['ffn_num_layers'] - 2):
                ffn.extend([
                    activation,
                    dropout,
                    nn.Linear(self.ffn_hidden_size, self.ffn_hidden_size)
                ])
            ffn.extend([
                activation,
                dropout,
                nn.Linear(self.ffn_hidden_size, self.output_size)
            ])

        self.ffn = nn.Sequential(*ffn)

    def forward(self, input):
        # An encoder to extract information of a graph
        # and a FCN as task layer to make prediction.
        output = self.encoder(input)
        if self.only_extract_feature:
            # self.ffn has the same function with the DNN classifier model;
            # if we only need to extract features, ffn is not needed.
            return output

        output = self.ffn(output)
        # output layer
        if self.classification and not self.training:
            output = self.sigmoid(output)
        if self.multiclass:
            output = output.reshape((output.size(0), -1, self.num_classes))
            if not self.training:
                output = self.multiclass_softmax(output)

        return output
37 | :param init_lr: The initial learning rate. 38 | :param max_lr: The maximum learning rate (achieved after warmup_epochs). 39 | :param final_lr: The final learning rate (achieved after total_epochs). 40 | """ 41 | assert len(optimizer.param_groups) == len(warmup_epochs) == len(total_epochs) == len(init_lr) == \ 42 | len(max_lr) == len(final_lr) 43 | 44 | self.num_lrs = len(optimizer.param_groups) 45 | 46 | self.optimizer = optimizer 47 | self.warmup_epochs = np.array(warmup_epochs) 48 | self.total_epochs = np.array(total_epochs) 49 | self.steps_per_epoch = steps_per_epoch 50 | self.init_lr = np.array(init_lr) 51 | self.max_lr = np.array(max_lr) 52 | self.final_lr = np.array(final_lr) 53 | 54 | self.current_step = 0 55 | self.lr = init_lr 56 | self.warmup_steps = (self.warmup_epochs * self.steps_per_epoch).astype(int) 57 | self.total_steps = self.total_epochs * self.steps_per_epoch 58 | self.linear_increment = (self.max_lr - self.init_lr) / self.warmup_steps 59 | 60 | self.exponential_gamma = (self.final_lr / self.max_lr) ** (1 / (self.total_steps - self.warmup_steps)) 61 | 62 | super(NoamLR, self).__init__(optimizer) 63 | 64 | def get_lr(self) -> List[float]: 65 | """Gets a list of the current learning rates.""" 66 | return list(self.lr) 67 | 68 | def step(self, current_step: int = None): 69 | """ 70 | Updates the learning rate by taking a step. 71 | 72 | :param current_step: Optionally specify what step to set the learning rate to. 73 | If None, current_step = self.current_step + 1. 
74 | """ 75 | if current_step is not None: 76 | self.current_step = current_step 77 | else: 78 | self.current_step += 1 79 | 80 | for i in range(self.num_lrs): 81 | if self.current_step <= self.warmup_steps[i]: 82 | self.lr[i] = self.init_lr[i] + self.current_step * self.linear_increment[i] 83 | elif self.current_step <= self.total_steps[i]: 84 | self.lr[i] = self.max_lr[i] * (self.exponential_gamma[i] ** (self.current_step - self.warmup_steps[i])) 85 | else: # theoretically this case should never be reached since training should stop at total_steps 86 | self.lr[i] = self.final_lr[i] 87 | 88 | self.optimizer.param_groups[i]['lr'] = self.lr[i] 89 | 90 | 91 | def get_activation_function(opt) -> nn.Module: 92 | """ 93 | Gets an activation function module given the name of the activation. 94 | 95 | :param activation: The name of the activation function. 96 | :return: The activation function module. 97 | """ 98 | activation = opt.args['activation'] 99 | 100 | if activation == 'ReLU': 101 | return nn.ReLU() 102 | elif activation == 'LeakyReLU': 103 | return nn.LeakyReLU(0.1) 104 | elif activation == 'PReLU': 105 | return nn.PReLU() 106 | elif activation == 'tanh': 107 | return nn.Tanh() 108 | elif activation == 'SELU': 109 | return nn.SELU() 110 | elif activation == 'ELU': 111 | return nn.ELU() 112 | else: 113 | raise ValueError(f'Activation "{activation}" not supported.') 114 | 115 | 116 | def initialize_weights(model: nn.Module): 117 | """ 118 | Initializes the weights of a model in place. 119 | 120 | :param model: An nn.Module. 121 | """ 122 | for param in model.parameters(): 123 | if param.dim() == 1: 124 | nn.init.constant_(param, 0) 125 | else: 126 | nn.init.xavier_normal_(param) 127 | 128 | 129 | def index_select_ND(source: torch.Tensor, index: torch.Tensor) -> torch.Tensor: 130 | """ 131 | Selects the message features from source corresponding to the atom or bond indices in index. 
132 | 133 | :param source: A tensor of shape (num_bonds, hidden_size) containing message features. 134 | :param index: A tensor of shape (num_atoms/num_bonds, max_num_bonds) containing the atom or bond 135 | indices to select from source. 136 | :return: A tensor of shape (num_atoms/num_bonds, max_num_bonds, hidden_size) containing the message 137 | features corresponding to the atoms/bonds specified in index. 138 | """ 139 | index_size = index.size() # (num_atoms/num_bonds, max_num_bonds) 140 | suffix_dim = source.size()[1:] # (hidden_size,) 141 | final_size = index_size + suffix_dim # (num_atoms/num_bonds, max_num_bonds, hidden_size) 142 | 143 | target = source.index_select(dim = 0, index = index.view(-1)) # (num_atoms/num_bonds * max_num_bonds, hidden_size) 144 | target = target.view(final_size) # (num_atoms/num_bonds, max_num_bonds, hidden_size) 145 | 146 | target[index == 0] = 0 147 | return target -------------------------------------------------------------------------------- /ACNet/Models/ClassifierModel.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | class DNN(nn.Module): 4 | def __init__(self, input_size, layer_sizes, output_size, opt): 5 | super(DNN, self).__init__() 6 | self.output_size = output_size 7 | self.opt = opt 8 | self.LayerList = nn.ModuleList() 9 | if len(layer_sizes) == 0: 10 | self.FC = nn.Linear(input_size, output_size) 11 | else: 12 | for i in range(len(layer_sizes)): 13 | if i == 0: 14 | self.LayerList.append(nn.Linear(input_size, layer_sizes[i])) 15 | else: 16 | self.LayerList.append(nn.Linear(layer_sizes[i-1], layer_sizes[i])) 17 | self.LayerList.append(nn.ReLU()) 18 | self.Output = nn.Linear(layer_sizes[-1], output_size) 19 | self.layer_sizes = layer_sizes 20 | self.Drop = nn.Dropout(p=self.opt.args['DropRate']) 21 | self.Softmax = nn.Softmax(dim=1) 22 | 23 | def forward(self, x): 24 | if len(self.layer_sizes) == 0: 25 | x = self.FC(x) 26 | if 
self.opt.args['ClassNum'] != 1: 27 | if not self.training: 28 | # print(f"x size: {x.size()}") 29 | x = self.Softmax(x) 30 | else: 31 | for layer in self.LayerList: 32 | x = layer(x) 33 | x = self.Drop(x) 34 | x = self.Output(x) 35 | if self.opt.args['ClassNum'] != 1: 36 | if not self.training: 37 | # print(f"x size: {x.size()}") 38 | x = self.Softmax(x) 39 | 40 | return x -------------------------------------------------------------------------------- /ACNet/Models/Graphormer/algos.cpython-37m-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DrugAI/ACNet/1a4902c46f8a0bf97a0f8494d45989b81e053faa/ACNet/Models/Graphormer/algos.cpython-37m-x86_64-linux-gnu.so -------------------------------------------------------------------------------- /ACNet/Models/Graphormer/algos.cpython-38-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DrugAI/ACNet/1a4902c46f8a0bf97a0f8494d45989b81e053faa/ACNet/Models/Graphormer/algos.cpython-38-x86_64-linux-gnu.so -------------------------------------------------------------------------------- /ACNet/Models/Graphormer/algos.pyx: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 
def floyd_warshall(adjacency_matrix):
    # All-pairs shortest paths on a dense adjacency matrix.
    # Returns (M, path): M[i][j] is the shortest distance (510 marks unreachable),
    # path[i][j] is an intermediate node k on a shortest i->j walk, used by
    # get_all_edges to reconstruct the route (0 means "no midpoint recorded").

    (nrows, ncols) = adjacency_matrix.shape
    assert nrows == ncols
    cdef unsigned int n = nrows

    # Work on a C-contiguous signed-long copy so the raw pointer walks below are safe.
    adj_mat_copy = adjacency_matrix.astype(long, order='C', casting='safe', copy=True)
    assert adj_mat_copy.flags['C_CONTIGUOUS']
    cdef numpy.ndarray[long, ndim=2, mode='c'] M = adj_mat_copy
    cdef numpy.ndarray[long, ndim=2, mode='c'] path = numpy.zeros([n, n], dtype=numpy.int64)

    cdef unsigned int i, j, k
    cdef long M_ij, M_ik, cost_ikkj
    cdef long* M_ptr = &M[0,0]
    cdef long* M_i_ptr
    cdef long* M_k_ptr

    # set unreachable nodes distance to 510
    for i in range(n):
        for j in range(n):
            if i == j:
                M[i][j] = 0
            elif M[i][j] == 0:
                M[i][j] = 510

    # floyed algo
    # Classic triple loop; row pointers are hoisted so the inner loop is pure C arithmetic.
    for k in range(n):
        M_k_ptr = M_ptr + n*k
        for i in range(n):
            M_i_ptr = M_ptr + n*i
            M_ik = M_i_ptr[k]
            for j in range(n):
                cost_ikkj = M_ik + M_k_ptr[j]
                M_ij = M_i_ptr[j]
                if M_ij > cost_ikkj:
                    M_i_ptr[j] = cost_ikkj
                    path[i][j] = k
                    # Path[i][j] means, if want go from i to j, traveler should go to k first. Then, from k to j.

    # set unreachable path to 510
    # Any distance that is still >= 510 never got relaxed, so clamp both tables.
    for i in range(n):
        for j in range(n):
            if M[i][j] >= 510:
                path[i][j] = 510
                M[i][j] = 510

    return M, path
96 | return edge_fea_all 97 | -------------------------------------------------------------------------------- /ACNet/Models/Graphormer/build/temp.linux-x86_64-3.7/algos.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DrugAI/ACNet/1a4902c46f8a0bf97a0f8494d45989b81e053faa/ACNet/Models/Graphormer/build/temp.linux-x86_64-3.7/algos.o -------------------------------------------------------------------------------- /ACNet/Models/Graphormer/build/temp.linux-x86_64-3.8/algos.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DrugAI/ACNet/1a4902c46f8a0bf97a0f8494d45989b81e053faa/ACNet/Models/Graphormer/build/temp.linux-x86_64-3.8/algos.o -------------------------------------------------------------------------------- /ACNet/Models/Graphormer/collator.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 
def pad_1d_unsqueeze(x, padlen):
    """Shift a 1-D id tensor by one (reserving 0 as the pad id), right-pad it
    with zeros to ``padlen``, and add a leading batch dimension."""
    shifted = x + 1  # pad id = 0
    n = shifted.size(0)
    if n < padlen:
        padded = shifted.new_zeros([padlen], dtype=shifted.dtype)
        padded[:n] = shifted
        shifted = padded
    return shifted.unsqueeze(0)
def collator(items, max_node, multi_hop_max_dist, spatial_pos_max):
    """Collate preprocessed graph items into one padded Batch.

    :param items: iterable of 10-element lists
        [x, adj, attn_bias, attn_edge_type, spatial_pos, in_degree,
         out_degree, edge_input, label, idx]; None entries are skipped.
    :param max_node: graphs with more than this many nodes are dropped.
    :param multi_hop_max_dist: edge_input is truncated to this many hops.
    :param spatial_pos_max: node pairs farther apart than this get -inf attention bias.
    """
    data = []
    for item in items:
        # BUGFIX: the original unpacked `item` BEFORE testing `item is not None`,
        # so a None item crashed on unpacking and the None check was dead code.
        if item is None:
            continue
        [x,
         adj,
         attn_bias,
         attn_edge_type,
         spatial_pos,
         in_degree,
         out_degree,
         edge_input,
         label,
         idx] = item
        if x.size(0) > max_node:
            continue  # drop over-sized graphs
        edge_input = edge_input[:, :, :multi_hop_max_dist, :]
        data.append([attn_bias,
                     attn_edge_type,
                     spatial_pos,
                     in_degree,
                     out_degree,
                     x,
                     edge_input,
                     label,
                     idx])
    attn_biases, attn_edge_types, spatial_poses, in_degrees, out_degrees, xs, edge_inputs, ys, idxs = zip(
        *data)
    # Mask attention between nodes that are too far apart; row/col 0 is the graph token.
    for idx, _ in enumerate(attn_biases):
        attn_biases[idx][1:, 1:][spatial_poses[idx] >= spatial_pos_max] = float('-inf')
    max_node_num = max(i.size(0) for i in xs)
    max_dist = max(i.size(-2) for i in edge_inputs)

    y = torch.cat(ys)

    x = torch.cat([pad_2d_unsqueeze(i, max_node_num) for i in xs])
    edge_input = torch.cat([pad_3d_unsqueeze(
        i, max_node_num, max_node_num, max_dist) for i in edge_inputs])
    attn_bias = torch.cat([pad_attn_bias_unsqueeze(
        i, max_node_num + 1) for i in attn_biases])
    attn_edge_type = torch.cat(
        [pad_edge_type_unsqueeze(i, max_node_num) for i in attn_edge_types])
    spatial_pos = torch.cat([pad_spatial_pos_unsqueeze(i, max_node_num)
                             for i in spatial_poses])
    in_degree = torch.cat([pad_1d_unsqueeze(i, max_node_num)
                           for i in in_degrees])
    out_degree = torch.cat([pad_1d_unsqueeze(i, max_node_num)
                            for i in out_degrees])

    # generate batch_data
    return Batch(
        idx=torch.LongTensor(idxs),
        attn_bias=attn_bias,
        attn_edge_type=attn_edge_type,
        spatial_pos=spatial_pos,
        in_degree=in_degree,
        out_degree=out_degree,
        x=x,
        edge_input=edge_input,
        y=y,
    )
class GraphDataModule(LightningDataModule):
    """Lightning data module wrapping the OGB/ZINC graph datasets with the
    Graphormer collator (padding, attention-bias masking, multi-hop truncation)."""
    name = "OGB-GRAPH"

    def __init__(
        self,
        dataset_name: str = 'ogbg-molpcba',
        num_workers: int = 4,
        batch_size: int = 128,
        seed: int = 42,
        multi_hop_max_dist: int = 5,
        spatial_pos_max: int = 1024,
        *args,
        **kwargs,
    ):
        super().__init__(*args, **kwargs)
        self.dataset_name = dataset_name
        # get_dataset caches globally, so this lookup is shared across modules.
        self.dataset = get_dataset(self.dataset_name)

        self.num_workers = num_workers
        self.batch_size = batch_size
        # Ellipsis placeholders; the real splits are assigned in setup().
        self.dataset_train = ...
        self.dataset_val = ...
        self.multi_hop_max_dist = multi_hop_max_dist
        self.spatial_pos_max = spatial_pos_max

    def setup(self, stage: str = None):
        # ZINC ships pre-split datasets; the OGB datasets provide index splits instead.
        if self.dataset_name == 'ZINC':
            self.dataset_train = self.dataset['train_dataset']
            self.dataset_val = self.dataset['valid_dataset']
            self.dataset_test = self.dataset['test_dataset']
        else:
            split_idx = self.dataset['dataset'].get_idx_split()
            self.dataset_train = self.dataset['dataset'][split_idx["train"]]
            self.dataset_val = self.dataset['dataset'][split_idx["valid"]]
            self.dataset_test = self.dataset['dataset'][split_idx["test"]]

    def train_dataloader(self):
        # Only the training loader shuffles and pins memory.
        loader = DataLoader(
            self.dataset_train,
            batch_size=self.batch_size,
            shuffle=True,
            num_workers=self.num_workers,
            pin_memory=True,
            collate_fn=partial(collator, max_node=get_dataset(self.dataset_name)[
                'max_node'], multi_hop_max_dist=self.multi_hop_max_dist, spatial_pos_max=self.spatial_pos_max),
        )
        print('len(train_dataloader)', len(loader))
        return loader

    def val_dataloader(self):
        loader = DataLoader(
            self.dataset_val,
            batch_size=self.batch_size,
            shuffle=False,
            num_workers=self.num_workers,
            pin_memory=False,
            collate_fn=partial(collator, max_node=get_dataset(self.dataset_name)[
                'max_node'], multi_hop_max_dist=self.multi_hop_max_dist, spatial_pos_max=self.spatial_pos_max),
        )
        print('len(val_dataloader)', len(loader))
        return loader

    def test_dataloader(self):
        loader = DataLoader(
            self.dataset_test,
            batch_size=self.batch_size,
            shuffle=False,
            num_workers=self.num_workers,
            pin_memory=False,
            collate_fn=partial(collator, max_node=get_dataset(self.dataset_name)[
                'max_node'], multi_hop_max_dist=self.multi_hop_max_dist, spatial_pos_max=self.spatial_pos_max),
        )
        print('len(test_dataloader)', len(loader))
        return loader
def convert_to_single_emb(x, offset=512):
    """Shift each feature column of *x* into its own disjoint id range.

    Column j is mapped into [1 + j*offset, ...), so a single shared embedding
    table can serve every column (0 stays reserved as the padding id).
    """
    n_features = x.size(1) if len(x.size()) > 1 else 1
    shifts = 1 + \
        torch.arange(0, n_features * offset, offset, dtype=torch.long)
    return x + shifts
convert_to_single_emb(edge_attr) + 1 41 | 42 | shortest_path_result, path = algos.floyd_warshall(adj.numpy()) 43 | max_dist = np.amax(shortest_path_result) 44 | edge_input = algos.gen_edge_input(max_dist, path, attn_edge_type.numpy()) 45 | spatial_pos = torch.from_numpy((shortest_path_result)).long() 46 | attn_bias = torch.zeros( 47 | [N + 1, N + 1], dtype=torch.float) # with graph token 48 | 49 | # combine 50 | item.x = x 51 | item.adj = adj 52 | item.attn_bias = attn_bias 53 | item.attn_edge_type = attn_edge_type 54 | item.spatial_pos = spatial_pos 55 | item.in_degree = adj.long().sum(dim=1).view(-1) 56 | item.out_degree = adj.long().sum(dim=0).view(-1) 57 | item.edge_input = torch.from_numpy(edge_input).long() 58 | 59 | return item 60 | 61 | 62 | class MyGraphPropPredDataset(PygGraphPropPredDataset): 63 | def download(self): 64 | super(MyGraphPropPredDataset, self).download() 65 | 66 | def process(self): 67 | super(MyGraphPropPredDataset, self).process() 68 | 69 | def __getitem__(self, idx): 70 | if isinstance(idx, int): 71 | item = self.get(self.indices()[idx]) 72 | item.idx = idx 73 | return preprocess_item(item) 74 | else: 75 | return self.index_select(idx) 76 | 77 | 78 | class MyPygPCQM4MDataset(PygPCQM4MDataset): 79 | def download(self): 80 | super(MyPygPCQM4MDataset, self).download() 81 | 82 | def process(self): 83 | super(MyPygPCQM4MDataset, self).process() 84 | 85 | def __getitem__(self, idx): 86 | if isinstance(idx, int): 87 | item = self.get(self.indices()[idx]) 88 | item.idx = idx 89 | return preprocess_item(item) 90 | else: 91 | return self.index_select(idx) 92 | 93 | 94 | class MyZINCDataset(torch_geometric.datasets.ZINC): 95 | def download(self): 96 | super(MyZINCDataset, self).download() 97 | 98 | def process(self): 99 | super(MyZINCDataset, self).process() 100 | 101 | def __getitem__(self, idx): 102 | if isinstance(idx, int): 103 | item = self.get(self.indices()[idx]) 104 | item.idx = idx 105 | return preprocess_item(item) 106 | else: 107 | 
return self.index_select(idx) 108 | -------------------------------------------------------------------------------- /ACNet/Pretrain8Few.py: -------------------------------------------------------------------------------- 1 | from ACComponents.ACProcessControllers import * 2 | 3 | ExpOptions = { 4 | 'Search': 'greedy', 5 | 'SeedPerOpt': 3, 6 | 'SubsetsNum': 13, 7 | 'OnlyEval': False, 8 | 'Finetune':True, 9 | } 10 | 11 | BasicParamList = { 12 | 'ExpName': 'ACFew', 13 | 'MainMetric': 'AUC', 14 | 'DataPath': './ACComponents/ACDataset/data_files/MMP_AC_Few_representation/Pretrain8.npz', 15 | 'RootPath': './TestExp/Few/Pretrain8/', 16 | 'CUDA_VISIBLE_DEVICES': '3', 17 | 'TaskNum': 1, 18 | 'ClassNum': 2, 19 | 'OutputSize': 2, 20 | 'Feature': 'Raw', 21 | 'Model': 'MLP', 22 | 23 | # if Feature == Raw 24 | 'RawFeatureSize': 512, 25 | 26 | 'OnlySpecific': True, 27 | 'Weight': True, 28 | 'AC': True, 29 | 'PyG': False, 30 | 31 | 'ValidRate': 40000, 32 | 'PrintRate': 5, 33 | 'UpdateRate': 1, 34 | 'SplitRate': [0.8, 0.1], 35 | 'Splitter': 'Random', 36 | 'MaxEpoch': 300, 37 | 'LowerThanMaxLimit': 12, 38 | 'DecreasingLimit': 8, 39 | 40 | # if OnlyEval == True: 41 | 'EvalModelPath': None, 42 | 'EvalDatasetPath': None, 43 | 'EvalLogAllPreds': None, 44 | 45 | 'Scheduler': 'PolynomialDecayLR', 46 | 47 | # Params for PolynomialDecayLR only 48 | 'WarmupEpoch': 2, 49 | 'LRMaxEpoch':300, 50 | 'EndLR':1e-9, 51 | 'Power':1.0, 52 | # Params for StepLR only 53 | 'LRStep': 30, 54 | 'LRGamma': 0.1, 55 | ########## 56 | 57 | 'WeightIniter': None, 58 | 59 | # Params for NormWeightIniter only 60 | 'InitMean' : 0, 61 | 'InitStd' : 1, 62 | 63 | # Training Params to be adujsted. If the param is not needed to be adjusted, set the value here. 
64 | 'SplitValidSeed': 8, 65 | 'SplitTestSeed': 8, 66 | 'BatchSize': 8, 67 | } 68 | AdjustableParamList = {} 69 | SpecificParamList = { 70 | 'DropRate':[0.4], 71 | 'WeightDecay':[5], 72 | 'lr':[3], 73 | 'DNNLayers':[[256,64]], 74 | } 75 | 76 | 77 | expcontroller = ACExperimentProcessController(ExpOptions, [BasicParamList, AdjustableParamList, SpecificParamList]) 78 | 79 | expcontroller.ExperimentStart() 80 | 81 | -------------------------------------------------------------------------------- /ACNet/PretrainGNNsFew.py: -------------------------------------------------------------------------------- 1 | from ACComponents.ACProcessControllers import * 2 | 3 | ExpOptions = { 4 | 'Search': 'greedy', 5 | 'SeedPerOpt': 3, 6 | 'SubsetsNum': 13, 7 | 'OnlyEval': False, 8 | 'Finetune':True, 9 | } 10 | 11 | BasicParamList = { 12 | 'ExpName': 'ACFew', 13 | 'MainMetric': 'AUC', 14 | 'DataPath': './ACComponents/ACDataset/data_files/MMP_AC_Few_representation/PretrainGNNs.npz', 15 | 'RootPath': './TestExp/Few/PretrainGNNs/', 16 | 'CUDA_VISIBLE_DEVICES': '3', 17 | 'TaskNum': 1, 18 | 'ClassNum': 2, 19 | 'OutputSize': 2, 20 | 'Feature': 'Raw', 21 | 'Model': 'MLP', 22 | 23 | # if Feature == Raw 24 | 'RawFeatureSize': 300, 25 | 26 | 'OnlySpecific': True, 27 | 'Weight': True, 28 | 'AC': True, 29 | 'PyG': False, 30 | 31 | 'ValidRate': 40000, 32 | 'PrintRate': 5, 33 | 'UpdateRate': 1, 34 | 'SplitRate': [0.8, 0.1], 35 | 'Splitter': 'Random', 36 | 'MaxEpoch': 300, 37 | 'LowerThanMaxLimit': 12, 38 | 'DecreasingLimit': 8, 39 | 40 | # if OnlyEval == True: 41 | 'EvalModelPath': None, 42 | 'EvalDatasetPath': None, 43 | 'EvalLogAllPreds': None, 44 | 45 | 'Scheduler': 'PolynomialDecayLR', 46 | 47 | # Params for PolynomialDecayLR only 48 | 'WarmupEpoch': 2, 49 | 'LRMaxEpoch':300, 50 | 'EndLR':1e-9, 51 | 'Power':1.0, 52 | # Params for StepLR only 53 | 'LRStep': 30, 54 | 'LRGamma': 0.1, 55 | ########## 56 | 57 | 'WeightIniter': None, 58 | 59 | # Params for NormWeightIniter only 60 | 'InitMean' : 0, 
61 | 'InitStd' : 1, 62 | 63 | # Training Params to be adujsted. If the param is not needed to be adjusted, set the value here. 64 | 'SplitValidSeed': 8, 65 | 'SplitTestSeed': 8, 66 | 'BatchSize': 8, 67 | 68 | } 69 | AdjustableParamList = {} 70 | SpecificParamList = { 71 | 'DropRate':[0.2], 72 | 'WeightDecay':[4.5], 73 | 'lr':[3], 74 | 'DNNLayers':[[128]], 75 | } 76 | 77 | 78 | expcontroller = ACExperimentProcessController(ExpOptions, [BasicParamList, AdjustableParamList, SpecificParamList]) 79 | 80 | expcontroller.ExperimentStart() 81 | 82 | -------------------------------------------------------------------------------- /ACNet/SGCLarge.py: -------------------------------------------------------------------------------- 1 | from ACComponents.ACProcessControllers import * 2 | 3 | ExpOptions = { 4 | 'Search': 'greedy', 5 | 'SeedPerOpt': 3, 6 | 'SubsetsNum': 3, 7 | 'OnlyEval': False, 8 | } 9 | 10 | BasicParamList = { 11 | 'ExpName': 'ACLarge', 12 | 'MainMetric': 'AUC', 13 | 'DataPath': './ACComponents/ACDataset/data_files/generated_datasets/MMP_AC_Large.json', 14 | 'RootPath': './TestExp/Large/SGC/', 15 | 'CUDA_VISIBLE_DEVICES': '1', 16 | 'TaskNum': 1, 17 | 'ClassNum': 2, 18 | 'OutputSize': 2, 19 | 'Feature': 'PyGSGC', 20 | 'Model': 'PyGSGC', 21 | 22 | 'OnlySpecific': True, 23 | 'Weight': True, 24 | 'AC': True, 25 | 'PyG': True, 26 | 27 | 'ValidRate': 4000, 28 | 'PrintRate': 5, 29 | 'UpdateRate': 1, 30 | 'SplitRate': [0.8, 0.1], 31 | 'Splitter': 'Random', 32 | 'MaxEpoch': 300, 33 | 'LowerThanMaxLimit': 30, 34 | 'DecreasingLimit': 12, 35 | 36 | # if OnlyEval == True: 37 | 'EvalModelPath': None, 38 | 'EvalDatasetPath': None, 39 | 'EvalLogAllPreds': None, 40 | 41 | 'Scheduler': 'PolynomialDecayLR', 42 | 43 | # Params for PolynomialDecayLR only 44 | 'WarmupEpoch': 2, 45 | 'LRMaxEpoch':300, 46 | 'EndLR':1e-9, 47 | 'Power':1.0, 48 | # Params for StepLR only 49 | 'LRStep': 30, 50 | 'LRGamma': 0.1, 51 | ########## 52 | 53 | 'WeightIniter': 'XavierNorm', 54 | 55 | # Params for 
NormWeightIniter only 56 | 'InitMean' : 0, 57 | 'InitStd' : 1, 58 | 59 | 'AtomFeatureSize': 39, 60 | 'BondFeatureSize': 10, 61 | 62 | 'GCNReadout': 'Add', 63 | 64 | 65 | 'SplitValidSeed': 8, 66 | 'SplitTestSeed': 8, 67 | 'BatchSize': 200, 68 | 69 | } 70 | AdjustableParamList = {} 71 | SpecificParamList = { 72 | 'DropRate':[0.4], 73 | 'WeightDecay':[5], 74 | 'lr':[4], 75 | 'SGCInputSize': [128], 76 | 'SGCHiddenSize': [256], 77 | 'SGCK': [2], 78 | 'SGCLayers': [4], 79 | 'FPSize':[128], 80 | 'DNNLayers':[[64]], 81 | } 82 | 83 | expcontroller = ACExperimentProcessController(ExpOptions, [BasicParamList, AdjustableParamList, SpecificParamList]) 84 | 85 | expcontroller.ExperimentStart() 86 | -------------------------------------------------------------------------------- /ACNet/SGCMedium.py: -------------------------------------------------------------------------------- 1 | from ACComponents.ACProcessControllers import * 2 | 3 | ExpOptions = { 4 | 'Search': 'greedy', 5 | 'SeedPerOpt': 3, 6 | 'SubsetsNum': 64, 7 | 'OnlyEval': False, 8 | } 9 | 10 | BasicParamList = { 11 | 'ExpName': 'ACMedium', 12 | 'MainMetric': 'AUC', 13 | 'DataPath': './ACComponents/ACDataset/data_files/generated_datasets/MMP_AC_Medium.json', 14 | 'RootPath': './TestExp/Medium/SGC/', 15 | 'CUDA_VISIBLE_DEVICES': '1', 16 | 'TaskNum': 1, 17 | 'ClassNum': 2, 18 | 'OutputSize': 2, 19 | 'Feature': 'PyGSGC', 20 | 'Model': 'PyGSGC', 21 | 22 | 'OnlySpecific': True, 23 | 'Weight': True, 24 | 'AC': True, 25 | 'PyG': True, 26 | 27 | 'ValidRate': 4000, 28 | 'PrintRate': 5, 29 | 'UpdateRate': 1, 30 | 'SplitRate': [0.8, 0.1], 31 | 'Splitter': 'Random', 32 | 'MaxEpoch': 300, 33 | 'LowerThanMaxLimit': 30, 34 | 'DecreasingLimit': 12, 35 | 36 | # if OnlyEval == True: 37 | 'EvalModelPath': None, 38 | 'EvalDatasetPath': None, 39 | 'EvalLogAllPreds': None, 40 | 41 | 'Scheduler': 'PolynomialDecayLR', 42 | 43 | # Params for PolynomialDecayLR only 44 | 'WarmupEpoch': 2, 45 | 'LRMaxEpoch':300, 46 | 'EndLR':1e-9, 47 | 
'Power':1.0, 48 | # Params for StepLR only 49 | 'LRStep': 30, 50 | 'LRGamma': 0.1, 51 | ########## 52 | 53 | 'WeightIniter': 'XavierNorm', 54 | 55 | # Params for NormWeightIniter only 56 | 'InitMean' : 0, 57 | 'InitStd' : 1, 58 | 59 | 'AtomFeatureSize': 39, 60 | 'BondFeatureSize': 10, 61 | 62 | 'GCNReadout': 'Add', 63 | 64 | 65 | 'SplitValidSeed': 8, 66 | 'SplitTestSeed': 8, 67 | 'BatchSize': 200, 68 | 69 | } 70 | AdjustableParamList = {} 71 | SpecificParamList = { 72 | 'DropRate':[0.2], 73 | 'WeightDecay':[4.5], 74 | 'lr':[3], 75 | 'SGCInputSize': [64], 76 | 'SGCHiddenSize': [64], 77 | 'SGCK': [2], 78 | 'SGCLayers': [3], 79 | 'FPSize':[32], 80 | 'DNNLayers':[[]], 81 | } 82 | 83 | expcontroller = ACExperimentProcessController(ExpOptions, [BasicParamList, AdjustableParamList, SpecificParamList]) 84 | 85 | expcontroller.ExperimentStart() 86 | -------------------------------------------------------------------------------- /ACNet/SGCSmall.py: -------------------------------------------------------------------------------- 1 | from ACComponents.ACProcessControllers import * 2 | 3 | ExpOptions = { 4 | 'Search': 'greedy', 5 | 'SeedPerOpt': 3, 6 | 'SubsetsNum': 110, 7 | 'OnlyEval': False, 8 | } 9 | 10 | BasicParamList = { 11 | 'ExpName': 'ACSmall', 12 | 'MainMetric': 'AUC', 13 | 'DataPath': './ACComponents/ACDataset/data_files/generated_datasets/MMP_AC_Small.json', 14 | 'RootPath': './TestExp/Small/SGC/', 15 | 'CUDA_VISIBLE_DEVICES': '1', 16 | 'TaskNum': 1, 17 | 'ClassNum': 2, 18 | 'OutputSize': 2, 19 | 'Feature': 'PyGSGC', 20 | 'Model': 'PyGSGC', 21 | 22 | 'OnlySpecific': True, 23 | 'Weight': True, 24 | 'AC': True, 25 | 'PyG': True, 26 | 27 | 'ValidRate': 4000, 28 | 'PrintRate': 5, 29 | 'UpdateRate': 1, 30 | 'SplitRate': [0.8, 0.1], 31 | 'Splitter': 'Random', 32 | 'MaxEpoch': 300, 33 | 'LowerThanMaxLimit': 30, 34 | 'DecreasingLimit': 12, 35 | 36 | # if OnlyEval == True: 37 | 'EvalModelPath': None, 38 | 'EvalDatasetPath': None, 39 | 'EvalLogAllPreds': None, 40 | 41 | 
'Scheduler': 'PolynomialDecayLR', 42 | 43 | # Params for PolynomialDecayLR only 44 | 'WarmupEpoch': 2, 45 | 'LRMaxEpoch':300, 46 | 'EndLR':1e-9, 47 | 'Power':1.0, 48 | # Params for StepLR only 49 | 'LRStep': 30, 50 | 'LRGamma': 0.1, 51 | ########## 52 | 53 | 'WeightIniter': 'XavierNorm', 54 | 55 | # Params for NormWeightIniter only 56 | 'InitMean' : 0, 57 | 'InitStd' : 1, 58 | 59 | 'AtomFeatureSize': 39, 60 | 'BondFeatureSize': 10, 61 | 62 | 'GCNReadout': 'Add', 63 | 64 | 65 | 'SplitValidSeed': 8, 66 | 'SplitTestSeed': 8, 67 | 'BatchSize': 32, 68 | 69 | } 70 | AdjustableParamList = {} 71 | SpecificParamList = { 72 | 'DropRate':[0.2], 73 | 'WeightDecay':[4.5], 74 | 'lr':[3], 75 | 'SGCInputSize': [64], 76 | 'SGCHiddenSize': [128], 77 | 'SGCK': [1], 78 | 'SGCLayers': [3], 79 | 'FPSize':[64], 80 | 'DNNLayers':[[]], 81 | } 82 | 83 | expcontroller = ACExperimentProcessController(ExpOptions, [BasicParamList, AdjustableParamList, SpecificParamList]) 84 | 85 | expcontroller.ExperimentStart() 86 | -------------------------------------------------------------------------------- /ACNet/SMILESTransformerFew.py: -------------------------------------------------------------------------------- 1 | from ACComponents.ACProcessControllers import * 2 | 3 | ExpOptions = { 4 | 'Search': 'greedy', 5 | 'SeedPerOpt': 3, 6 | 'SubsetsNum': 13, 7 | 'OnlyEval': False, 8 | 'Finetune':True, 9 | } 10 | 11 | BasicParamList = { 12 | 'ExpName': 'ACFew', 13 | 'MainMetric': 'AUC', 14 | 'DataPath': './ACComponents/ACDataset/data_files/MMP_AC_Few_representation/SMILESTransformer.npz', 15 | 'RootPath': './TestExp/Few/SMILESTransformer/', 16 | 'CUDA_VISIBLE_DEVICES': '3', 17 | 'TaskNum': 1, 18 | 'ClassNum': 2, 19 | 'OutputSize': 2, 20 | 'Feature': 'Raw', 21 | 'Model': 'MLP', 22 | 23 | # if Feature == Raw 24 | 'RawFeatureSize': 1024, 25 | 26 | 'OnlySpecific': True, 27 | 'Weight': True, 28 | 'AC': True, 29 | 'PyG': False, 30 | 31 | 'ValidRate': 40000, 32 | 'PrintRate': 5, 33 | 'UpdateRate': 1, 34 | 
'SplitRate': [0.8, 0.1], 35 | 'Splitter': 'Random', 36 | 'MaxEpoch': 300, 37 | 'LowerThanMaxLimit': 12, 38 | 'DecreasingLimit': 8, 39 | 40 | # if OnlyEval == True: 41 | 'EvalModelPath': None, 42 | 'EvalDatasetPath': None, 43 | 'EvalLogAllPreds': None, 44 | 45 | 'Scheduler': 'PolynomialDecayLR', 46 | 47 | # Params for PolynomialDecayLR only 48 | 'WarmupEpoch': 2, 49 | 'LRMaxEpoch':300, 50 | 'EndLR':1e-9, 51 | 'Power':1.0, 52 | # Params for StepLR only 53 | 'LRStep': 30, 54 | 'LRGamma': 0.1, 55 | ########## 56 | 57 | 'WeightIniter': None, 58 | 59 | # Params for NormWeightIniter only 60 | 'InitMean' : 0, 61 | 'InitStd' : 1, 62 | 63 | # Training Params to be adujsted. If the param is not needed to be adjusted, set the value here. 64 | 'SplitValidSeed': 8, 65 | 'SplitTestSeed': 8, 66 | 'BatchSize': 8, 67 | } 68 | AdjustableParamList = {} 69 | SpecificParamList = { 70 | 'DropRate':[0.2], 71 | 'WeightDecay':[4.5], 72 | 'lr':[3], 73 | 'DNNLayers':[[128]], 74 | } 75 | 76 | 77 | expcontroller = ACExperimentProcessController(ExpOptions, [BasicParamList, AdjustableParamList, SpecificParamList]) 78 | 79 | expcontroller.ExperimentStart() 80 | 81 | -------------------------------------------------------------------------------- /ACNet/TrainingFramework/Dataset.py: -------------------------------------------------------------------------------- 1 | from TrainingFramework.FileUtils import * 2 | from TrainingFramework.Splitter import * 3 | from TrainingFramework.Featurizer import * 4 | from torch.utils import data 5 | from torch_geometric.data import InMemoryDataset 6 | import os 7 | 8 | class PyGMolDataset(InMemoryDataset): 9 | def __init__(self, graphdataset, opt, mode): 10 | self.graph_dataset = graphdataset 11 | self.opt = opt 12 | # todo(zqzhang): updated in ACv7 13 | self.dataset_path_root = self.opt.args['ExpDir'] + 'Dataset/' 14 | if not os.path.exists(self.dataset_path_root): 15 | os.mkdir(self.dataset_path_root) 16 | self.mode = mode 17 | if 
os.path.exists(self.dataset_path_root + 'processed/' + self.processed_file_names[0]): 18 | os.remove(self.dataset_path_root + 'processed/' + self.processed_file_names[0]) 19 | super(PyGMolDataset, self).__init__(root = self.dataset_path_root) 20 | self.data, self.slices = t.load(self.processed_paths[0]) 21 | 22 | @property 23 | def raw_file_names(self): 24 | return [self.opt.args['DataPath']] 25 | 26 | @property 27 | def processed_file_names(self): 28 | return [self.opt.args['ExpName'] + '_' + self.mode + '.pt'] 29 | 30 | def download(self): 31 | pass 32 | 33 | def process(self): 34 | data_list = self.graph_dataset 35 | data, slices = self.collate(data_list) 36 | # print("Processed without saving complete.") 37 | print("Saving processed files...") 38 | t.save((data, slices), self.processed_paths[0]) 39 | print('Saving complete!') 40 | 41 | # def __len__(self): 42 | # return len(self.graph_dataset) 43 | -------------------------------------------------------------------------------- /ACNet/TrainingFramework/FileUtils.py: -------------------------------------------------------------------------------- 1 | import re 2 | import json 3 | 4 | class FileLoader(object): 5 | def __init__(self, file_path): 6 | super(FileLoader, self).__init__() 7 | self.path = file_path 8 | 9 | def load(self): 10 | with open(self.path, 'r') as f: 11 | raw_data = f.readlines() 12 | return raw_data 13 | 14 | class JsonFileLoader(object): 15 | def __init__(self, file_path): 16 | super(JsonFileLoader, self).__init__() 17 | self.path = file_path 18 | 19 | def load(self): 20 | with open(self.path, 'r') as f: 21 | raw_dataset = json.load(f) 22 | return raw_dataset 23 | 24 | # todo(zqzhang): updated in TPv7 25 | class PTFileLoader(object): 26 | def __init__(self, file_path): 27 | super(PTFileLoader, self).__init__() 28 | self.path = file_path 29 | 30 | def load(self): 31 | import torch as t 32 | import numpy as np 33 | Content = t.load(self.path, map_location = 'cpu') 34 | if Content.__class__ == 
t.Tensor: 35 | Content = Content.cpu() 36 | Content = np.array(Content) 37 | return Content 38 | 39 | class NpyFileLoader(object): 40 | def __init__(self, file_path): 41 | super(NpyFileLoader, self).__init__() 42 | self.path = file_path 43 | 44 | def load(self): 45 | import numpy as np 46 | Content = np.load(self.path) 47 | return Content 48 | 49 | class NpzFileLoader(object): 50 | def __init__(self, file_path): 51 | super(NpzFileLoader, self).__init__() 52 | self.path = file_path 53 | 54 | def load(self): 55 | import numpy as np 56 | Content = np.load(self.path) 57 | return Content 58 | 59 | ############################################## 60 | # Parse files for different dataset files 61 | ############################################## 62 | class BasicFileParser(object): 63 | def __init__(self): 64 | super(BasicFileParser, self).__init__() 65 | 66 | def _parse_line(self, line): 67 | raise NotImplementedError( 68 | "Line parser not implemented." 69 | ) 70 | 71 | def parse_file(self, raw_data): 72 | Dataset = [] 73 | for line in raw_data: 74 | data = self._parse_line(line) 75 | Dataset.append(data) 76 | return Dataset 77 | 78 | class HIVFileParser(BasicFileParser): 79 | def __init__(self): 80 | super(HIVFileParser, self).__init__() 81 | 82 | def _parse_line(self, line): 83 | data = re.split(',', line) 84 | SMILES = data[0] 85 | Value = data[1] 86 | Value = re.split('\n', Value)[0] 87 | return {'SMILES': SMILES, 'Value': Value} 88 | 89 | 90 | class BBBPFileParser(BasicFileParser): 91 | def __init__(self): 92 | super(BBBPFileParser, self).__init__() 93 | 94 | def _parse_line(self, line): 95 | data = re.split(',', line) 96 | SMILES = data[0] 97 | Value = data[1] 98 | Value = re.split('\n', Value)[0] 99 | return {'SMILES': SMILES, 'Value': Value} 100 | 101 | 102 | class BACEFileParser(BasicFileParser): 103 | def __init__(self): 104 | super(BACEFileParser, self).__init__() 105 | 106 | def _parse_line(self, line): 107 | data = re.split(',', line) 108 | SMILES = data[0] 109 
| Value = data[1] 110 | Value = re.split('\n', Value)[0] 111 | return {'SMILES': SMILES, 'Value': Value} 112 | 113 | 114 | class QM9FileParser(BasicFileParser): 115 | def __init__(self): 116 | super(QM9FileParser, self).__init__() 117 | 118 | def _parse_line(self, line): 119 | data = re.split(',', line) 120 | SMILES = data[0] 121 | Value = data[1:] 122 | Value[-1] = re.split('\n', Value[-1])[0] 123 | return {'SMILES': SMILES, 'Value': Value} 124 | 125 | 126 | class FreeSolvFileParser(BasicFileParser): 127 | def __init__(self): 128 | super(FreeSolvFileParser, self).__init__() 129 | 130 | def _parse_line(self, line): 131 | data = re.split(',', line) 132 | SMILES = data[0] 133 | Value = data[1] 134 | Value = re.split('\n', Value)[0] 135 | return {'SMILES': SMILES, 'Value': Value} 136 | 137 | 138 | class LipopFileParser(BasicFileParser): 139 | def __init__(self): 140 | super(LipopFileParser, self).__init__() 141 | 142 | def _parse_line(self, line): 143 | data = re.split(',', line) 144 | SMILES = data[0] 145 | Value = data[1] 146 | Value = re.split('\n', Value)[0] 147 | return {'SMILES': SMILES, 'Value': Value} 148 | 149 | 150 | class MalariaFileParser(BasicFileParser): 151 | def __init__(self): 152 | super(MalariaFileParser, self).__init__() 153 | 154 | def _parse_line(self, line): 155 | data = re.split(',', line) 156 | SMILES = data[0] 157 | Value = data[1] 158 | Value = re.split('\n', Value)[0] 159 | return {'SMILES': SMILES, 'Value': Value} 160 | 161 | 162 | class CEPFileParser(BasicFileParser): 163 | def __init__(self): 164 | super(CEPFileParser, self).__init__() 165 | 166 | def _parse_line(self, line): 167 | data = re.split(',', line) 168 | SMILES = data[0] 169 | Value = data[1] 170 | Value = re.split('\n', Value)[0] 171 | return {'SMILES': SMILES, 'Value': Value} 172 | 173 | 174 | class SHP2FileParser(BasicFileParser): 175 | def __init__(self): 176 | super(SHP2FileParser, self).__init__() 177 | 178 | def _parse_line(self, line): 179 | data = re.split(',', line) 
180 | SMILES = data[0] 181 | Value = data[1] 182 | Value = re.split('\n', Value)[0] 183 | return {'SMILES': SMILES, 'Value': Value} 184 | 185 | 186 | class Tox21FileParser(BasicFileParser): 187 | def __init__(self): 188 | super(Tox21FileParser, self).__init__() 189 | 190 | def _parse_line(self, line): 191 | data = re.split(',', line) 192 | SMILES = data[0] 193 | Value = data[1:] 194 | Value[-1] = re.split('\n', Value[-1])[0] 195 | for i in range(len(Value)): 196 | value = Value[i] 197 | if value == '': 198 | Value[i] = '-1' 199 | return {'SMILES': SMILES, 'Value': Value} 200 | 201 | 202 | class ToxcastFileParser(BasicFileParser): 203 | def __init__(self): 204 | super(ToxcastFileParser, self).__init__() 205 | 206 | def _parse_line(self, line): 207 | # Convert '1.0/0.0' to '1/0' 208 | # Convert missing value '' to '-1' 209 | data = re.split(',', line) 210 | SMILES = data[0] 211 | Value = data[1:] 212 | Value[-1] = re.split('\n', Value[-1])[0] 213 | for i in range(len(Value)): 214 | value = Value[i] 215 | if value == '': 216 | Value[i] = '-1' 217 | elif value == '0.0': 218 | Value[i] = '0' 219 | elif value == '1.0': 220 | Value[i] = '1' 221 | return {'SMILES': SMILES, 'Value': Value} 222 | 223 | 224 | class MUVFileParser(BasicFileParser): 225 | def __init__(self): 226 | super(MUVFileParser, self).__init__() 227 | 228 | def _parse_line(self, line): 229 | data = re.split(',', line) 230 | SMILES = data[0] 231 | Value = data[1:] 232 | Value[-1] = re.split('\n', Value[-1])[0] 233 | for i in range(len(Value)): 234 | value = Value[i] 235 | if value == '': 236 | Value[i] = '-1' 237 | return {"SMILES": SMILES, 'Value': Value} 238 | 239 | 240 | class ClinToxFileParser(BasicFileParser): 241 | def __init__(self): 242 | super(ClinToxFileParser, self).__init__() 243 | 244 | def _parse_line(self, line): 245 | data = re.split(',', line) 246 | SMILES = data[0] 247 | Value = data[1:] 248 | Value[-1] = re.split('\n', Value[-1])[0] 249 | return {'SMILES': SMILES, 'Value': Value} 250 | 
251 | 252 | class SIDERFileParser(BasicFileParser): 253 | def __init__(self): 254 | super(SIDERFileParser, self).__init__() 255 | 256 | def _parse_line(self, line): 257 | data = re.split(',', line) 258 | SMILES = data[0] 259 | Value = data[1:] 260 | Value[-1] = re.split('\n', Value[-1])[0] 261 | return {'SMILES': SMILES, 'Value': Value} 262 | 263 | 264 | class ESOLFileParser(BasicFileParser): 265 | def __init__(self): 266 | super(ESOLFileParser, self).__init__() 267 | 268 | def _parse_line(self, line): 269 | data = re.split(',', line) 270 | SMILES = data[0] 271 | Value = data[1] 272 | Value = re.split('\n', Value)[0] 273 | return {'SMILES': SMILES, 'Value': Value} 274 | ################################################ -------------------------------------------------------------------------------- /ACNet/TrainingFramework/Initializer.py: -------------------------------------------------------------------------------- 1 | import torch as t 2 | import torch.nn as nn 3 | 4 | class Initializer(object): 5 | def __init__(self): 6 | super(Initializer, self).__init__() 7 | 8 | def WeightInit(self, tensor): 9 | self._init_func(tensor) 10 | 11 | def _init_func(self, tensor): 12 | raise NotImplementedError("Weight Initialization Function is not implemented.") 13 | 14 | 15 | class NormalInitializer(Initializer): 16 | def __init__(self, opt): 17 | self.opt = opt 18 | super(NormalInitializer, self).__init__() 19 | 20 | def _init_func(self, tensor): 21 | mean = self.opt.args['InitMean'] 22 | std = self.opt.args['InitStd'] 23 | nn.init.normal_(tensor, mean, std) 24 | 25 | 26 | class XavierNormalInitializer(Initializer): 27 | def __init__(self): 28 | super(XavierNormalInitializer, self).__init__() 29 | 30 | def _init_func(self, tensor): 31 | if tensor.dim() == 1: 32 | nn.init.constant_(tensor, 0) 33 | else: 34 | nn.init.xavier_normal_(tensor) -------------------------------------------------------------------------------- /ACNet/TrainingFramework/Metrics.py: 
-------------------------------------------------------------------------------- 1 | import torch as t 2 | from sklearn.metrics import roc_auc_score 3 | import torch.nn.functional as F 4 | 5 | class ACC(object): 6 | def __init__(self): 7 | super(ACC, self).__init__() 8 | self.name = 'ACC' 9 | 10 | def compute(self, answer, label): 11 | assert len(answer) == len(label) 12 | total = len(answer) 13 | answer = t.Tensor(answer) 14 | label = t.Tensor(label) 15 | pred = t.argmax(answer, dim=1) 16 | correct = sum(pred == label).float() 17 | acc = correct / total 18 | return acc.item() 19 | 20 | 21 | class AUC(object): 22 | def __init__(self): 23 | super(AUC, self).__init__() 24 | self.name = 'AUC' 25 | 26 | def compute(self, answer, label): 27 | assert len(answer) == len(label) 28 | answer = t.Tensor(answer) 29 | answer = answer[:,1] 30 | answer = answer.tolist() 31 | result = roc_auc_score(y_true = label, y_score= answer) 32 | return result 33 | 34 | 35 | class MAE(object): 36 | def __init__(self): 37 | super(MAE, self).__init__() 38 | self.name = 'MAE' 39 | 40 | def compute(self, answer, label): 41 | assert len(answer) == len(label) 42 | answer = t.Tensor(answer).squeeze(-1) 43 | label = t.Tensor(label) 44 | MAE = F.l1_loss(answer, label, reduction = 'mean') 45 | return MAE.item() 46 | 47 | class RMSE(object): 48 | def __init__(self): 49 | super(RMSE, self).__init__() 50 | self.name = 'RMSE' 51 | 52 | def compute(self, answer, label): 53 | assert len(answer) == len(label) 54 | answer = t.Tensor(answer).squeeze(-1) 55 | label = t.Tensor(label) 56 | RMSE = F.mse_loss(answer, label, reduction = 'mean').sqrt() 57 | return RMSE.item() 58 | -------------------------------------------------------------------------------- /ACNet/TrainingFramework/Scheduler.py: -------------------------------------------------------------------------------- 1 | 2 | from torch.optim.lr_scheduler import _LRScheduler 3 | 4 | 5 | class PolynomialDecayLR(_LRScheduler): 6 | 7 | def __init__(self, 
optimizer, warmup_updates, tot_updates, lr, end_lr, power, last_epoch=-1, verbose=False): 8 | self.warmup_updates = warmup_updates 9 | self.tot_updates = tot_updates 10 | self.lr = lr 11 | self.end_lr = end_lr 12 | self.power = power 13 | super(PolynomialDecayLR, self).__init__(optimizer, last_epoch, verbose) 14 | 15 | def get_lr(self): 16 | if self._step_count <= self.warmup_updates: 17 | self.warmup_factor = self._step_count / float(self.warmup_updates) 18 | lr = self.warmup_factor * self.lr 19 | elif self._step_count >= self.tot_updates: 20 | lr = self.end_lr 21 | else: 22 | warmup = self.warmup_updates 23 | lr_range = self.lr - self.end_lr 24 | pct_remaining = 1 - (self._step_count - warmup) / ( 25 | self.tot_updates - warmup 26 | ) 27 | lr = lr_range * pct_remaining ** (self.power) + self.end_lr 28 | 29 | # todo(zqzhang): updated in TPv7 30 | print(f"lr: {lr}") 31 | return [lr for group in self.optimizer.param_groups] 32 | 33 | def _get_closed_form_lr(self): 34 | assert False 35 | 36 | 37 | class EmptyLRSchedular(_LRScheduler): 38 | 39 | def __init__(self, optimizer, lr, last_epoch=-1, verbose=False): 40 | self.lr = lr 41 | super(EmptyLRSchedular, self).__init__(optimizer, last_epoch, verbose) 42 | 43 | 44 | def get_lr(self): 45 | lr = self.lr 46 | return [lr for group in self.optimizer.param_groups] -------------------------------------------------------------------------------- /ACNet/TrainingFramework/Splitter.py: -------------------------------------------------------------------------------- 1 | import random 2 | import torch 3 | from TrainingFramework.ChemUtils import * 4 | #from ProcessControllers import * 5 | 6 | class BasicSplitter(object): 7 | # A splitter module is used to split a dataset 8 | # with a entire dataset given, the splitter will return the index of the samples of different subsets, 9 | # or return the subsets directly. 
10 | # return: (sets), (sets_index) 11 | def __init__(self): 12 | super(BasicSplitter, self).__init__() 13 | 14 | def split(self, dataset, opt): 15 | raise NotImplementedError( 16 | 'Dataset splitter not implemented.' 17 | ) 18 | 19 | class RandomSplitter(BasicSplitter): 20 | # Module for randomly splitting dataset 21 | def __init__(self): 22 | super(RandomSplitter, self).__init__() 23 | 24 | def CheckClass(self, dataset, tasknum): 25 | # To check whether both classes of samples appear in the dataset. 26 | c0cnt = np.zeros(tasknum) 27 | c1cnt = np.zeros(tasknum) 28 | for data in dataset: 29 | value = data['Value'] 30 | assert tasknum == len(value) 31 | for task in range(tasknum): 32 | # todo(zqzhang): updated in TPv7 33 | if (value[task] == '0') or (value[task] == 0): 34 | c0cnt[task] += 1 35 | elif (value[task] == '1') or (value[task] == 1): 36 | c1cnt[task] += 1 37 | if 0 in c0cnt: 38 | print("Invalid splitting.") 39 | return False 40 | elif 0 in c1cnt: 41 | print("Invalid splitting.") 42 | return False 43 | else: 44 | return True 45 | 46 | def split(self, dataset, opt): 47 | rate = opt.args['SplitRate'] 48 | validseed = opt.args['SplitValidSeed'] 49 | testseed = opt.args['SplitTestSeed'] 50 | total_num = len(dataset) 51 | np_dataset = np.array(dataset) 52 | index = np.arange(total_num) 53 | 54 | if len(rate) == 1: 55 | train_num = int(total_num * rate[0]) 56 | valid_num = total_num - train_num 57 | endflag = 0 58 | 59 | while not endflag: 60 | random.seed(validseed) 61 | random.shuffle(index) 62 | set1_idx = index[:train_num] 63 | set2_idx = index[train_num:] 64 | 65 | assert len(set1_idx) == train_num 66 | assert len(set2_idx) == valid_num 67 | 68 | set1 = np_dataset[set1_idx] 69 | set2 = np_dataset[set2_idx] 70 | if opt.args['ClassNum'] == 2: 71 | endflag = self.CheckClass(set2, opt.args['TaskNum']) 72 | validseed += 1 73 | else: 74 | endflag = 1 75 | return (set1, set2), (set1_idx, set2_idx) 76 | 77 | if len(rate) == 2: 78 | train_num = int(total_num * 
rate[0]) 79 | valid_num = int(total_num * rate[1]) 80 | test_num = total_num - train_num - valid_num 81 | endflag = 0 82 | 83 | while not endflag: 84 | random.seed(testseed) 85 | random.shuffle(index) 86 | set3_idx = index[(train_num + valid_num):] 87 | set3 = np_dataset[set3_idx] 88 | 89 | if opt.args['ClassNum'] == 2: 90 | endflag = self.CheckClass(set3, opt.args['TaskNum']) 91 | testseed += 1 92 | else: 93 | endflag = 1 94 | 95 | set_idx_remain = index[:(train_num + valid_num)] 96 | endflag = 0 97 | while not endflag: 98 | random.seed(validseed) 99 | random.shuffle(set_idx_remain) 100 | 101 | set1_idx = set_idx_remain[:train_num] 102 | set2_idx = set_idx_remain[train_num:] 103 | set1 = np_dataset[set1_idx] 104 | set2 = np_dataset[set2_idx] 105 | 106 | if opt.args['ClassNum'] == 2: 107 | endflag = self.CheckClass(set2, opt.args['TaskNum']) 108 | validseed += 1 109 | else: 110 | endflag = 1 111 | 112 | assert len(set1) == train_num 113 | assert len(set2) == valid_num 114 | assert len(set3) == test_num 115 | 116 | return (set1, set2, set3), (set1_idx, set2_idx, set3_idx) 117 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright 2022 Ziqiao Zhang, Yatao Bian 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 4 | 5 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 
6 | 7 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 8 | 9 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # ACNet 2 | 3 | The code repository of paper *ACNet: A Benchmark for Activity Cliff Prediction* 4 | 5 | 6 | ## homepage 7 | Introduction of this project: https://drugai.github.io/ACNet/ 8 | 9 | 10 | ## requirements 11 | - pytorch >= 1.11 12 | - numpy >= 1.21.2 13 | - pandas >= 1.2.3 14 | - rdkit >= 2020.09.5 15 | - ogb >= 1.3.3 16 | - pyg >= 2.0.4 17 | - scikit-learn >= 1.0.2 18 | 19 | ## Usage 20 | ### Clone the repository 21 | Run the following command to clone the repository to your device. 22 | 23 | ``` 24 | git clone https://github.com/DrugAI/ACNet.git 25 | cd ACNet/ACNet 26 | ``` 27 | 28 | **Note**: The current path `./` indicates `ACNet/` in the following part. 29 | 30 | ### Create conda environment 31 | Run the following command to create the environment. 
32 | 33 | `conda env create -f ./ACNetEnviron.yml` 34 | 35 | ### Download data files 36 | Download data files from [here](https://drive.google.com/drive/folders/1JogBAg9AI0pUxY44w9_g8RHboLf7V5q7?usp=sharing) 37 | 38 | Run the following command to put the data files into the directories 39 | 40 | ``` 41 | mkdir ./ACComponents/ACDataset/data_files 42 | mkdir ./ACComponents/ACDataset/data_files/raw_data 43 | mkdir ./ACComponents/ACDataset/data_files/generated_datasets 44 | mv all_smiles_target.csv ./ACComponents/ACDataset/data_files/raw_data/ 45 | mv mmp_ac_s_distinct.csv ./ACComponents/ACDataset/data_files/raw_data/ 46 | mv mmp_ac_s_neg_distinct.csv ./ACComponents/ACDataset/data_files/raw_data/ 47 | ``` 48 | 49 | ### Generate ACNet datasets 50 | 51 | Run the following command to generate ACNet datasets with **Default Configuration** 52 | 53 | ``` 54 | python ./ACComponents/ACDataset/GenerateACDatasets.py 55 | ``` 56 | 57 | The generated dataset files are in `./ACComponents/ACDataset/data_files/generated_datasets/` 58 | 59 | The configuration can be customized in `./ACComponents/ACDataset/DataUtils.py` 60 | 61 | 62 | ``` 63 | class Config(object): 64 | def __init__(self): 65 | super(Config, self).__init__() 66 | self.mixed = True # Whether to generate the Mix dataset, default True 67 | self.random_sample_negative = False # Whether to randomly sample the negative samples toward a certain target, default False. 68 | self.random_sample_negative_seed = 8 # Random seed for sampling negative samples if self.random_sample_negative == True. 69 | self.discard_extreme_imbalance = False # Whether to discard the subsets that are extremely imbalanced, default False. 70 | self.pn_rate_threshold = 0.2 # The threshold of Pos/Neg for extremely imbalanced subsets if self.discard_extreme_imbalance == True. 71 | self.discard_few_pos = True # Whether to discard the subsets that have only few positive samples. Default True. 
72 | self.few_pos_threshold = 10 # The threshold to discard tasks with few positive samples, default 10. 73 | self.large_thres = 20000 # Thresholds for grouping tasks into subsets. 74 | self.medium_thres = 1000 75 | self.small_thres = 100 76 | ``` 77 | 78 | ### Reproducing 79 | 80 | Our experimental results are obtained on RTX 3090 GPU, E5-2667 CPU, 256GB memory, and Ubuntu 18.04.5. 81 | 82 | To reproduce the results, execute the following steps. 83 | 84 | 85 | 86 | 1. Create the directories for model checkpoints. 87 | 88 | ``` 89 | mkdir ./TestExp 90 | mkdir ./TestExp/Large 91 | mkdir ./TestExp/Medium 92 | mkdir ./TestExp/Small 93 | mkdir ./TestExp/Few 94 | mkdir ./TestExp/Mix 95 | ``` 96 | 97 | 2. Run the scripts in `./` 98 | 99 | For instance, to run the ECFP+MLP on Large subset: 100 | 101 | ``` 102 | mkdir ./TestExp/Large/FPMLP 103 | python ./FPMLPLarge.py 104 | ``` 105 | 106 | To run the GRU on Small subset: 107 | 108 | ``` 109 | mkdir ./TestExp/Small/GRU 110 | python ./GRUSmall.py 111 | ``` 112 | 113 | 3. Run the following command before experiments of the Graphormer model 114 | 115 | ``` 116 | cd ./Models/Graphormer 117 | python setup.py build_ext --inplace 118 | ``` 119 | 120 | 4. Molecular representations extracted by PTMs 121 | 122 | Representations extracted by 7 PTMs for the Few subset can be downloaded [here](https://drive.google.com/drive/folders/1JogBAg9AI0pUxY44w9_g8RHboLf7V5q7?usp=sharing) 123 | 124 | Run the following command to put them into the directory. 125 | 126 | ``` 127 | mv MMP_AC_Few_representation ./ACComponents/ACDataset/data_files/ 128 | ``` 129 | 130 | 131 | **Note**: 132 | The GNNs (GCN, GIN, SGC) in the baseline experiments are implemented by the PyG package, which uses the `torch.scatter_` function. 
133 | Remember that the `torch.scatter_` function is non-deterministic (See [here](https://pytorch.org/docs/stable/generated/torch.Tensor.scatter_.html#torch.Tensor.scatter_) ), so the results of the GNNs may be slightly different from our reported results in the manuscript. 134 | 135 | 136 | **Note**: 137 | The baseline experiments of ACNet are conducted by a *self-made* training framework. 138 | It is not as well-constructed as other training frameworks, e.g. *torchdrug*. 139 | It just serves as an example to show how our benchmark works and to show the reproducibility of our results reported in the manuscript. 140 | We can only guarantee that the experimental scripts can work to reproduce the results, but the stability of the training framework is not guaranteed when using other functions. 141 | And the illustration of this training framework is not our point. 142 | 143 | 144 | 145 | 146 | ## Illustration 147 | ### Data files 148 | 149 | - `all_smiles_target.csv` 150 | Contains 142,307 activities screened from ChEMBL. 151 | 152 | - `mmp_ac_s_distinct.csv` 153 | Contains 21,352 MMP-Cliffs. 154 | 155 | - `mmp_ac_s_neg_distinct.csv` 156 | Contains 423,282 non-AC MMPs. 157 | 158 | - `target_dictionary.xlsx` 159 | A dictionary that matches target ids to the target names. Contains 1006 targets. 160 | 161 | - `MMP_AC.json` 162 | All of the MMP-Cliffs and non-AC MMPs. Contains samples against 190 targets. 163 | 164 | - `MMP_AC_Discarded.json` 165 | Discarded samples when organizing 21,352 positive samples and 423,282 negative samples. 166 | 167 | - `MMP_AC_Large.json`, `MMP_AC_Medium.json`, `MMP_AC_Small.json`, `MMP_AC_Few.json`, `MMP_AC_Mixed_Screened.json` 168 | Five subsets of the ACNet benchmark generated based on the configuration file. 169 | 170 | 171 | ### Data structure 172 | Each json file corresponds to a subset of ACNet, structured as a dictionary. 173 | Keys are target ids, and values are datasets of the targets. 
174 | The datasets of targets are lists of samples. 175 | And each sample is a dictionary with keys `SMILES1, SMILES2, Value`. 176 | 177 | Using Large subset as an example: 178 | 179 | ``` 180 | >>> dataset.keys() 181 | dict_keys(['72', '130', '10102']) 182 | >>> taskset = dataset['72'] 183 | >>> len(taskset) 184 | 26376 185 | >>> data = taskset[0] 186 | >>> data 187 | {'SMILES1': 'OC1(c2ccc(Cl)cc2)CCN(Cc2c[nH]c3ccccc23)CC1', 'SMILES2': 'OCC1(c2ccc(Cl)cc2)CCN(Cc2c[nH]c3ccccc23)CC1', 'Value': '1'} 188 | ``` 189 | 190 | --------------------------------------------------------------------------------