├── ACNet
    ├── ACComponents
    │   ├── ACChemUtils.py
    │   ├── ACDataset
    │   │   ├── DataPreprocess.py
    │   │   ├── DataUtils.py
    │   │   ├── Dataset.py
    │   │   └── GenerateACDatasets.py
    │   ├── ACModels.py
    │   ├── ACProcessControllers.py
    │   └── ACSplitter.py
    ├── ACNetEnviron.yml
    ├── CMPNNLarge.py
    ├── CMPNNMedium.py
    ├── CMPNNSmall.py
    ├── ChemBERTFew.py
    ├── FPMLPFew.py
    ├── FPMLPLarge.py
    ├── FPMLPMedium.py
    ├── FPMLPMixRandom.py
    ├── FPMLPMixTarget.py
    ├── FPMLPSmall.py
    ├── GCNLarge.py
    ├── GCNMedium.py
    ├── GCNMixRandom.py
    ├── GCNMixTarget.py
    ├── GCNSmall.py
    ├── GINLarge.py
    ├── GINMedium.py
    ├── GINSmall.py
    ├── GROVERFew.py
    ├── GRULarge.py
    ├── GRUMedium.py
    ├── GRUMixRandom.py
    ├── GRUMixTarget.py
    ├── GRUSmall.py
    ├── GraphLoGFew.py
    ├── GraphormerLarge.py
    ├── GraphormerMedium.py
    ├── GraphormerMixRandom.py
    ├── GraphormerMixTarget.py
    ├── GraphormerSmall.py
    ├── LSTMLarge.py
    ├── LSTMMedium.py
    ├── LSTMSmall.py
    ├── MATFew.py
    ├── Models
    │   ├── BasicGNNs.py
    │   ├── CMPNN
    │   │   ├── CMPNNFeaturizer.py
    │   │   ├── CMPNNModel.py
    │   │   └── nn_utils.py
    │   ├── ClassifierModel.py
    │   └── Graphormer
    │   │   ├── Graphormer.py
    │   │   ├── algos.c
    │   │   ├── algos.cpython-37m-x86_64-linux-gnu.so
    │   │   ├── algos.cpython-38-x86_64-linux-gnu.so
    │   │   ├── algos.pyx
    │   │   ├── build
    │   │       ├── temp.linux-x86_64-3.7
    │   │       │   └── algos.o
    │   │       └── temp.linux-x86_64-3.8
    │   │       │   └── algos.o
    │   │   ├── collator.py
    │   │   ├── data.py
    │   │   ├── setup.py
    │   │   └── wrapper.py
    ├── Pretrain8Few.py
    ├── PretrainGNNsFew.py
    ├── SGCLarge.py
    ├── SGCMedium.py
    ├── SGCSmall.py
    ├── SMILESTransformerFew.py
    └── TrainingFramework
    │   ├── ChemUtils.py
    │   ├── Dataset.py
    │   ├── Evaluator.py
    │   ├── Featurizer.py
    │   ├── FileUtils.py
    │   ├── Initializer.py
    │   ├── Metrics.py
    │   ├── ProcessControllers.py
    │   ├── Scheduler.py
    │   ├── Splitter.py
    │   └── Utils.py
├── LICENSE
└── README.md


/ACNet/ACComponents/ACChemUtils.py:
--------------------------------------------------------------------------------
  1 | import rdkit
  2 | import rdkit.Chem as Chem
  3 | from rdkit.Chem import AllChem
  4 | from rdkit import DataStructs
  5 | from rdkit.Chem import MACCSkeys
  6 | from rdkit.Chem import AllChem
  7 | from rdkit.Chem import Draw
  8 | import numpy as np
  9 | from TrainingFramework.ChemUtils import BasicChecker, GetMol
 10 | 
 11 | class ACMolChecker(BasicChecker):
 12 |     def __init__(self, pair_wise=None):
 13 |         super(ACMolChecker, self).__init__()
 14 |         self.pair_wise = pair_wise
 15 | 
 16 |     def check(self, dataset):
 17 |         origin_dataset = dataset
 18 |         checked_dataset = []
 19 |         discarded_dataset = []
 20 |         for item in origin_dataset:
 21 |             if not self.pair_wise:
 22 |                 smiles = item['SMILES']
 23 |                 mol = GetMol(smiles)
 24 |                 if mol:
 25 |                     checked_dataset.append(item)
 26 |                 else:
 27 |                     discarded_dataset.append(item)
 28 |             else:
 29 |                 smiles1 = item['SMILES1']
 30 |                 smiles2 = item['SMILES2']
 31 |                 mol1 = GetMol(smiles1)
 32 |                 mol2 = GetMol(smiles2)
 33 |                 if mol1 and mol2:
 34 |                     checked_dataset.append(item)
 35 |                 else:
 36 |                     discarded_dataset.append(item)
 37 |         assert len(checked_dataset) + len(discarded_dataset) == len(origin_dataset)
 38 |         print("Total num of origin dataset: ", len(origin_dataset))
 39 |         print(len(checked_dataset), " molecules have passed check.")
 40 |         print(len(discarded_dataset), " molecules have been discarded.")
 41 |         print("Discarded molecules:")
 42 |         print(discarded_dataset)
 43 |         return checked_dataset
 44 | 
 45 | class ACAttentiveFPChecker(BasicChecker):
 46 |     # Rules proposed in the source code of Attentive FP
 47 |     # To screen the samples that not satisfy the rules
 48 |     # more rules can be added.
 49 |     def __init__(self, max_atom_num, max_degree, pair_wise=None):
 50 |         super(ACAttentiveFPChecker, self).__init__()
 51 |         self.max_atom_num = max_atom_num
 52 |         self.max_degree = max_degree
 53 |         self.mol_error_flag = 0
 54 |         self.pair_wise = pair_wise
 55 | 
 56 |     def check(self, dataset):
 57 |         origin_dataset = dataset
 58 |         checked_dataset = []
 59 |         discarded_dataset = []
 60 |         for item in origin_dataset:
 61 |             if self.pair_wise:
 62 |                 smiles1 = item['SMILES1']
 63 |                 smiles2 = item['SMILES2']
 64 |                 mol1 = GetMol(smiles1)
 65 |                 mol2 = GetMol(smiles2)
 66 |                 if mol1 and mol2:
 67 |                     #self.check_single_bonds(mol)
 68 |                     self.check_degree(mol1)
 69 |                     self.check_degree(mol2)
 70 |                     self.check_max_atom_num(mol1)
 71 |                     self.check_max_atom_num(mol2)
 72 |                     if self.mol_error_flag == 0:
 73 |                         checked_dataset.append(item)
 74 |                     else:
 75 |                         discarded_dataset.append(item)
 76 |                         self.mol_error_flag = 0
 77 |                 else:
 78 |                     discarded_dataset.append(item)
 79 |                     self.mol_error_flag = 0
 80 |             else:
 81 |                 smiles = item['SMILES']
 82 |                 mol = GetMol(smiles)
 83 |                 #check
 84 |                 if mol:
 85 |                     #self.check_single_bonds(mol)
 86 |                     self.check_degree(mol)
 87 |                     self.check_max_atom_num(mol)
 88 |                     if self.mol_error_flag == 0:
 89 |                         checked_dataset.append(item)
 90 |                     else:
 91 |                         discarded_dataset.append(item)
 92 |                         self.mol_error_flag = 0
 93 |                 else:
 94 |                     discarded_dataset.append(item)
 95 |                     self.mol_error_flag = 0
 96 | 
 97 |         assert len(checked_dataset) + len(discarded_dataset) == len(origin_dataset)
 98 |         print("Total num of origin dataset: ", len(origin_dataset))
 99 |         print(len(checked_dataset), " molecules has passed check.")
100 |         print(len(discarded_dataset), " molecules has been discarded.")
101 |         print("Discarded molecules:")
102 |         print(discarded_dataset)
103 |         return checked_dataset
104 | 
105 |     def check_degree(self, mol):
106 |         for atom in mol.GetAtoms():
107 |             if atom.GetDegree() > self.max_degree:
108 |                 self.mol_error_flag = 1
109 |                 break
110 | 
111 |     def check_max_atom_num(self, mol):
112 |         if len(mol.GetAtoms()) > self.max_atom_num:
113 |             self.mol_error_flag = 1
114 | 
115 |     def check_single_bonds(self, mol):
116 |         # check whether there is at least one single bond in the molecule
117 |         # this check is not used in FraGAT
118 |         self.mol_error_flag = 1
119 |         for bond in mol.GetBonds():
120 |             if bond.GetBondType() == Chem.rdchem.BondType.SINGLE:
121 |                 if not bond.IsInRing():
122 |                     self.mol_error_flag = 0
123 |                     break


--------------------------------------------------------------------------------
/ACNet/ACComponents/ACDataset/DataPreprocess.py:
--------------------------------------------------------------------------------
  1 | import pandas as pd
  2 | import os
  3 | import numpy as np
  4 | from ACComponents.ACDataset.DataUtils import Config, SaveJson, LoadJson
  5 | import random
  6 | 
  7 | OriginDatasetAddrAll = './data_files/raw_data/all_smiles_target.csv'
  8 | OriginDatasetAddrPos = './data_files/raw_data/mmp_ac_s_distinct.csv'
  9 | OriginDatasetAddrNeg = './data_files/raw_data/mmp_ac_s_neg_distinct.csv'
 10 | 
 11 | GeneratedDatasetAddrAll = './data_files/generated_datasets/MMP_AC.json'
 12 | GeneratedDatasetAddrLarge = './data_files/generated_datasets/MMP_AC_Large.json'
 13 | GeneratedDatasetAddrMedium = './data_files/generated_datasets/MMP_AC_Medium.json'
 14 | GeneratedDatasetAddrSmall = './data_files/generated_datasets/MMP_AC_Small.json'
 15 | GeneratedDatasetAddrFew = './data_files/generated_datasets/MMP_AC_Few.json'
 16 | 
 17 | DiscardedDatasetAddr = './data_files/generated_datasets/MMP_AC_Discarded.json'
 18 | 
 19 | GeneratedDatasetAddrMixed = './data_files/generated_datasets/MMP_AC_Mixed.json'
 20 | GeneratedDatasetAddrMixedScreened = './data_files/generated_datasets/MMP_AC_Mixed_Screened.json'
 21 | 
 22 | 
 23 | 
 24 | def ReadACDatafile(AddrPos, AddrNeg):
 25 |     dfpos = pd.read_csv(AddrPos)
 26 |     dfneg = pd.read_csv(AddrNeg)
 27 | 
 28 |     targets = {}
 29 | 
 30 |     total_items1 = len(dfpos)
 31 |     total_items2 = len(dfneg)
 32 | 
 33 |     for i in range(total_items1):
 34 |         target = str(dfpos['tid'][i])
 35 |         if target not in targets.keys():
 36 |             targets.update({target:[]})
 37 | 
 38 |         SMILES1 = dfpos['c1'][i]
 39 |         SMILES2 = dfpos['c2'][i]
 40 |         targets[target].append({'SMILES1': SMILES1, 'SMILES2': SMILES2, 'Value': '1'})
 41 | 
 42 |     discard_cnt = 0
 43 |     valid_neg_cnt = 0
 44 |     for i in range(total_items2):
 45 |         target = str(dfneg['tid'][i])
 46 |         if target in targets.keys():
 47 |             SMILES1 = dfneg['c1'][i]
 48 |             SMILES2 = dfneg['c2'][i]
 49 |             targets[target].append({'SMILES1': SMILES1, 'SMILES2': SMILES2, 'Value': '0'})
 50 |             valid_neg_cnt += 1
 51 |         if target not in targets.keys():
 52 |             discard_cnt += 1
 53 | 
 54 |     print(f"Total positive count: {total_items1}")
 55 |     print(f"Total negative count: {total_items2}")
 56 |     print(f"Valid negative count: {valid_neg_cnt}")
 57 |     print(f"Discarded negative count: {discard_cnt}")
 58 | 
 59 |     return targets
 60 | 
 61 | def RandomScreenNeg(dataset, config):
 62 |     screened_dataset = {}
 63 |     discarded_dataset = {}
 64 | 
 65 |     org_tot_cnt = 0
 66 |     allowed_ratio = config.pn_rate_threshold
 67 |     for target in dataset.keys():
 68 |         print(f"Checking tid:{target}")
 69 |         subset = dataset[target]
 70 |         org_tot_cnt += len(subset)
 71 |         pos_set = []
 72 |         neg_set = []
 73 |         for item in subset:
 74 |             if item['Value'] == '1':
 75 |                 pos_set.append(item)
 76 |             else:
 77 |                 neg_set.append(item)
 78 |         pos_cnt = len(pos_set)
 79 |         neg_cnt = len(neg_set)
 80 |         print(f"Pos/Neg ratio: {pos_cnt/neg_cnt}.")
 81 | 
 82 |         if (pos_cnt / neg_cnt) > allowed_ratio:
 83 |             print(f"Allowed.")
 84 |             screened_dataset.update({target:subset})
 85 |         else:
 86 |             print(f"Screening...")
 87 |             screened_subset = pos_set.copy()
 88 |             max_sample_num = int(pos_cnt / allowed_ratio)
 89 |             print(f"Pos cnt: {pos_cnt}.")
 90 |             random.seed(config.random_sample_negative_seed)
 91 |             random.shuffle(neg_set)
 92 |             chosen_neg = neg_set[:max_sample_num]
 93 |             print(f"Randomly chosen: {len(chosen_neg)}")
 94 |             discarded_subset = neg_set[max_sample_num:]
 95 |             print(f"Discard: {len(discarded_subset)}")
 96 |             screened_subset.extend(chosen_neg)
 97 |             print(f"subset after screening: {len(screened_subset)}")
 98 |             print(f"ratio after screening: {len(pos_set) / len(chosen_neg)}.")
 99 |             screened_dataset.update({target:screened_subset})
100 |             discarded_dataset.update({target:discarded_subset})
101 | 
102 |     print(f"Dataset after screening: {len(screened_dataset)}")
103 |     tot_cnt = 0
104 |     dis_cnt = 0
105 |     for key in screened_dataset.keys():
106 |         subset = screened_dataset[key]
107 |         tot_cnt+=len(subset)
108 |     for key in discarded_dataset.keys():
109 |         subset = discarded_dataset[key]
110 |         dis_cnt+=len(subset)
111 |     print(f"Number of samples reserved:{tot_cnt}")
112 |     print(f"Number of samples discarded:{dis_cnt}")
113 |     assert (tot_cnt + dis_cnt) == org_tot_cnt
114 | 
115 |     return screened_dataset, discarded_dataset
116 | 
117 | 
118 | 
119 | 
120 | 
121 | 
122 | 
123 | 
124 | def SubsetNumDistribution(dataset):
125 |     cnt = 0
126 |     cnt_distribution = []
127 |     cnt_distribution_tid = []
128 |     for idx, tid in enumerate(dataset):
129 |         item = dataset[(str(tid))]
130 |         cnt_distribution_tid.append(tid)
131 |         cnt_distribution.append(len(item))
132 |         cnt += len(item)
133 | 
134 |     print(f"Total number of samples in the dataset: {cnt}")
135 |     print(f"Size of all Subsets in the dataset: {cnt_distribution}")
136 |     print(f"The tid of all subsets in the dataset: {cnt_distribution_tid}")
137 |     print(f"The maximum size of subsets: {max(cnt_distribution)}")
138 |     print(f"The minimum size of subsets: {min(cnt_distribution)}")
139 | 
140 |     return cnt, cnt_distribution, cnt_distribution_tid
141 | 
142 | def SplitSubsetsByCnt(dataset, cnt_distribution, cnt_distribution_tid, config):
143 |     cnt_distribution = np.array(cnt_distribution)
144 | 
145 |     large_thres = config.large_thres
146 |     medium_thres = config.medium_thres
147 |     small_thres = config.small_thres
148 | 
149 |     cnt_subset_large = np.where(cnt_distribution > large_thres)[0]
150 |     cnt_subset_medium = np.where((cnt_distribution <= large_thres)&(cnt_distribution > medium_thres))[0]
151 |     cnt_subset_small = np.where((cnt_distribution <= medium_thres)&(cnt_distribution > small_thres))[0]
152 |     cnt_subset_few = np.where((cnt_distribution <= small_thres)&(cnt_distribution > 1))[0]
153 | 
154 |     print(f"The number of subsets in Large set is: {len(cnt_subset_large)}")
155 |     print(f"The number of subsets in Medium set is: {len(cnt_subset_medium)}")
156 |     print(f"The number of subsets in Small set is: {len(cnt_subset_small)}")
157 |     print(f"The number of subsets in Few set is: {len(cnt_subset_few)}")
158 | 
159 |     subset_large = {}
160 |     subset_medium = {}
161 |     subset_small = {}
162 |     subset_few = {}
163 | 
164 |     for i in range(len(cnt_subset_large)):
165 |         loc = cnt_subset_large[i]
166 |         tid = cnt_distribution_tid[loc]
167 |         item = dataset[tid]
168 |         subset_large.update({tid: item})
169 |     SaveJson(GeneratedDatasetAddrLarge, subset_large)
170 | 
171 | 
172 |     for i in range(len(cnt_subset_medium)):
173 |         loc = cnt_subset_medium[i]
174 |         tid = cnt_distribution_tid[loc]
175 |         item = dataset[tid]
176 |         subset_medium.update({tid: item})
177 |     SaveJson(GeneratedDatasetAddrMedium, subset_medium)
178 | 
179 |     for i in range(len(cnt_subset_small)):
180 |         loc = cnt_subset_small[i]
181 |         tid = cnt_distribution_tid[loc]
182 |         item = dataset[tid]
183 |         subset_small.update({tid: item})
184 |     SaveJson(GeneratedDatasetAddrSmall, subset_small)
185 | 
186 |     for i in range(len(cnt_subset_few)):
187 |         loc = cnt_subset_few[i]
188 |         tid = cnt_distribution_tid[loc]
189 |         item = dataset[tid]
190 |         subset_few.update({tid: item})
191 |     SaveJson(GeneratedDatasetAddrFew, subset_few)
192 | 
193 | def ScreenFewPosSubsets(dataset, config):
194 |     screened_dataset = dataset.copy()
195 |     discarded_subsets = {}
196 |     few_pos_threshold = config.few_pos_threshold
197 | 
198 |     for idx, tid in enumerate(dataset):
199 |         print(f"Checking subset of target {tid}")
200 |         item = dataset[str(tid)]
201 |         subset_num = len(item)
202 |         print(f"Total num of samples of this target: {subset_num}")
203 |         pos_cnt = 0
204 |         for i in range(subset_num):
205 |             sample = item[i]
206 |             if sample['Value'] == '1':
207 |                 pos_cnt += 1
208 | 
209 |         print(f"Total positive sample num {pos_cnt}")
210 |         if pos_cnt < few_pos_threshold:
211 |             print(f"Discard this subset.")
212 |             screened_dataset.pop(str(tid))
213 |             discarded_subsets.update({str(tid): item})
214 | 
215 |     assert len(screened_dataset) + len(discarded_subsets) == len(dataset)
216 |     return screened_dataset, discarded_subsets
217 | 
218 | def ScreenImbalancedSubsets(dataset, config):
219 |     screened_dataset = dataset.copy()
220 |     discarded_subsets = {}
221 |     pn_rate_threshold = config.pn_rate_threshold
222 | 
223 |     for idx, tid in enumerate(dataset):
224 |         print(f"Checking subset of target {tid}")
225 |         item = dataset[str(tid)]
226 |         subset_size = len(item)
227 |         print(f"Total num of samples of this target: {subset_size}")
228 | 
229 |         pos_cnt = 0
230 |         neg_cnt = 0
231 |         for i in range(subset_size):
232 |             sample = item[i]
233 |             if sample['Value'] == '1':
234 |                 pos_cnt += 1
235 |             elif sample['Value'] == '0':
236 |                 neg_cnt += 1
237 |             else:
238 |                 raise ValueError(
239 |                         f'Wrong Value of target {tid} and sample {sample} with idx {i}.'
240 |                 )
241 | 
242 |         rate = pos_cnt / neg_cnt
243 |         print(f"Positive / Negative rate is: {rate}")
244 | 
245 |         if rate < pn_rate_threshold:
246 |             print(f"Discard this subset.")
247 |             screened_dataset.pop(str(tid))
248 |             discarded_subsets.update({str(tid): item})
249 | 
250 |     assert len(screened_dataset) + len(discarded_subsets) == len(dataset)
251 |     return screened_dataset, discarded_subsets
252 | 
253 | def MixAllSubsets(dataset):
254 |     mixed_dataset = {'All':[]}
255 | 
256 |     # total_targets_num = len(mixed_dataset)
257 |     for idx, tid in enumerate(dataset):
258 |         item = dataset[str(tid)]
259 |         subset_size = len(item)
260 | 
261 |         for i in range(subset_size):
262 |             sample = item[i]
263 |             sample.update({'Target': tid})
264 |             mixed_dataset['All'].append(sample)
265 | 
266 |     return mixed_dataset
267 | 
268 | def CheckConflictSamples(dataset):
269 |     total_num = len(dataset)
270 |     print(f"Total number of samples in mixed dataset is {total_num}.")
271 | 
272 |     MolPairDict = {}
273 |     conflict_cnt = 0
274 |     for item in dataset:
275 |         smiles1 = item['SMILES1']
276 |         smiles2 = item['SMILES2']
277 |         molpair = smiles1 + '?' + smiles2
278 |         molpair_rev = smiles2 + '?' + smiles1
279 |         if (molpair not in MolPairDict.keys()) & (molpair_rev not in MolPairDict.keys()):
280 |             MolPairDict.update({molpair: item})
281 |         else:
282 |             if molpair in MolPairDict.keys():
283 |                 conflict_molpair = molpair
284 |             elif molpair_rev in MolPairDict.keys():
285 |                 conflict_molpair = molpair_rev
286 |             previous_value = MolPairDict[conflict_molpair]['Value']
287 |             current_value = item['Value']
288 |             if previous_value != current_value:
289 |                 print(f"Confilict encountered!")
290 |                 conflict_cnt += 1
291 |                 print(f"Previous conflict sample: {conflict_molpair} : {MolPairDict[conflict_molpair]}.")
292 |                 print(f"Current sample: {item}.")
293 |     print(f"Total conflict sample number is {conflict_cnt}")
294 | 
295 | def ScreenConflictSamples(dataset):
296 |     total_num = len(dataset)
297 |     print(f"Total number of samples in the mixed dataset is {total_num}.")
298 | 
299 |     MolPairDict = {}
300 |     ScreenedDataset = []
301 |     MolPairIndexDict = {}
302 |     ToBeScreenedMolPairList = []
303 |     discarded_cnt = 0
304 |     repeated_cnt = 0
305 |     for item in dataset:
306 |         smiles1 = item['SMILES1']
307 |         smiles2 = item['SMILES2']
308 |         molpair = smiles1 + '?' + smiles2
309 |         molpair_rev = smiles2 + '?' + smiles1
310 | 
311 |         if (molpair not in MolPairDict.keys()) & (molpair_rev not in MolPairDict.keys()):
312 |             MolPairDict.update({molpair: item})
313 |             ScreenedDataset.append(item)
314 |             MolPairIndexDict.update({molpair: len(ScreenedDataset)})
315 | 
316 |         else:
317 |             repeated_cnt += 1
318 |             if molpair in MolPairDict.keys():
319 |                 conflict_molpair = molpair
320 |             elif molpair_rev in MolPairDict.keys():
321 |                 conflict_molpair = molpair_rev
322 |             previous_value = MolPairDict[conflict_molpair]['Value']
323 |             current_value = item['Value']
324 | 
325 |             if previous_value != current_value:
326 |                 # previous_index = MolPairIndexDict[conflict_molpair]
327 |                 # previous_item = ScreenedDataset[previous_index]
328 |                 # previous_molpair = previous_item['SMILES1'] + '?' + previous_item['SMILES2']
329 |                 # assert previous_molpair == conflict_molpair
330 |                 # ScreenedDataset.pop(previous_index)
331 |                 if MolPairDict[conflict_molpair] not in ToBeScreenedMolPairList:
332 |                     ToBeScreenedMolPairList.append(MolPairDict[conflict_molpair])
333 |                 discarded_cnt += 1
334 | 
335 |     for item in ToBeScreenedMolPairList:
336 |         try:
337 |             ScreenedDataset.remove(item)
338 |         except:
339 |             print(f"{item} have been removed before.")
340 | 
341 |     print(f"Total repeated sample number is {repeated_cnt}.")
342 |     print(f"Total discarded sample number is {discarded_cnt}.")
343 |     print(f"Total to be screened sample number is {len(ToBeScreenedMolPairList)}.")
344 |     print(f"Remained sample number is {len(ScreenedDataset)}.")
345 | 
346 |     return ScreenedDataset
347 | 
348 | 
349 | ####################################
350 | 
351 | 
352 | def ACDatasetPreprocess(config):
353 |     if not os.path.exists(GeneratedDatasetAddrAll):
354 |         dataset = ReadACDatafile(OriginDatasetAddrPos, OriginDatasetAddrNeg)
355 |         discarded_dataset = {}
356 |         if config.discard_few_pos:
357 |             screened_dataset, discarded_dataset1 = ScreenFewPosSubsets(dataset, config)
358 |             discarded_dataset.update(discarded_dataset1)
359 |         if config.random_sample_negative:
360 |             screened_dataset, discarded_dataset2 = RandomScreenNeg(screened_dataset, config)
361 |             discarded_dataset.update(discarded_dataset2)
362 |         if config.discard_extreme_imbalance:
363 |             screened_dataset, discarded_dataset3 = ScreenImbalancedSubsets(screened_dataset, config)
364 |             discarded_dataset.update(discarded_dataset2)
365 | 
366 |         SaveJson(GeneratedDatasetAddrAll, screened_dataset)
367 |         SaveJson(DiscardedDatasetAddr, discarded_dataset)
368 | 
369 |         dataset = screened_dataset
370 | 
371 |     else:
372 |         dataset = LoadJson(GeneratedDatasetAddrAll)
373 | 
374 |     print(f'Total targets(subsets) of the dataset: {len(dataset)}')
375 | 
376 |     cnt, cnt_distribution, cnt_distribution_tid = SubsetNumDistribution(dataset)
377 | 
378 |     SplitSubsetsByCnt(dataset, cnt_distribution, cnt_distribution_tid, config)
379 | 
380 |     if config.mixed:
381 |         mixed_dataset = MixAllSubsets(dataset)
382 |         print(f"Total number of samples in the mixed dataset is {len(mixed_dataset['All'])}")
383 |         SaveJson(GeneratedDatasetAddrMixed, mixed_dataset)
384 |         CheckConflictSamples(mixed_dataset['All'])
385 |         screened_mixed_dataset = {'All': []}
386 |         screened_mixed_dataset['All'] = ScreenConflictSamples(mixed_dataset['All'])
387 |         SaveJson(GeneratedDatasetAddrMixedScreened, screened_mixed_dataset)
388 | 
389 | 
390 | 
391 | 


--------------------------------------------------------------------------------
/ACNet/ACComponents/ACDataset/DataUtils.py:
--------------------------------------------------------------------------------
 1 | import json
 2 | import re
 3 | import pandas as pd
 4 | 
 5 | 
 6 | class Config(object):
 7 |     def __init__(self):
 8 |         super(Config, self).__init__()
 9 |         self.mixed = True
10 |         self.random_sample_negative = False
11 |         self.random_sample_negative_seed = 8
12 |         self.discard_extreme_imbalance = False  # Discard the subsets that are extremely imbalanced. Default False.
13 |         self.pn_rate_threshold = 0.2           # The threshold of Pos/Neg for extremely imbalanced subsets. Default 0.1.
14 |         self.discard_few_pos = True            # Discard the subsets that have only few positive samples. Default True.
15 |         self.few_pos_threshold = 10            # The threshold to identify few positive
16 |         self.large_thres = 20000
17 |         self.medium_thres = 1000
18 |         self.small_thres = 100
19 | 
20 | 
21 | 
22 | 
23 | def SaveJson(Addr, object):
24 |     with open(Addr, 'w') as f:
25 |         json.dump(object,f)
26 | 
27 | def LoadJson(Addr):
28 |     with open(Addr, 'r') as f:
29 |         content = json.load(f)
30 |     return content


--------------------------------------------------------------------------------
/ACNet/ACComponents/ACDataset/GenerateACDatasets.py:
--------------------------------------------------------------------------------
1 | import ACComponents.ACDataset.DataPreprocess as DP
2 | import ACComponents.ACDataset.DataUtils as DU
3 | 
4 | cuf_config = DU.Config()
5 | DP.ACDatasetPreprocess(cuf_config)


--------------------------------------------------------------------------------
/ACNet/ACComponents/ACModels.py:
--------------------------------------------------------------------------------
  1 | import torch as t
  2 | import torch.nn as nn
  3 | from sklearn import svm
  4 | from Models.CMPNN.CMPNNModel import *
  5 | from Models.BasicGNNs import *
  6 | from Models.Graphormer.Graphormer import Graphormer
  7 | from Models.ClassifierModel import DNN
  8 | 
  9 | 
 10 | class ACPredMLP(nn.Module):
 11 |     def __init__(self, opt):
 12 |         super(ACPredMLP, self).__init__()
 13 |         self.opt = opt
 14 |         # todo(zqzhang): updated in ACv8
 15 |         if self.opt.args['Feature'] == 'FP':
 16 |             self.input_size = self.opt.args['nBits']
 17 |         elif self.opt.args['Feature'] == 'Raw':
 18 |             self.input_size = self.opt.args['RawFeatureSize']
 19 |         self.Classifier = DNN(
 20 |                 input_size = 2 * self.input_size,
 21 |                 output_size = self.opt.args['OutputSize'],
 22 |                 layer_sizes = self.opt.args['DNNLayers'],
 23 |                 opt = self.opt
 24 |         )
 25 | 
 26 |     def forward(self, Input):
 27 |         Input1, Input2 = Input
 28 |         Input1 = Input1.to(t.device(f"cuda:{self.opt.args['CUDA_VISIBLE_DEVICES']}" if t.cuda.is_available() else 'cpu'))
 29 |         Input2 = Input2.to(t.device(f"cuda:{self.opt.args['CUDA_VISIBLE_DEVICES']}" if t.cuda.is_available() else 'cpu'))
 30 |         # print("Input1:")
 31 |         # print(Input1)
 32 |         # print(Input1.size())
 33 |         # print('Input2:')
 34 |         # print(Input2)
 35 |         # print(Input2.size())
 36 |         # [batch_size, nBits]
 37 |         PairwiseMolFeature = t.cat([Input1, Input2], dim=1)
 38 |         # print("PairwiseFeature:")
 39 |         # print(PairwiseMolFeature)
 40 |         # print(PairwiseMolFeature.size())
 41 |         prediction = self.Classifier(PairwiseMolFeature)
 42 |         # print("Prediction:")
 43 |         # print(prediction)
 44 |         # print(prediction.size())
 45 | 
 46 |         return prediction
 47 | 
 48 | class ACPredLSTM(nn.Module):
 49 |     def __init__(self, opt):
 50 |         super(ACPredLSTM, self).__init__()
 51 |         self.opt = opt
 52 |         self.WordEmbed = nn.Embedding(self.opt.args['MaxDictLength'],
 53 |                                       self.opt.args['FPSize'],
 54 |                                       padding_idx = self.opt.args['MaxDictLength']-1)
 55 |         self.MolFeatureExtractor = nn.LSTM(input_size = self.opt.args['FPSize'],
 56 |                                            hidden_size = self.opt.args['FPSize'],
 57 |                                            num_layers = self.opt.args['LSTMLayers'],
 58 |                                            batch_first = True,
 59 |                                            bidirectional = True
 60 | 
 61 |         )
 62 |         self.Classifier = DNN(
 63 |                 input_size = 2*self.opt.args['FPSize'],
 64 |                 output_size = self.opt.args['OutputSize'],
 65 |                 layer_sizes = self.opt.args['DNNLayers'],
 66 |                 opt = self.opt
 67 |         )
 68 | 
 69 |     def forward(self, Input):
 70 |         Input1, Input2 = Input
 71 |         Input1 = Input1.to(t.device(f"cuda:{self.opt.args['CUDA_VISIBLE_DEVICES']}" if t.cuda.is_available() else 'cpu'))
 72 |         Input2 = Input2.to(t.device(f"cuda:{self.opt.args['CUDA_VISIBLE_DEVICES']}" if t.cuda.is_available() else 'cpu'))
 73 |         Embed1 = self.WordEmbed(Input1)
 74 |         Embed2 = self.WordEmbed(Input2)
 75 |         _, (MolFeature1,_) = self.MolFeatureExtractor(Embed1)
 76 |         _, (MolFeature2,_) = self.MolFeatureExtractor(Embed2)
 77 |         # print(MolFeature1)
 78 |         # print(MolFeature1.size())
 79 |         # MolFeature: [ LSTMLayer*Bi, Batch_size, FP_size]
 80 |         MolFeature1 = MolFeature1.permute(1,0,2)
 81 |         MolFeature2 = MolFeature2.permute(1,0,2)
 82 |         # MolFeature: [Batch_size, LSMTLayer*Bi, FP_size]
 83 |         MolFeature1 = MolFeature1.sum(dim=1)
 84 |         MolFeature2 = MolFeature2.sum(dim=1)
 85 |         # MolFeature: [Batch_size, FP_size]
 86 |         PairwiseMolFeature = t.cat([MolFeature1,MolFeature2],dim=1)
 87 |         prediction = self.Classifier(PairwiseMolFeature)
 88 |         return prediction
 89 | 
 90 | class ACPredGRU(nn.Module):
 91 |     def __init__(self, opt):
 92 |         super(ACPredGRU, self).__init__()
 93 |         self.opt = opt
 94 |         self.WordEmbed = nn.Embedding(self.opt.args['MaxDictLength'],
 95 |                                       self.opt.args['FPSize'],
 96 |                                       padding_idx = self.opt.args['MaxDictLength'] - 1)
 97 |         self.MolFeatureExtractor = nn.GRU(input_size = self.opt.args['FPSize'],
 98 |                                           hidden_size = self.opt.args['FPSize'],
 99 |                                           num_layers = self.opt.args['GRULayers'],
100 |                                           batch_first = True,
101 |                                           bidirectional = True)
102 |         self.Classifier = DNN(
103 |                 input_size = 2 * self.opt.args['FPSize'],
104 |                 output_size = self.opt.args['OutputSize'],
105 |                 layer_sizes = self.opt.args['DNNLayers'],
106 |                 opt = self.opt
107 |         )
108 | 
109 |     def forward(self, Input):
110 |         Input1, Input2 = Input
111 |         Input1 = Input1.to(t.device(f"cuda:{self.opt.args['CUDA_VISIBLE_DEVICES']}" if t.cuda.is_available() else 'cpu'))
112 |         Input2 = Input2.to(t.device(f"cuda:{self.opt.args['CUDA_VISIBLE_DEVICES']}" if t.cuda.is_available() else 'cpu'))
113 |         Embed1 = self.WordEmbed(Input1)
114 |         Embed2 = self.WordEmbed(Input2)
115 |         _, MolFeature1 = self.MolFeatureExtractor(Embed1)
116 |         _, MolFeature2 = self.MolFeatureExtractor(Embed2)
117 |         # print(MolFeature1)
118 |         # print(MolFeature1.size())
119 |         # MolFeature: [ GRULayer*Bi, Batch_size, FP_size]
120 |         MolFeature1 = MolFeature1.permute(1, 0, 2)
121 |         MolFeature2 = MolFeature2.permute(1, 0, 2)
122 |         # MolFeature: [Batch_size, GRULayer*Bi, FP_size]
123 |         MolFeature1 = MolFeature1.sum(dim = 1)
124 |         MolFeature2 = MolFeature2.sum(dim = 1)
125 |         # MolFeature: [Batch_size, FP_size]
126 |         PairwiseMolFeature = t.cat([MolFeature1, MolFeature2], dim = 1)
127 |         prediction = self.Classifier(PairwiseMolFeature)
128 |         return prediction
129 | 
130 | class ACPredCMPNN(nn.Module):
131 |     def __init__(self, opt):
132 |         super(ACPredCMPNN, self).__init__()
133 |         self.opt = opt
134 |         self.MolFeatureExtractor = CMPNNModel(
135 |                     self.opt.args['dataset_type']=='classification',
136 |                     self.opt.args['dataset_type']=='multiclass',
137 |                     opt = self.opt)
138 |         self.Classifier = DNN(
139 |                 input_size = 2 * self.opt.args['FPSize'],
140 |                 output_size = self.opt.args['OutputSize'],
141 |                 layer_sizes = self.opt.args['DNNLayers'],
142 |                 opt = self.opt
143 |         )
144 | 
145 |     def forward(self, Input):
146 |         Input1, Input2 = Input
147 |         MolFeature1 = self.MolFeatureExtractor(Input1)
148 |         MolFeature2 = self.MolFeatureExtractor(Input2)
149 |         # print(f"size of Mol1 and Mol2: {MolFeature1.size()}")
150 |         PairwiseMolFeature = t.cat([MolFeature1,MolFeature2],dim=1)
151 |         # print(f'size of PairwiseMolFeature: {PairwiseMolFeature.size()}')
152 |         prediction = self.Classifier(PairwiseMolFeature)
153 | 
154 |         return prediction
155 | 
156 | class ACPredGCN(nn.Module):
157 |     def __init__(self, opt):
158 |         super(ACPredGCN, self).__init__()
159 |         self.opt = opt
160 |         if not self.opt.args['PyG']:
161 |             print(f"PyG arg should be {True}")
162 |             raise ValueError
163 |         self.MolFeatureExtractor = PyGGCN(self.opt, FeatureExtractor = True)
164 |         self.Classifier = DNN(
165 |                 input_size = 2 *self.opt.args['FPSize'],
166 |                 output_size = self.opt.args['OutputSize'],
167 |                 layer_sizes = self.opt.args['DNNLayers'],
168 |                 opt = self.opt
169 |         )
170 | 
171 |     def forward(self, Input):
172 |         self.reset_batch(Input)
173 |         Input = Input.to(t.device(f"cuda:{self.opt.args['CUDA_VISIBLE_DEVICES']}" if t.cuda.is_available() else 'cpu'))
174 |         # print(f"Input.batch: {Input.batch}")
175 |         MolEmbeddings = self.MolFeatureExtractor(Input)
176 |         MolEmbeddings = self.decompose_mol_pair(MolEmbeddings)
177 | 
178 |         prediction = self.Classifier(MolEmbeddings)
179 |         return prediction
180 | 
181 |     def reset_batch(self, Input):
182 |         batch = Input.batch
183 |         atom_nums = Input.atom_num
184 |         bond_nums = Input.bond_num
185 |         MolNum = len(atom_nums)
186 |         # print(f"batch: {batch}")
187 |         # print(f"atom_nums: {atom_nums}")
188 |         # print(f"MolNum: {MolNum}")
189 |         # print(f"len batch: {len(batch)}")
190 |         # print(f"sum atom_nums: {t.sum(atom_nums)}")
191 |         assert len(batch) == t.sum(atom_nums)
192 | 
193 |         # reset batch by atom num
194 |         mol_cnt = 0
195 |         mol_batch = t.Tensor([])
196 |         for i in range(MolNum):
197 |             tmp = t.Tensor([mol_cnt])
198 |             tmp = tmp.repeat(atom_nums[i].item())
199 |             assert len(tmp) == atom_nums[i]
200 |             mol_batch = t.cat([mol_batch, tmp]).long()
201 |             mol_cnt += 1
202 |         Input.batch = mol_batch
203 | 
204 |     def decompose_mol_pair(self, MolEmbeddings):
205 |         # print(f"MolEmbedding size: {MolEmbeddings.size()}")
206 |         mol_num = MolEmbeddings.size()[0]
207 |         EmbLength = MolEmbeddings.size()[1]
208 |         assert mol_num % 2 == 0
209 |         return MolEmbeddings.view(int(mol_num/2), int(EmbLength*2))
210 | 
211 | class ACPredGIN(nn.Module):
212 |     def __init__(self, opt):
213 |         super(ACPredGIN, self).__init__()
214 |         self.opt = opt
215 |         if not self.opt.args['PyG']:
216 |             print(f"PyG arg should be {True}")
217 |             raise ValueError
218 |         self.MolFeatureExtractor = PyGGIN(self.opt, FeatureExtractor = True)
219 |         self.Classifier = DNN(
220 |                 input_size = 2 *self.opt.args['FPSize'],
221 |                 output_size = self.opt.args['OutputSize'],
222 |                 layer_sizes = self.opt.args['DNNLayers'],
223 |                 opt = self.opt
224 |         )
225 | 
226 |     def forward(self, Input):
227 |         self.reset_batch(Input)
228 |         Input = Input.to(t.device(f"cuda:{self.opt.args['CUDA_VISIBLE_DEVICES']}" if t.cuda.is_available() else 'cpu'))
229 |         # print(f"Input.batch: {Input.batch}")
230 |         MolEmbeddings = self.MolFeatureExtractor(Input)
231 |         MolEmbeddings = self.decompose_mol_pair(MolEmbeddings)
232 | 
233 |         prediction = self.Classifier(MolEmbeddings)
234 |         return prediction
235 | 
236 |     def reset_batch(self, Input):
237 |         batch = Input.batch
238 |         atom_nums = Input.atom_num
239 |         bond_nums = Input.bond_num
240 |         MolNum = len(atom_nums)
241 |         # print(f"batch: {batch}")
242 |         # print(f"atom_nums: {atom_nums}")
243 |         # print(f"MolNum: {MolNum}")
244 |         # print(f"len batch: {len(batch)}")
245 |         # print(f"sum atom_nums: {t.sum(atom_nums)}")
246 |         assert len(batch) == t.sum(atom_nums)
247 | 
248 |         # reset batch by atom num
249 |         mol_cnt = 0
250 |         mol_batch = t.Tensor([])
251 |         for i in range(MolNum):
252 |             tmp = t.Tensor([mol_cnt])
253 |             tmp = tmp.repeat(atom_nums[i].item())
254 |             assert len(tmp) == atom_nums[i]
255 |             mol_batch = t.cat([mol_batch, tmp]).long()
256 |             mol_cnt += 1
257 |         Input.batch = mol_batch
258 | 
259 |     def decompose_mol_pair(self, MolEmbeddings):
260 |         # print(f"MolEmbedding size: {MolEmbeddings.size()}")
261 |         mol_num = MolEmbeddings.size()[0]
262 |         EmbLength = MolEmbeddings.size()[1]
263 |         assert mol_num % 2 == 0
264 |         return MolEmbeddings.view(int(mol_num/2), int(EmbLength*2))
265 | 
266 | class ACPredSGC(nn.Module):
267 |     def __init__(self, opt):
268 |         super(ACPredSGC, self).__init__()
269 |         self.opt = opt
270 |         if not self.opt.args['PyG']:
271 |             print(f"PyG arg should be {True}")
272 |             raise ValueError
273 |         self.MolFeatureExtractor = PyGSGC(self.opt, FeatureExtractor = True)
274 |         self.Classifier = DNN(
275 |                 input_size = 2 *self.opt.args['FPSize'],
276 |                 output_size = self.opt.args['OutputSize'],
277 |                 layer_sizes = self.opt.args['DNNLayers'],
278 |                 opt = self.opt
279 |         )
280 | 
281 |     def forward(self, Input):
282 |         self.reset_batch(Input)
283 |         Input = Input.to(t.device(f"cuda:{self.opt.args['CUDA_VISIBLE_DEVICES']}" if t.cuda.is_available() else 'cpu'))
284 |         # print(f"Input.batch: {Input.batch}")
285 |         MolEmbeddings = self.MolFeatureExtractor(Input)
286 |         MolEmbeddings = self.decompose_mol_pair(MolEmbeddings)
287 | 
288 |         prediction = self.Classifier(MolEmbeddings)
289 |         return prediction
290 | 
291 |     def reset_batch(self, Input):
292 |         batch = Input.batch
293 |         atom_nums = Input.atom_num
294 |         bond_nums = Input.bond_num
295 |         MolNum = len(atom_nums)
296 |         # print(f"batch: {batch}")
297 |         # print(f"atom_nums: {atom_nums}")
298 |         # print(f"MolNum: {MolNum}")
299 |         # print(f"len batch: {len(batch)}")
300 |         # print(f"sum atom_nums: {t.sum(atom_nums)}")
301 |         assert len(batch) == t.sum(atom_nums)
302 | 
303 |         # reset batch by atom num
304 |         mol_cnt = 0
305 |         mol_batch = t.Tensor([])
306 |         for i in range(MolNum):
307 |             tmp = t.Tensor([mol_cnt])
308 |             tmp = tmp.repeat(atom_nums[i].item())
309 |             assert len(tmp) == atom_nums[i]
310 |             mol_batch = t.cat([mol_batch, tmp]).long()
311 |             mol_cnt += 1
312 |         Input.batch = mol_batch
313 | 
314 |     def decompose_mol_pair(self, MolEmbeddings):
315 |         # print(f"MolEmbedding size: {MolEmbeddings.size()}")
316 |         mol_num = MolEmbeddings.size()[0]
317 |         EmbLength = MolEmbeddings.size()[1]
318 |         assert mol_num % 2 == 0
319 |         return MolEmbeddings.view(int(mol_num/2), int(EmbLength*2))
320 | 
321 | class ACPredGraphormer(nn.Module):
322 |     def __init__(self, opt):
323 |         super(ACPredGraphormer, self).__init__()
324 |         self.opt = opt
325 |         self.MolFeatureExtractor = Graphormer(
326 |                 num_encoder_layers = self.opt.args['num_encoder_layers'],
327 |                 num_attention_heads = self.opt.args['num_attention_heads'],
328 |                 embedding_dim = self.opt.args['embedding_dim'],
329 |                 dropout_rate = self.opt.args['dropout_rate'],
330 |                 intput_dropout_rate = self.opt.args['intput_dropout_rate'],
331 |                 ffn_dim = self.opt.args['ffn_dim'],
332 |                 edge_type = self.opt.args['edge_type'],
333 |                 multi_hop_max_dist = self.opt.args['multi_hop_max_dist'],
334 |                 attention_dropout_rate = self.opt.args['attention_dropout_rate'],
335 |                 flag = self.opt.args['flag'],
336 |                 opt = self.opt,
337 |                 mode = 'Extractor'
338 |         )
339 |         self.Classifier = DNN(
340 |                 input_size = 2 * self.opt.args['embedding_dim'],
341 |                 output_size = self.opt.args['OutputSize'],
342 |                 layer_sizes = self.opt.args['DNNLayers'],
343 |                 opt = self.opt
344 |         )
345 | 
346 |     def forward(self, Input):
347 |         Input1, Input2 = Input
348 |         MolFeature1 = self.MolFeatureExtractor(Input1)
349 |         MolFeature2 = self.MolFeatureExtractor(Input2)
350 |         PairwiseMolFeature = t.cat([MolFeature1, MolFeature2], dim=1)
351 |         prediction = self.Classifier(PairwiseMolFeature)
352 | 
353 |         return prediction
354 | 
355 | 


--------------------------------------------------------------------------------
/ACNet/ACComponents/ACSplitter.py:
--------------------------------------------------------------------------------
  1 | from TrainingFramework.Splitter import *
  2 | 
  3 | 
  4 | class TargetSplitter(BasicSplitter):
  5 |     def __init__(self):
  6 |         super(TargetSplitter, self).__init__()
  7 | 
  8 |     def split(self, dataset, opt):
  9 |         rate = opt.args['SplitRate']
 10 |         validseed = opt.args['SplitValidSeed']
 11 |         testseed = opt.args['SplitTestSeed']
 12 |         total_num = len(dataset)
 13 |         if total_num == 1:
 14 |             dataset = dataset[0]
 15 |             total_num = len(dataset)
 16 | 
 17 |         tarid2size, tarid2sample = self.GetTargetidList(dataset)
 18 |         tarids = tarid2size.keys()
 19 | 
 20 |         # calculate the splitting thres
 21 |         if len(rate) == 1:
 22 |             assert rate[0] < 1
 23 |             train_num = int(total_num * rate[0])
 24 |             valid_num = total_num - train_num
 25 |         elif len(rate) == 2:
 26 |             assert rate[0] + rate[1] < 1
 27 |             train_num = int(total_num * rate[0])
 28 |             valid_num = int(total_num * rate[1])
 29 |             test_num = total_num - train_num - valid_num
 30 |         else:
 31 |             print("Wrong splitting rate")
 32 |             raise RuntimeError
 33 | 
 34 |         if len(rate) == 1:
 35 |             sample_size = int(len(tarids) * (1-rate[0]))
 36 |             validtargets, chosen_cnt = self.BinaryClassSample(tarid2size, tarids, sample_size, valid_num, validseed)
 37 |             validset, valididx = self.Target2Samples(validtargets,tarid2sample)
 38 |             assert len(validset) == chosen_cnt
 39 |             traintargets = self.excludedtargets(validtargets, tarids)
 40 |             trainset, trainidx = self.Target2Samples(traintargets, tarid2sample)
 41 |             assert len(validset) + len(trainset) == total_num
 42 |             return (trainset, validset), (trainidx, valididx)
 43 |         elif len(rate) == 2:
 44 |             sample_size = int(len(tarids) * (1-rate[0]-rate[1]))
 45 |             testtargets, chosen_cnt = self.BinaryClassSample(tarid2size, tarids, sample_size, test_num, testseed)
 46 |             testset, testidx = self.Target2Samples(testtargets, tarid2sample)
 47 |             assert len(testset) == chosen_cnt
 48 |             remained_tarids = self.excludedtargets(testtargets, tarids)
 49 |             sample_size = int(len(tarids) * rate[1])
 50 |             validtargets, chosen_cnt = self.BinaryClassSample(tarid2size, remained_tarids, sample_size, valid_num, validseed)
 51 |             validset, valididx = self.Target2Samples(validtargets, tarid2sample)
 52 |             assert len(validset) == chosen_cnt
 53 |             traintargets = self.excludedtargets(validtargets, remained_tarids)
 54 |             trainset, trainidx = self.Target2Samples(traintargets, tarid2sample)
 55 |             assert len(validset)+len(testset)+len(trainset) == total_num
 56 |             return (trainset, validset, testset), (trainidx, valididx, testidx)
 57 | 
 58 |     def BinaryClassSample(self, tarid2size, tarids, sample_size, optimal_count, seed):
 59 | 
 60 |         count = 0
 61 |         tried_times = 0
 62 |         error_rate = 0.1
 63 | 
 64 | 
 65 |         while (count < optimal_count * (1-error_rate)) or (count > optimal_count * (1+error_rate)):
 66 |             tried_times += 1
 67 | 
 68 |             if tried_times % 5000 == 0:
 69 |                 print("modify error rate.")
 70 |                 error_rate += 0.05
 71 |                 print("modify sample target number.")
 72 |                 sample_size = int(sample_size * 1.1)
 73 |                 assert sample_size < len(tarids)
 74 | 
 75 |             seed += 1
 76 |             random.seed(seed)
 77 |             chosen_targets = random.sample(tarids, sample_size)
 78 |             count = sum([tarid2size[target] for target in chosen_targets])
 79 | 
 80 |         print(f"Sample num: {count}")
 81 |         print(f"Tried times: {tried_times}")
 82 |         print(f"Available seed: {seed}")
 83 | 
 84 | 
 85 |         return chosen_targets, count
 86 | 
 87 |     def Target2Samples(self, chosen_targets, tarid2sample):
 88 |         set = []
 89 |         for targetid in chosen_targets:
 90 |             targetset = tarid2sample[targetid]
 91 |             set.extend(targetset)
 92 |         idx = []
 93 |         for item in set:
 94 |             id = item['idx']
 95 |             idx.append(id)
 96 |         return set, idx
 97 | 
 98 |     def excludedtargets(self, chosen_targets, tarids):
 99 |         excluded_targets = []
100 |         for target in tarids:
101 |             if target not in chosen_targets:
102 |                 excluded_targets.append(target)
103 |         return excluded_targets
104 | 
105 |     def GetTargetidList(self, dataset):
106 |         tarid2size = {}
107 |         tarid2sample = {}
108 |         for item in dataset:
109 |             tarid = item['Target']
110 |             if tarid not in tarid2size.keys():
111 |                 tarid2size.update({tarid: 0})
112 |                 tarid2sample.update({tarid: []})
113 |             tarid2size[tarid] += 1
114 |             tarid2sample[tarid].append(item)
115 |         return tarid2size, tarid2sample
116 | 
117 | 
118 | def verification(sets, opt):
119 |     rate = opt.args['SplitRate']
120 |     if len(rate)==1:
121 |         trainset, validset = sets
122 |         testset = None
123 |     elif len(rate) == 2:
124 |         trainset, validset, testset = sets
125 | 
126 |     train_targets = []
127 |     valid_targets = []
128 |     test_targets = []
129 | 
130 |     for item in trainset:
131 |         target = item['Target']
132 |         if target not in train_targets:
133 |             train_targets.append(target)
134 | 
135 |     for item in validset:
136 |         target = item['Target']
137 |         if target not in valid_targets:
138 |             valid_targets.append(target)
139 | 
140 |     if testset:
141 |         for item in testset:
142 |             target = item['Target']
143 |             if target not in test_targets:
144 |                 test_targets.append(target)
145 | 
146 |     # varify train and valid
147 |     for target in train_targets:
148 |         assert target not in valid_targets
149 | 
150 |     for target in valid_targets:
151 |         assert target not in train_targets
152 | 
153 |     # verify train and test
154 |     if testset:
155 |         for target in train_targets:
156 |             assert target not in test_targets
157 |         for target in test_targets:
158 |             assert target not in train_targets
159 | 
160 |         # verify valid and test
161 |         for target in valid_targets:
162 |             assert target not in test_targets
163 |         for target in test_targets:
164 |             assert target not in valid_targets
165 | 
166 |     print(f"Verification passed.")
167 |     print(f"trainset target num: {len(train_targets)}")
168 |     print(f"validset target num: {len(valid_targets)}")
169 |     print(f"testset target num: {len(test_targets)}")
170 | 


--------------------------------------------------------------------------------
/ACNet/ACNetEnviron.yml:
--------------------------------------------------------------------------------
  1 | name: MolGraphEnv-1.11
  2 | channels:
  3 |   - pytorch
  4 |   - pyg
  5 |   - conda-forge
  6 |   - defaults
  7 | dependencies:
  8 |   - _libgcc_mutex=0.1=main
  9 |   - _openmp_mutex=4.5=1_gnu
 10 |   - blas=1.0=mkl
 11 |   - boost=1.74.0=py38hc10631b_3
 12 |   - boost-cpp=1.74.0=h9359b55_0
 13 |   - brotlipy=0.7.0=py38h497a2fe_1001
 14 |   - bzip2=1.0.8=h7f98852_4
 15 |   - ca-certificates=2021.10.8=ha878542_0
 16 |   - cairo=1.16.0=hf32fb01_1
 17 |   - certifi=2021.10.8=py38h578d9bd_2
 18 |   - cffi=1.15.0=py38hd667e15_1
 19 |   - charset-normalizer=2.0.12=pyhd8ed1ab_0
 20 |   - colorama=0.4.4=pyh9f0ad1d_0
 21 |   - cryptography=35.0.0=py38ha5dfef3_0
 22 |   - cudatoolkit=11.3.1=h2bc3f7f_2
 23 |   - cycler=0.11.0=pyhd8ed1ab_0
 24 |   - cython=0.29.28=py38h295c915_0
 25 |   - decorator=4.4.2=py_0
 26 |   - ffmpeg=4.3.2=hca11adc_0
 27 |   - fontconfig=2.13.1=h6c09931_0
 28 |   - freetype=2.10.4=h0708190_1
 29 |   - glib=2.69.1=h4ff587b_1
 30 |   - gmp=6.2.1=h58526e2_0
 31 |   - gnutls=3.6.13=h85f3911_1
 32 |   - icu=67.1=he1b5a44_0
 33 |   - idna=3.3=pyhd8ed1ab_0
 34 |   - intel-openmp=2021.4.0=h06a4308_3561
 35 |   - jinja2=3.1.1=pyhd8ed1ab_0
 36 |   - joblib=1.1.0=pyhd8ed1ab_0
 37 |   - jpeg=9d=h7f8727e_0
 38 |   - kiwisolver=1.3.2=py38h295c915_0
 39 |   - lame=3.100=h7f98852_1001
 40 |   - ld_impl_linux-64=2.35.1=h7274673_9
 41 |   - libffi=3.3=he6710b0_2
 42 |   - libgcc-ng=9.3.0=h5101ec6_17
 43 |   - libgfortran-ng=7.5.0=h14aa051_20
 44 |   - libgfortran4=7.5.0=h14aa051_20
 45 |   - libgomp=9.3.0=h5101ec6_17
 46 |   - libiconv=1.16=h516909a_0
 47 |   - libpng=1.6.37=h21135ba_2
 48 |   - libstdcxx-ng=9.3.0=hd4cf53a_17
 49 |   - libtiff=4.0.10=hc3755c2_1005
 50 |   - libuuid=1.0.3=h7f8727e_2
 51 |   - libuv=1.40.0=h7b6447c_0
 52 |   - libxcb=1.14=h7b6447c_0
 53 |   - libxml2=2.9.10=h68273f3_2
 54 |   - littleutils=0.2.2=py_0
 55 |   - lz4-c=1.9.3=h9c3ff4c_1
 56 |   - markupsafe=2.0.1=py38h497a2fe_0
 57 |   - matplotlib-base=3.3.4=py38h0efea84_0
 58 |   - mkl=2021.4.0=h06a4308_640
 59 |   - mkl-service=2.4.0=py38h497a2fe_0
 60 |   - mkl_fft=1.3.1=py38hd3c417c_0
 61 |   - mkl_random=1.2.2=py38h1abd341_0
 62 |   - ncurses=6.3=h7f8727e_2
 63 |   - nettle=3.6=he412f7d_0
 64 |   - networkx=2.5.1=pyhd8ed1ab_0
 65 |   - numpy=1.21.2=py38h20f2e39_0
 66 |   - numpy-base=1.21.2=py38h79a1101_0
 67 |   - ogb=1.3.3=pyhd8ed1ab_0
 68 |   - olefile=0.46=pyh9f0ad1d_1
 69 |   - openh264=2.1.1=h780b84a_0
 70 |   - openssl=1.1.1n=h7f8727e_0
 71 |   - outdated=0.2.1=pyhd8ed1ab_0
 72 |   - packaging=21.3=pyhd8ed1ab_0
 73 |   - pandas=1.2.3=py38h51da96c_0
 74 |   - patsy=0.5.2=pyhd8ed1ab_0
 75 |   - pcre=8.45=h9c3ff4c_0
 76 |   - pillow=6.2.1=py38h6b7be26_0
 77 |   - pip=21.2.4=py38h06a4308_0
 78 |   - pixman=0.40.0=h36c2ea0_0
 79 |   - pycairo=1.19.1=py38h708ec4a_0
 80 |   - pycparser=2.21=pyhd8ed1ab_0
 81 |   - pyg=2.0.4=py38_torch_1.11.0_cu113
 82 |   - pyopenssl=22.0.0=pyhd8ed1ab_0
 83 |   - pyparsing=3.0.7=pyhd8ed1ab_0
 84 |   - pysocks=1.7.1=py38h578d9bd_5
 85 |   - python=3.8.13=h12debd9_0
 86 |   - python-dateutil=2.8.2=pyhd8ed1ab_0
 87 |   - python-louvain=0.15=pyhd8ed1ab_1
 88 |   - python_abi=3.8=2_cp38
 89 |   - pytorch=1.11.0=py3.8_cuda11.3_cudnn8.2.0_0
 90 |   - pytorch-cluster=1.6.0=py38_torch_1.11.0_cu113
 91 |   - pytorch-mutex=1.0=cuda
 92 |   - pytorch-scatter=2.0.9=py38_torch_1.11.0_cu113
 93 |   - pytorch-sparse=0.6.13=py38_torch_1.11.0_cu113
 94 |   - pytorch-spline-conv=1.2.1=py38_torch_1.11.0_cu113
 95 |   - pytz=2022.1=pyhd8ed1ab_0
 96 |   - pyyaml=5.4.1=py38h497a2fe_0
 97 |   - rdkit=2020.09.5=py38h2bca085_0
 98 |   - readline=8.1.2=h7f8727e_1
 99 |   - reportlab=3.5.68=py38hadf75a6_0
100 |   - requests=2.27.1=pyhd8ed1ab_0
101 |   - scikit-learn=1.0.2=py38h51133e4_1
102 |   - scipy=1.7.3=py38hc147768_0
103 |   - seaborn=0.11.2=hd8ed1ab_0
104 |   - seaborn-base=0.11.2=pyhd8ed1ab_0
105 |   - setuptools=58.0.4=py38h06a4308_0
106 |   - six=1.16.0=pyh6c4a22f_0
107 |   - sqlalchemy=1.3.23=py38h497a2fe_0
108 |   - sqlite=3.38.2=hc218d9a_0
109 |   - statsmodels=0.13.2=py38h7f8727e_0
110 |   - threadpoolctl=3.1.0=pyh8a188c0_0
111 |   - tk=8.6.11=h1ccaba5_0
112 |   - torchaudio=0.11.0=py38_cu113
113 |   - torchvision=0.12.0=py38_cu113
114 |   - tornado=6.1=py38h497a2fe_1
115 |   - tqdm=4.63.1=pyhd8ed1ab_0
116 |   - typing_extensions=4.1.1=pyha770c72_0
117 |   - urllib3=1.26.9=pyhd8ed1ab_0
118 |   - wheel=0.37.1=pyhd3eb1b0_0
119 |   - x264=1!161.3030=h7f98852_1
120 |   - xz=5.2.5=h7b6447c_0
121 |   - yacs=0.1.8=pyhd8ed1ab_0
122 |   - yaml=0.2.5=h516909a_0
123 |   - zlib=1.2.11=h7f8727e_4
124 |   - zstd=1.4.9=ha95c52a_0
125 |   - pip:
126 |     - class-resolver==0.3.8
127 | prefix: /opt/conda/envs/MolGraphEnv-1.11
128 | 


--------------------------------------------------------------------------------
/ACNet/CMPNNLarge.py:
--------------------------------------------------------------------------------
 1 | from ACComponents.ACProcessControllers import *
 2 | 
 3 | ExpOptions = {
 4 |     'Search': 'greedy',
 5 |     'SeedPerOpt': 3,
 6 |     'SubsetsNum': 3,
 7 |     'OnlyEval': False,
 8 | }
 9 | 
10 | BasicParamList = {
11 |     'ExpName': 'ACLarge',
12 |     'MainMetric': 'AUC',
13 |     'DataPath': './ACComponents/ACDataset/data_files/generated_datasets/MMP_AC_Large.json',
14 |     'RootPath': './TestExp/Large/CMPNN/',
15 |     'CUDA_VISIBLE_DEVICES': '2',
16 |     'TaskNum': 1,
17 |     'ClassNum': 2,
18 |     'OutputSize': 2,
19 |     'Feature': 'CMPNN',
20 |     'Model': 'CMPNN',
21 | 
22 |     'OnlySpecific': True,
23 |     'Weight': True,
24 |     'AC': True,
25 |     'PyG': False,
26 | 
27 |     'ValidRate': 40000,
28 |     'PrintRate': 5,
29 |     'UpdateRate': 1,
30 |     'ValidBalance': False,
31 |     'TestBalance': False,
32 |     'SplitRate': [0.8, 0.1],
33 |     'Splitter': 'Random',
34 |     'MaxEpoch': 300,
35 |     'LowerThanMaxLimit': 12,
36 |     'DecreasingLimit': 8,
37 | 
38 |     # if OnlyEval == True:
39 |     'EvalModelPath': None,
40 |     'EvalDatasetPath': None,
41 |     'EvalLogAllPreds': None,
42 | 
43 |     'Scheduler': 'PolynomialDecayLR',
44 |     # 'Scheduler': 'EmptyLRScheduler',
45 | 
46 | 
47 |     # Params for PolynomialDecayLR only
48 |     'WarmupEpoch': 2,
49 |     'LRMaxEpoch':300,
50 |     'EndLR':1e-9,
51 |     'Power':1.0,
52 |     # Params for StepLR only
53 |     'LRStep': 30,
54 |     'LRGamma': 0.1,
55 |     ##########
56 | 
57 |     'WeightIniter': None,
58 | 
59 |     # Params for NormWeightIniter only
60 |     'InitMean' : 0,
61 |     'InitStd' : 1,
62 | 
63 |     # Params for CMPNN only
64 |     'dataset_type': 'classification',
65 |     'activation': 'ReLU',
66 |     'ffn_num_layers':3,             # useless for AC
67 |     'ffn_hidden_size': 300,         # useless for AC
68 |     'no_cache': False,
69 |     'atom_messages': False,
70 |     'CommunicateKernel': 'Add',
71 |     'only_extract_feature': True,   # True for AC
72 | 
73 | 
74 |     # Training Params to be adujsted. If the param is not needed to be adjusted, set the value here.
75 |     'SplitValidSeed': 8,
76 |     'SplitTestSeed': 8,
77 |     'BatchSize': 256,
78 | 
79 | }
80 | AdjustableParamList = {}
81 | SpecificParamList = {
82 |     'DropRate':[0.2],
83 |     'WeightDecay':[4.5],
84 |     'lr':[3],
85 |     'FPSize': [128],
86 |     'CMPNNLayers': [3],
87 |     'DNNLayers':[[128]],
88 | }
89 | 
90 | 
91 | expcontroller = ACExperimentProcessController(ExpOptions, [BasicParamList, AdjustableParamList, SpecificParamList])
92 | 
93 | expcontroller.ExperimentStart()
94 | 
95 | 


--------------------------------------------------------------------------------
/ACNet/CMPNNMedium.py:
--------------------------------------------------------------------------------
 1 | from ACComponents.ACProcessControllers import *
 2 | 
 3 | ExpOptions = {
 4 |     'Search': 'greedy',
 5 |     'SeedPerOpt': 3,
 6 |     'SubsetsNum': 64,
 7 |     'OnlyEval': False,
 8 | }
 9 | 
10 | BasicParamList = {
11 |     'ExpName': 'ACMedium',
12 |     'MainMetric': 'AUC',
13 |     'DataPath': './ACComponents/ACDataset/data_files/generated_datasets/MMP_AC_Medium.json',
14 |     'RootPath': './TestExp/Medium/CMPNN/',
15 |     'CUDA_VISIBLE_DEVICES': '2',
16 |     'TaskNum': 1,
17 |     'ClassNum': 2,
18 |     'OutputSize': 2,
19 |     'Feature': 'CMPNN',
20 |     'Model': 'CMPNN',
21 | 
22 |     'OnlySpecific': True,
23 |     'Weight': True,
24 |     'AC': True,
25 |     'PyG': False,
26 | 
27 |     'ValidRate': 40000,
28 |     'PrintRate': 5,
29 |     'UpdateRate': 1,
30 |     'ValidBalance': False,
31 |     'TestBalance': False,
32 |     'SplitRate': [0.8, 0.1],
33 |     'Splitter': 'Random',
34 |     'MaxEpoch': 300,
35 |     'LowerThanMaxLimit': 12,
36 |     'DecreasingLimit': 8,
37 | 
38 |     # if OnlyEval == True:
39 |     'EvalModelPath': None,
40 |     'EvalDatasetPath': None,
41 |     'EvalLogAllPreds': None,
42 | 
43 |     'Scheduler': 'PolynomialDecayLR',
44 |     # 'Scheduler': 'EmptyLRScheduler',
45 | 
46 | 
47 |     # Params for PolynomialDecayLR only
48 |     'WarmupEpoch': 2,
49 |     'LRMaxEpoch':300,
50 |     'EndLR':1e-9,
51 |     'Power':1.0,
52 |     # Params for StepLR only
53 |     'LRStep': 30,
54 |     'LRGamma': 0.1,
55 |     ##########
56 | 
57 |     'WeightIniter': None,
58 | 
59 |     # Params for NormWeightIniter only
60 |     'InitMean' : 0,
61 |     'InitStd' : 1,
62 | 
63 |     # Params for CMPNN only
64 |     'dataset_type': 'classification',
65 |     'activation': 'ReLU',
66 |     'ffn_num_layers':3,             # useless for AC
67 |     'ffn_hidden_size': 300,         # useless for AC
68 |     'no_cache': False,
69 |     'atom_messages': False,
70 |     'CommunicateKernel': 'Add',
71 |     'only_extract_feature': True,   # True for AC
72 | 
73 | 
74 |     # Training Params to be adujsted. If the param is not needed to be adjusted, set the value here.
75 |     'SplitValidSeed': 8,
76 |     'SplitTestSeed': 8,
77 |     'BatchSize': 256,
78 | 
79 | }
80 | AdjustableParamList = {}
81 | SpecificParamList = {
82 |     'DropRate':[0.2],
83 |     'WeightDecay':[4.5],
84 |     'lr':[3],
85 |     'FPSize': [128],
86 |     'CMPNNLayers': [3],
87 |     'DNNLayers':[[128]],
88 | }
89 | 
90 | 
91 | expcontroller = ACExperimentProcessController(ExpOptions, [BasicParamList, AdjustableParamList, SpecificParamList])
92 | 
93 | expcontroller.ExperimentStart()
94 | 
95 | 


--------------------------------------------------------------------------------
/ACNet/CMPNNSmall.py:
--------------------------------------------------------------------------------
 1 | from ACComponents.ACProcessControllers import *
 2 | 
 3 | ExpOptions = {
 4 |     'Search': 'greedy',
 5 |     'SeedPerOpt': 3,
 6 |     'SubsetsNum': 110,
 7 |     'OnlyEval': False,
 8 | }
 9 | 
10 | BasicParamList = {
11 |     'ExpName': 'ACSmall',
12 |     'MainMetric': 'AUC',
13 |     'DataPath': './ACComponents/ACDataset/data_files/generated_datasets/MMP_AC_Small.json',
14 |     'RootPath': './TestExp/Small/CMPNN/',
15 |     'CUDA_VISIBLE_DEVICES': '2',
16 |     'TaskNum': 1,
17 |     'ClassNum': 2,
18 |     'OutputSize': 2,
19 |     'Feature': 'CMPNN',
20 |     'Model': 'CMPNN',
21 | 
22 |     'OnlySpecific': True,
23 |     'Weight': True,
24 |     'AC': True,
25 |     'PyG': False,
26 | 
27 |     'ValidRate': 40000,
28 |     'PrintRate': 5,
29 |     'UpdateRate': 1,
30 |     'ValidBalance': False,
31 |     'TestBalance': False,
32 |     'SplitRate': [0.8, 0.1],
33 |     'Splitter': 'Random',
34 |     'MaxEpoch': 300,
35 |     'LowerThanMaxLimit': 12,
36 |     'DecreasingLimit': 8,
37 | 
38 |     # if OnlyEval == True:
39 |     'EvalModelPath': None,
40 |     'EvalDatasetPath': None,
41 |     'EvalLogAllPreds': None,
42 | 
43 |     'Scheduler': 'PolynomialDecayLR',
44 |     # 'Scheduler': 'EmptyLRScheduler',
45 | 
46 | 
47 |     # Params for PolynomialDecayLR only
48 |     'WarmupEpoch': 2,
49 |     'LRMaxEpoch':300,
50 |     'EndLR':1e-9,
51 |     'Power':1.0,
52 |     # Params for StepLR only
53 |     'LRStep': 30,
54 |     'LRGamma': 0.1,
55 |     ##########
56 | 
57 |     'WeightIniter': None,
58 | 
59 |     # Params for NormWeightIniter only
60 |     'InitMean' : 0,
61 |     'InitStd' : 1,
62 | 
63 |     # Params for CMPNN only
64 |     'dataset_type': 'classification',
65 |     'activation': 'ReLU',
66 |     'ffn_num_layers':3,             # useless for AC
67 |     'ffn_hidden_size': 300,         # useless for AC
68 |     'no_cache': False,
69 |     'atom_messages': False,
70 |     'CommunicateKernel': 'Add',
71 |     'only_extract_feature': True,   # True for AC
72 | 
73 | 
74 |     # Training Params to be adujsted. If the param is not needed to be adjusted, set the value here.
75 |     'SplitValidSeed': 8,
76 |     'SplitTestSeed': 8,
77 |     'BatchSize': 256,
78 | 
79 | }
80 | AdjustableParamList = {}
81 | SpecificParamList = {
82 |     'DropRate':[0.2],
83 |     'WeightDecay':[4.5],
84 |     'lr':[3],
85 |     'FPSize': [128],
86 |     'CMPNNLayers': [3],
87 |     'DNNLayers':[[128]],
88 | }
89 | 
90 | 
91 | expcontroller = ACExperimentProcessController(ExpOptions, [BasicParamList, AdjustableParamList, SpecificParamList])
92 | 
93 | expcontroller.ExperimentStart()
94 | 
95 | 


--------------------------------------------------------------------------------
/ACNet/ChemBERTFew.py:
--------------------------------------------------------------------------------
 1 | from ACComponents.ACProcessControllers import *
 2 | 
 3 | ExpOptions = {
 4 |     'Search': 'greedy',
 5 |     'SeedPerOpt': 3,
 6 |     'SubsetsNum': 13,
 7 |     'OnlyEval': False,
 8 |     'Finetune':True,
 9 | }
10 | 
11 | BasicParamList = {
12 |     'ExpName': 'ACFew',
13 |     'MainMetric': 'AUC',
14 |     'DataPath': './ACComponents/ACDataset/data_files/MMP_AC_Few_representation/ChemBERT.npz',
15 |     'RootPath': './TestExp/Few/ChemBERT/',
16 |     'CUDA_VISIBLE_DEVICES': '2',
17 |     'TaskNum': 1,
18 |     'ClassNum': 2,
19 |     'OutputSize': 2,
20 |     'Feature': 'Raw',
21 |     'Model': 'MLP',
22 | 
23 |     # if Feature == Raw
24 |     'RawFeatureSize': 1024,
25 | 
26 |     'OnlySpecific': True,
27 |     'Weight': True,
28 |     'AC': True,
29 |     'PyG': False,
30 | 
31 |     'ValidRate': 40000,
32 |     'PrintRate': 5,
33 |     'UpdateRate': 1,
34 |     'SplitRate': [0.8, 0.1],
35 |     'Splitter': 'Random',
36 |     'MaxEpoch': 300,
37 |     'LowerThanMaxLimit': 12,
38 |     'DecreasingLimit': 8,
39 | 
40 |     # if OnlyEval == True:
41 |     'EvalModelPath': None,
42 |     'EvalDatasetPath': None,
43 |     'EvalLogAllPreds': None,
44 | 
45 |     'Scheduler': 'PolynomialDecayLR',
46 | 
47 |     # Params for PolynomialDecayLR only
48 |     'WarmupEpoch': 2,
49 |     'LRMaxEpoch':300,
50 |     'EndLR':1e-9,
51 |     'Power':1.0,
52 |     # Params for StepLR only
53 |     'LRStep': 30,
54 |     'LRGamma': 0.1,
55 |     ##########
56 | 
57 |     'WeightIniter': None,
58 | 
59 |     # Params for NormWeightIniter only
60 |     'InitMean' : 0,
61 |     'InitStd' : 1,
62 | 
63 |     # Training Params to be adujsted. If the param is not needed to be adjusted, set the value here.
64 |     'SplitValidSeed': 8,
65 |     'SplitTestSeed': 8,
66 |     'BatchSize': 8,
67 | 
68 | }
69 | AdjustableParamList = {}
70 | SpecificParamList = {
71 |     'DropRate':[0.2],
72 |     'WeightDecay':[5],
73 |     'lr':[4],
74 |     'DNNLayers':[[512, 128, 32]],
75 | }
76 | 
77 | 
78 | expcontroller = ACExperimentProcessController(ExpOptions, [BasicParamList, AdjustableParamList, SpecificParamList])
79 | 
80 | expcontroller.ExperimentStart()
81 | 
82 | 


--------------------------------------------------------------------------------
/ACNet/FPMLPFew.py:
--------------------------------------------------------------------------------
 1 | from ACComponents.ACProcessControllers import *
 2 | 
 3 | ExpOptions = {
 4 |     'Search': 'greedy',
 5 |     'SeedPerOpt': 3,
 6 |     'SubsetsNum': 13,
 7 |     'OnlyEval': False,
 8 | }
 9 | 
10 | BasicParamList = {
11 |     'ExpName': 'ACFew',
12 |     'MainMetric': 'AUC',
13 |     'DataPath': './ACComponents/ACDataset/data_files/generated_datasets/MMP_AC_Few.json',
14 |     'RootPath': './TestExp/Few/FPMLP/',
15 |     'CUDA_VISIBLE_DEVICES': '0',
16 |     'TaskNum': 1,
17 |     'ClassNum': 2,
18 |     'OutputSize': 2,
19 |     'Feature': 'FP',
20 |     'Model': 'MLP',
21 | 
22 |     'OnlySpecific': True,
23 |     'Weight': True,
24 |     'AC': True,
25 |     'PyG': False,
26 | 
27 |     'ValidRate': 40000,
28 |     'PrintRate': 5,
29 |     'UpdateRate': 1,
30 |     'SplitRate': [0.8, 0.1],
31 |     'Splitter': 'Random',
32 |     'MaxEpoch': 300,
33 |     'LowerThanMaxLimit': 12,
34 |     'DecreasingLimit': 8,
35 | 
36 |     # if OnlyEval == True:
37 |     'EvalModelPath': None,
38 |     'EvalDatasetPath': None,
39 |     'EvalLogAllPreds': None,
40 | 
41 |     'Scheduler': 'PolynomialDecayLR',
42 | 
43 |     # Params for PolynomialDecayLR only
44 |     'WarmupEpoch': 2,
45 |     'LRMaxEpoch':300,
46 |     'EndLR':1e-9,
47 |     'Power':1.0,
48 |     # Params for StepLR only
49 |     'LRStep': 30,
50 |     'LRGamma': 0.1,
51 |     ##########
52 | 
53 |     'WeightIniter': None,
54 | 
55 |     # Params for NormWeightIniter only
56 |     'InitMean' : 0,
57 |     'InitStd' : 1,
58 | 
59 |     'AtomFeatureSize': 39,
60 |     'BondFeatureSize': 10,
61 |     'MolFP': 'MorganFP',
62 |     'radius': 2,
63 |     'nBits': 1024,
64 | 
65 |     # Training Params to be adujsted. If the param is not needed to be adjusted, set the value here.
66 |     'SplitValidSeed': 8,
67 |     'SplitTestSeed': 8,
68 |     'BatchSize': 8,
69 | 
70 | }
71 | AdjustableParamList = {}
72 | SpecificParamList = {
73 |     'DropRate':[0.2],
74 |     'WeightDecay':[4.5],
75 |     'lr':[3],
76 |     'DNNLayers':[[128]],
77 | }
78 | 
79 | 
80 | expcontroller = ACExperimentProcessController(ExpOptions, [BasicParamList, AdjustableParamList, SpecificParamList])
81 | 
82 | expcontroller.ExperimentStart()
83 | 
84 | 


--------------------------------------------------------------------------------
/ACNet/FPMLPLarge.py:
--------------------------------------------------------------------------------
 1 | from ACComponents.ACProcessControllers import *
 2 | 
 3 | ExpOptions = {
 4 |     'Search': 'greedy',
 5 |     'SeedPerOpt': 3,
 6 |     'SubsetsNum': 3,
 7 |     'OnlyEval': False,
 8 | }
 9 | 
10 | BasicParamList = {
11 |     'ExpName': 'ACLarge',
12 |     'MainMetric': 'AUC',
13 |     'DataPath': './ACComponents/ACDataset/data_files/generated_datasets/MMP_AC_Large.json',
14 |     'RootPath': './TestExp/Large/FPMLP/',
15 |     'CUDA_VISIBLE_DEVICES': '0',
16 |     'TaskNum': 1,
17 |     'ClassNum': 2,
18 |     'OutputSize': 2,
19 |     'Feature': 'FP',
20 |     'Model': 'MLP',
21 | 
22 |     'OnlySpecific': True,
23 |     'Weight': True,
24 |     'AC': True,
25 |     'PyG': False,
26 | 
27 |     'ValidRate': 40000,
28 |     'PrintRate': 5,
29 |     'UpdateRate': 1,
30 |     'SplitRate': [0.8, 0.1],
31 |     'Splitter': 'Random',
32 |     'MaxEpoch': 300,
33 |     'LowerThanMaxLimit': 12,
34 |     'DecreasingLimit': 8,
35 | 
36 |     # if OnlyEval == True:
37 |     'EvalModelPath': None,
38 |     'EvalDatasetPath': None,
39 |     'EvalLogAllPreds': None,
40 | 
41 |     'Scheduler': 'PolynomialDecayLR',
42 | 
43 |     # Params for PolynomialDecayLR only
44 |     'WarmupEpoch': 2,
45 |     'LRMaxEpoch':300,
46 |     'EndLR':1e-9,
47 |     'Power':1.0,
48 |     # Params for StepLR only
49 |     'LRStep': 30,
50 |     'LRGamma': 0.1,
51 |     ##########
52 | 
53 |     'WeightIniter': None,
54 | 
55 |     # Params for NormWeightIniter only
56 |     'InitMean' : 0,
57 |     'InitStd' : 1,
58 | 
59 |     'AtomFeatureSize': 39,
60 |     'BondFeatureSize': 10,
61 |     'MolFP': 'MorganFP',
62 |     'radius': 2,
63 |     'nBits': 1024,
64 | 
65 |     'SplitValidSeed': 8,
66 |     'SplitTestSeed': 8,
67 |     'BatchSize': 256,
68 | 
69 | }
70 | AdjustableParamList = {}
71 | SpecificParamList = {
72 |     'DropRate':[0.4],
73 |     'WeightDecay':[5],
74 |     'lr':[3],
75 |     'DNNLayers':[[512, 128, 32]],
76 | }
77 | 
78 | 
79 | expcontroller = ACExperimentProcessController(ExpOptions, [BasicParamList, AdjustableParamList, SpecificParamList])
80 | 
81 | expcontroller.ExperimentStart()
82 | 
83 | 


--------------------------------------------------------------------------------
/ACNet/FPMLPMedium.py:
--------------------------------------------------------------------------------
 1 | from ACComponents.ACProcessControllers import *
 2 | 
 3 | ExpOptions = {
 4 |     'Search': 'greedy',
 5 |     'SeedPerOpt': 3,
 6 |     'SubsetsNum': 64,
 7 |     'OnlyEval': False,
 8 | }
 9 | 
10 | BasicParamList = {
11 |     'ExpName': 'ACMedium',
12 |     'MainMetric': 'AUC',
13 |     'DataPath': './ACComponents/ACDataset/data_files/generated_datasets/MMP_AC_Medium.json',
14 |     'RootPath': './TestExp/Medium/FPMLP/',
15 |     'CUDA_VISIBLE_DEVICES': '0',
16 |     'TaskNum': 1,
17 |     'ClassNum': 2,
18 |     'OutputSize': 2,
19 |     'Feature': 'FP',
20 |     'Model': 'MLP',
21 | 
22 |     'OnlySpecific': True,
23 |     'Weight': True,
24 |     'AC': True,
25 |     'PyG': False,
26 | 
27 |     'ValidRate': 40000,
28 |     'PrintRate': 5,
29 |     'UpdateRate': 1,
30 |     'SplitRate': [0.8, 0.1],
31 |     'Splitter': 'Random',
32 |     'MaxEpoch': 300,
33 |     'LowerThanMaxLimit': 12,
34 |     'DecreasingLimit': 8,
35 | 
36 |     # if OnlyEval == True:
37 |     'EvalModelPath': None,
38 |     'EvalDatasetPath': None,
39 |     'EvalLogAllPreds': None,
40 | 
41 |     'Scheduler': 'PolynomialDecayLR',
42 | 
43 |     # Params for PolynomialDecayLR only
44 |     'WarmupEpoch': 2,
45 |     'LRMaxEpoch':300,
46 |     'EndLR':1e-9,
47 |     'Power':1.0,
48 |     # Params for StepLR only
49 |     'LRStep': 30,
50 |     'LRGamma': 0.1,
51 |     ##########
52 | 
53 |     'WeightIniter': None,
54 | 
55 |     # Params for NormWeightIniter only
56 |     'InitMean' : 0,
57 |     'InitStd' : 1,
58 | 
59 |     'AtomFeatureSize': 39,
60 |     'BondFeatureSize': 10,
61 |     'MolFP': 'MorganFP',
62 |     'radius': 2,
63 |     'nBits': 1024,
64 | 
65 |     'SplitValidSeed': 8,
66 |     'SplitTestSeed': 8,
67 |     'BatchSize': 256,
68 | 
69 | }
70 | AdjustableParamList = {}
71 | SpecificParamList = {
72 |     'DropRate':[0.4],
73 |     'WeightDecay':[5],
74 |     'lr':[3],
75 |     'DNNLayers':[[512, 128, 32]],
76 | }
77 | 
78 | 
79 | expcontroller = ACExperimentProcessController(ExpOptions, [BasicParamList, AdjustableParamList, SpecificParamList])
80 | 
81 | expcontroller.ExperimentStart()
82 | 
83 | 


--------------------------------------------------------------------------------
/ACNet/FPMLPMixRandom.py:
--------------------------------------------------------------------------------
 1 | from ACComponents.ACProcessControllers import *
 2 | 
 3 | ExpOptions = {
 4 |     'Search': 'greedy',
 5 |     'SeedPerOpt': 3,
 6 |     'SubsetsNum': 1,
 7 |     'OnlyEval': False,
 8 | }
 9 | 
10 | BasicParamList = {
11 |     'ExpName': 'ACMixRandom',
12 |     'MainMetric': 'AUC',
13 |     'DataPath': './ACComponents/ACDataset/data_files/generated_datasets/MMP_AC_Mixed_Screened.json',
14 |     'RootPath': './TestExp/Mix/FPMLP/',
15 |     'CUDA_VISIBLE_DEVICES': '0',
16 |     'TaskNum': 1,
17 |     'ClassNum': 2,
18 |     'OutputSize': 2,
19 |     'Feature': 'FP',
20 |     'Model': 'MLP',
21 | 
22 |     'OnlySpecific': True,
23 |     'Weight': True,
24 |     'AC': True,
25 |     'PyG': False,
26 | 
27 |     'ValidRate': 40000,
28 |     'PrintRate': 5,
29 |     'UpdateRate': 1,
30 |     'SplitRate': [0.8, 0.1],
31 |     'Splitter': 'Random',
32 |     'MaxEpoch': 300,
33 |     'LowerThanMaxLimit': 12,
34 |     'DecreasingLimit': 8,
35 | 
36 |     # if OnlyEval == True:
37 |     'EvalModelPath': None,
38 |     'EvalDatasetPath': None,
39 |     'EvalLogAllPreds': None,
40 | 
41 |     'Scheduler': 'PolynomialDecayLR',
42 | 
43 |     # Params for PolynomialDecayLR only
44 |     'WarmupEpoch': 2,
45 |     'LRMaxEpoch':300,
46 |     'EndLR':1e-9,
47 |     'Power':1.0,
48 |     # Params for StepLR only
49 |     'LRStep': 30,
50 |     'LRGamma': 0.1,
51 |     ##########
52 | 
53 |     'WeightIniter': None,
54 | 
55 |     # Params for NormWeightIniter only
56 |     'InitMean' : 0,
57 |     'InitStd' : 1,
58 | 
59 |     'AtomFeatureSize': 39,
60 |     'BondFeatureSize': 10,
61 |     'MolFP': 'MorganFP',
62 |     'radius': 2,
63 |     'nBits': 1024,
64 | 
65 |     # Training Params to be adujsted. If the param is not needed to be adjusted, set the value here.
66 |     'SplitValidSeed': 8,
67 |     'SplitTestSeed': 8,
68 |     'BatchSize': 256,
69 | 
70 | }
71 | AdjustableParamList = {}
72 | SpecificParamList = {
73 |     'DropRate':[0.4],
74 |     'WeightDecay':[4.5],
75 |     'lr':[4],
76 |     'DNNLayers':[[128]],
77 | }
78 | 
79 | 
80 | expcontroller = ACExperimentProcessController(ExpOptions, [BasicParamList, AdjustableParamList, SpecificParamList])
81 | 
82 | expcontroller.ExperimentStart()
83 | 
84 | 


--------------------------------------------------------------------------------
/ACNet/FPMLPMixTarget.py:
--------------------------------------------------------------------------------
 1 | from ACComponents.ACProcessControllers import *
 2 | 
 3 | ExpOptions = {
 4 |     'Search': 'greedy',
 5 |     'SeedPerOpt': 3,
 6 |     'SubsetsNum': 1,
 7 |     'OnlyEval': False,
 8 | }
 9 | 
10 | BasicParamList = {
11 |     'ExpName': 'ACMixTarget',
12 |     'MainMetric': 'AUC',
13 |     'DataPath': './ACComponents/ACDataset/data_files/generated_datasets/MMP_AC_Mixed_Screened.json',
14 |     'RootPath': './TestExp/Mix/FPMLP/',
15 |     'CUDA_VISIBLE_DEVICES': '0',
16 |     'TaskNum': 1,
17 |     'ClassNum': 2,
18 |     'OutputSize': 2,
19 |     'Feature': 'FP',
20 |     'Model': 'MLP',
21 | 
22 |     'OnlySpecific': True,
23 |     'Weight': True,
24 |     'AC': True,
25 |     'PyG': False,
26 | 
27 |     'ValidRate': 40000,
28 |     'PrintRate': 5,
29 |     'UpdateRate': 1,
30 |     'SplitRate': [0.8, 0.1],
31 |     'Splitter': 'TargetRandom',
32 |     'MaxEpoch': 300,
33 |     'LowerThanMaxLimit': 12,
34 |     'DecreasingLimit': 8,
35 | 
36 |     # if OnlyEval == True:
37 |     'EvalModelPath': None,
38 |     'EvalDatasetPath': None,
39 |     'EvalLogAllPreds': None,
40 | 
41 |     'Scheduler': 'PolynomialDecayLR',
42 | 
43 |     # Params for PolynomialDecayLR only
44 |     'WarmupEpoch': 2,
45 |     'LRMaxEpoch':300,
46 |     'EndLR':1e-9,
47 |     'Power':1.0,
48 |     # Params for StepLR only
49 |     'LRStep': 30,
50 |     'LRGamma': 0.1,
51 |     ##########
52 | 
53 |     'WeightIniter': None,
54 | 
55 |     # Params for NormWeightIniter only
56 |     'InitMean' : 0,
57 |     'InitStd' : 1,
58 | 
59 |     'AtomFeatureSize': 39,
60 |     'BondFeatureSize': 10,
61 |     'MolFP': 'MorganFP',
62 |     'radius': 2,
63 |     'nBits': 1024,
64 | 
65 |     # Training Params to be adujsted. If the param is not needed to be adjusted, set the value here.
66 |     'SplitValidSeed': 8,
67 |     'SplitTestSeed': 8,
68 |     'BatchSize': 256,
69 | 
70 | }
71 | AdjustableParamList = {}
72 | SpecificParamList = {
73 |     'DropRate':[0.2],
74 |     'WeightDecay':[4.5],
75 |     'lr':[3],
76 |     'DNNLayers':[[128]],
77 | }
78 | 
79 | 
80 | expcontroller = ACExperimentProcessController(ExpOptions, [BasicParamList, AdjustableParamList, SpecificParamList])
81 | 
82 | expcontroller.ExperimentStart()
83 | 
84 | 


--------------------------------------------------------------------------------
/ACNet/FPMLPSmall.py:
--------------------------------------------------------------------------------
 1 | from ACComponents.ACProcessControllers import *
 2 | 
 3 | ExpOptions = {
 4 |     'Search': 'greedy',
 5 |     'SeedPerOpt': 3,
 6 |     'SubsetsNum': 110,
 7 |     'OnlyEval': False,
 8 | }
 9 | 
10 | BasicParamList = {
11 |     'ExpName': 'ACSmall',
12 |     'MainMetric': 'AUC',
13 |     'DataPath': './ACComponents/ACDataset/data_files/generated_datasets/MMP_AC_Small.json',
14 |     'RootPath': './TestExp/Small/FPMLP/',
15 |     'CUDA_VISIBLE_DEVICES': '0',
16 |     'TaskNum': 1,
17 |     'ClassNum': 2,
18 |     'OutputSize': 2,
19 |     'Feature': 'FP',
20 |     'Model': 'MLP',
21 | 
22 |     'OnlySpecific': True,
23 |     'Weight': True,
24 |     'AC': True,
25 |     'PyG': False,
26 | 
27 |     'ValidRate': 40000,
28 |     'PrintRate': 5,
29 |     'UpdateRate': 1,
30 |     'SplitRate': [0.8, 0.1],
31 |     'Splitter': 'Random',
32 |     'MaxEpoch': 300,
33 |     'LowerThanMaxLimit': 12,
34 |     'DecreasingLimit': 8,
35 | 
36 |     # if OnlyEval == True:
37 |     'EvalModelPath': None,
38 |     'EvalDatasetPath': None,
39 |     'EvalLogAllPreds': None,
40 | 
41 |     'Scheduler': 'PolynomialDecayLR',
42 | 
43 |     # Params for PolynomialDecayLR only
44 |     'WarmupEpoch': 2,
45 |     'LRMaxEpoch':300,
46 |     'EndLR':1e-9,
47 |     'Power':1.0,
48 |     # Params for StepLR only
49 |     'LRStep': 30,
50 |     'LRGamma': 0.1,
51 |     ##########
52 | 
53 |     'WeightIniter': None,
54 | 
55 |     # Params for NormWeightIniter only
56 |     'InitMean' : 0,
57 |     'InitStd' : 1,
58 | 
59 |     'AtomFeatureSize': 39,
60 |     'BondFeatureSize': 10,
61 |     'MolFP': 'MorganFP',
62 |     'radius': 2,
63 |     'nBits': 1024,
64 | 
65 |     'SplitValidSeed': 8,
66 |     'SplitTestSeed': 8,
67 |     'BatchSize': 32,
68 | 
69 | }
70 | AdjustableParamList = {}
71 | SpecificParamList = {
72 |     'DropRate':[0.4],
73 |     'WeightDecay':[5],
74 |     'lr':[3],
75 |     'DNNLayers':[[256, 64]],
76 | }
77 | 
78 | 
79 | expcontroller = ACExperimentProcessController(ExpOptions, [BasicParamList, AdjustableParamList, SpecificParamList])
80 | 
81 | expcontroller.ExperimentStart()
82 | 
83 | 


--------------------------------------------------------------------------------
/ACNet/GCNLarge.py:
--------------------------------------------------------------------------------
 1 | from ACComponents.ACProcessControllers import *
 2 | 
 3 | ExpOptions = {
 4 |     'Search': 'greedy',
 5 |     'SeedPerOpt': 3,
 6 |     'SubsetsNum': 3,
 7 |     'OnlyEval': False,
 8 | }
 9 | 
10 | BasicParamList = {
11 |     'ExpName': 'ACLarge',
12 |     'MainMetric': 'AUC',
13 |     'DataPath': './ACComponents/ACDataset/data_files/generated_datasets/MMP_AC_Large.json',
14 |     'RootPath': './TestExp/Large/GCN/',
15 |     'CUDA_VISIBLE_DEVICES': '1',
16 |     'TaskNum': 1,
17 |     'ClassNum': 2,
18 |     'OutputSize': 2,
19 |     'Feature': 'PyGGCN',
20 |     'Model': 'PyGGCN',
21 | 
22 |     'OnlySpecific': True,
23 |     'Weight': True,
24 |     'AC': True,
25 |     'PyG': True,
26 | 
27 |     'ValidRate': 4000,
28 |     'PrintRate': 5,
29 |     'UpdateRate': 1,
30 |     'SplitRate': [0.8, 0.1],
31 |     'Splitter': 'Random',
32 |     'MaxEpoch': 300,
33 |     'LowerThanMaxLimit': 30,
34 |     'DecreasingLimit': 12,
35 | 
36 |     # if OnlyEval == True:
37 |     'EvalModelPath': None,
38 |     'EvalDatasetPath': None,
39 |     'EvalLogAllPreds': None,
40 | 
41 |     'Scheduler': 'PolynomialDecayLR',
42 | 
43 |     # Params for PolynomialDecayLR only
44 |     'WarmupEpoch': 2,
45 |     'LRMaxEpoch':300,
46 |     'EndLR':1e-9,
47 |     'Power':1.0,
48 |     # Params for StepLR only
49 |     'LRStep': 30,
50 |     'LRGamma': 0.1,
51 |     ##########
52 | 
53 |     'WeightIniter': 'XavierNorm',
54 | 
55 |     # Params for NormWeightIniter only
56 |     'InitMean' : 0,
57 |     'InitStd' : 1,
58 | 
59 |     'AtomFeatureSize': 39,
60 |     'BondFeatureSize': 10,
61 | 
62 |     'GCNReadout': 'Add',
63 | 
64 | 
65 |     'SplitValidSeed': 8,
66 |     'SplitTestSeed': 8,
67 |     'BatchSize': 200,
68 | 
69 | }
70 | AdjustableParamList = {}
71 | SpecificParamList = {
72 |     'DropRate':[0.4],
73 |     'WeightDecay':[4.5],
74 |     'lr':[3],
75 |     'GCNInputSize': [64],
76 |     'GCNHiddenSize': [128],
77 |     'GCNLayers': [3],
78 |     'FPSize':[64],
79 |     'DNNLayers':[[32]],
80 | }
81 | 
82 | expcontroller = ACExperimentProcessController(ExpOptions, [BasicParamList, AdjustableParamList, SpecificParamList])
83 | 
84 | expcontroller.ExperimentStart()
85 | 


--------------------------------------------------------------------------------
/ACNet/GCNMedium.py:
--------------------------------------------------------------------------------
 1 | from ACComponents.ACProcessControllers import *
 2 | 
 3 | ExpOptions = {
 4 |     'Search': 'greedy',
 5 |     'SeedPerOpt': 3,
 6 |     'SubsetsNum': 64,
 7 |     'OnlyEval': False,
 8 | }
 9 | 
10 | BasicParamList = {
11 |     'ExpName': 'ACMedium',
12 |     'MainMetric': 'AUC',
13 |     'DataPath': './ACComponents/ACDataset/data_files/generated_datasets/MMP_AC_Medium.json',
14 |     'RootPath': './TestExp/Medium/GCN/',
15 |     'CUDA_VISIBLE_DEVICES': '1',
16 |     'TaskNum': 1,
17 |     'ClassNum': 2,
18 |     'OutputSize': 2,
19 |     'Feature': 'PyGGCN',
20 |     'Model': 'PyGGCN',
21 | 
22 |     'OnlySpecific': True,
23 |     'Weight': True,
24 |     'AC': True,
25 |     'PyG': True,
26 | 
27 |     'ValidRate': 4000,
28 |     'PrintRate': 5,
29 |     'UpdateRate': 1,
30 |     'SplitRate': [0.8, 0.1],
31 |     'Splitter': 'Random',
32 |     'MaxEpoch': 300,
33 |     'LowerThanMaxLimit': 30,
34 |     'DecreasingLimit': 12,
35 | 
36 |     # if OnlyEval == True:
37 |     'EvalModelPath': None,
38 |     'EvalDatasetPath': None,
39 |     'EvalLogAllPreds': None,
40 | 
41 |     'Scheduler': 'PolynomialDecayLR',
42 | 
43 |     # Params for PolynomialDecayLR only
44 |     'WarmupEpoch': 2,
45 |     'LRMaxEpoch':300,
46 |     'EndLR':1e-9,
47 |     'Power':1.0,
48 |     # Params for StepLR only
49 |     'LRStep': 30,
50 |     'LRGamma': 0.1,
51 |     ##########
52 | 
53 |     'WeightIniter': 'XavierNorm',
54 | 
55 |     # Params for NormWeightIniter only
56 |     'InitMean' : 0,
57 |     'InitStd' : 1,
58 | 
59 |     'AtomFeatureSize': 39,
60 |     'BondFeatureSize': 10,
61 | 
62 |     'GCNReadout': 'Add',
63 | 
64 | 
65 |     'SplitValidSeed': 8,
66 |     'SplitTestSeed': 8,
67 |     'BatchSize': 200,
68 | 
69 | }
70 | AdjustableParamList = {}
71 | SpecificParamList = {
72 |     'DropRate':[0.2],
73 |     'WeightDecay':[4.5],
74 |     'lr':[3],
75 |     'GCNInputSize': [64],
76 |     'GCNHiddenSize': [128],
77 |     'GCNLayers': [3],
78 |     'FPSize':[64],
79 |     'DNNLayers':[[]],
80 | }
81 | 
82 | expcontroller = ACExperimentProcessController(ExpOptions, [BasicParamList, AdjustableParamList, SpecificParamList])
83 | 
84 | expcontroller.ExperimentStart()
85 | 


--------------------------------------------------------------------------------
/ACNet/GCNMixRandom.py:
--------------------------------------------------------------------------------
 1 | from ACComponents.ACProcessControllers import *
 2 | 
 3 | ExpOptions = {
 4 |     'Search': 'greedy',
 5 |     'SeedPerOpt': 3,
 6 |     'SubsetsNum': 1,
 7 |     'OnlyEval': False,
 8 | }
 9 | 
10 | BasicParamList = {
11 |     'ExpName': 'ACMixRandom',
12 |     'MainMetric': 'AUC',
13 |     'DataPath': './ACComponents/ACDataset/data_files/generated_datasets/MMP_AC_Mixed_Screened.json',
14 |     'RootPath': './TestExp/Mix/GCN/',
15 |     'CUDA_VISIBLE_DEVICES': '1',
16 |     'TaskNum': 1,
17 |     'ClassNum': 2,
18 |     'OutputSize': 2,
19 |     'Feature': 'PyGGCN',
20 |     'Model': 'PyGGCN',
21 | 
22 |     'OnlySpecific': True,
23 |     'Weight': True,
24 |     'AC': True,
25 |     'PyG': True,
26 | 
27 |     'ValidRate': 4000,
28 |     'PrintRate': 5,
29 |     'UpdateRate': 1,
30 |     'SplitRate': [0.8, 0.1],
31 |     'Splitter': 'Random',
32 |     'MaxEpoch': 300,
33 |     'LowerThanMaxLimit': 30,
34 |     'DecreasingLimit': 12,
35 | 
36 |     # if OnlyEval == True:
37 |     'EvalModelPath': None,
38 |     'EvalDatasetPath': None,
39 |     'EvalLogAllPreds': None,
40 | 
41 |     'Scheduler': 'PolynomialDecayLR',
42 | 
43 |     # Params for PolynomialDecayLR only
44 |     'WarmupEpoch': 2,
45 |     'LRMaxEpoch':300,
46 |     'EndLR':1e-9,
47 |     'Power':1.0,
48 |     # Params for StepLR only
49 |     'LRStep': 30,
50 |     'LRGamma': 0.1,
51 |     ##########
52 | 
53 |     'WeightIniter': 'XavierNorm',
54 | 
55 |     # Params for NormWeightIniter only
56 |     'InitMean' : 0,
57 |     'InitStd' : 1,
58 | 
59 |     'AtomFeatureSize': 39,
60 |     'BondFeatureSize': 10,
61 | 
62 |     'GCNReadout': 'Add',
63 | 
64 | 
65 |     'SplitValidSeed': 8,
66 |     'SplitTestSeed': 8,
67 |     'BatchSize': 200,
68 | 
69 | }
70 | AdjustableParamList = {}
71 | SpecificParamList = {
72 |     'DropRate':[0.2],
73 |     'WeightDecay':[5],
74 |     'lr':[4],
75 |     'GCNInputSize': [128],
76 |     'GCNHiddenSize': [512],
77 |     'GCNLayers': [2],
78 |     'FPSize':[256],
79 |     'DNNLayers':[[128, 32]],
80 | }
81 | 
82 | expcontroller = ACExperimentProcessController(ExpOptions, [BasicParamList, AdjustableParamList, SpecificParamList])
83 | 
84 | expcontroller.ExperimentStart()
85 | 


--------------------------------------------------------------------------------
/ACNet/GCNMixTarget.py:
--------------------------------------------------------------------------------
 1 | from ACComponents.ACProcessControllers import *
 2 | 
 3 | ExpOptions = {
 4 |     'Search': 'greedy',
 5 |     'SeedPerOpt': 3,
 6 |     'SubsetsNum': 1,
 7 |     'OnlyEval': False,
 8 | }
 9 | 
10 | BasicParamList = {
11 |     'ExpName': 'ACMixTarget',
12 |     'MainMetric': 'AUC',
13 |     'DataPath': './ACComponents/ACDataset/data_files/generated_datasets/MMP_AC_Mixed_Screened.json',
14 |     'RootPath': './TestExp/Mix/GCN/',
15 |     'CUDA_VISIBLE_DEVICES': '1',
16 |     'TaskNum': 1,
17 |     'ClassNum': 2,
18 |     'OutputSize': 2,
19 |     'Feature': 'PyGGCN',
20 |     'Model': 'PyGGCN',
21 | 
22 |     'OnlySpecific': True,
23 |     'Weight': True,
24 |     'AC': True,
25 |     'PyG': True,
26 | 
27 |     'ValidRate': 4000,
28 |     'PrintRate': 5,
29 |     'UpdateRate': 1,
30 |     'SplitRate': [0.8, 0.1],
31 |     'Splitter': 'TargetRandom',
32 |     'MaxEpoch': 300,
33 |     'LowerThanMaxLimit': 30,
34 |     'DecreasingLimit': 12,
35 | 
36 |     # if OnlyEval == True:
37 |     'EvalModelPath': None,
38 |     'EvalDatasetPath': None,
39 |     'EvalLogAllPreds': None,
40 | 
41 |     'Scheduler': 'PolynomialDecayLR',
42 | 
43 |     # Params for PolynomialDecayLR only
44 |     'WarmupEpoch': 2,
45 |     'LRMaxEpoch':300,
46 |     'EndLR':1e-9,
47 |     'Power':1.0,
48 |     # Params for StepLR only
49 |     'LRStep': 30,
50 |     'LRGamma': 0.1,
51 |     ##########
52 | 
53 |     'WeightIniter': 'XavierNorm',
54 | 
55 |     # Params for NormWeightIniter only
56 |     'InitMean' : 0,
57 |     'InitStd' : 1,
58 | 
59 |     'AtomFeatureSize': 39,
60 |     'BondFeatureSize': 10,
61 | 
62 |     'GCNReadout': 'Add',
63 | 
64 | 
65 |     'SplitValidSeed': 8,
66 |     'SplitTestSeed': 8,
67 |     'BatchSize': 200,
68 | 
69 | }
70 | AdjustableParamList = {}
71 | SpecificParamList = {
72 |     'DropRate':[0.2],
73 |     'WeightDecay':[5],
74 |     'lr':[4],
75 |     'GCNInputSize': [128],
76 |     'GCNHiddenSize': [256],
77 |     'GCNLayers': [2],
78 |     'FPSize':[128],
79 |     'DNNLayers':[[64]],
80 | }
81 | 
82 | expcontroller = ACExperimentProcessController(ExpOptions, [BasicParamList, AdjustableParamList, SpecificParamList])
83 | 
84 | expcontroller.ExperimentStart()
85 | 


--------------------------------------------------------------------------------
/ACNet/GCNSmall.py:
--------------------------------------------------------------------------------
 1 | from ACComponents.ACProcessControllers import *
 2 | 
 3 | ExpOptions = {
 4 |     'Search': 'greedy',
 5 |     'SeedPerOpt': 3,
 6 |     'SubsetsNum': 110,
 7 |     'OnlyEval': False,
 8 | }
 9 | 
10 | BasicParamList = {
11 |     'ExpName': 'ACSmall',
12 |     'MainMetric': 'AUC',
13 |     'DataPath': './ACComponents/ACDataset/data_files/generated_datasets/MMP_AC_Small.json',
14 |     'RootPath': './TestExp/Small/GCN/',
15 |     'CUDA_VISIBLE_DEVICES': '1',
16 |     'TaskNum': 1,
17 |     'ClassNum': 2,
18 |     'OutputSize': 2,
19 |     'Feature': 'PyGGCN',
20 |     'Model': 'PyGGCN',
21 | 
22 |     'OnlySpecific': True,
23 |     'Weight': True,
24 |     'AC': True,
25 |     'PyG': True,
26 | 
27 |     'ValidRate': 4000,
28 |     'PrintRate': 5,
29 |     'UpdateRate': 1,
30 |     'SplitRate': [0.8, 0.1],
31 |     'Splitter': 'Random',
32 |     'MaxEpoch': 300,
33 |     'LowerThanMaxLimit': 30,
34 |     'DecreasingLimit': 12,
35 | 
36 |     # if OnlyEval == True:
37 |     'EvalModelPath': None,
38 |     'EvalDatasetPath': None,
39 |     'EvalLogAllPreds': None,
40 | 
41 |     'Scheduler': 'PolynomialDecayLR',
42 | 
43 |     # Params for PolynomialDecayLR only
44 |     'WarmupEpoch': 2,
45 |     'LRMaxEpoch':300,
46 |     'EndLR':1e-9,
47 |     'Power':1.0,
48 |     # Params for StepLR only
49 |     'LRStep': 30,
50 |     'LRGamma': 0.1,
51 |     ##########
52 | 
53 |     'WeightIniter': 'XavierNorm',
54 | 
55 |     # Params for NormWeightIniter only
56 |     'InitMean' : 0,
57 |     'InitStd' : 1,
58 | 
59 |     'AtomFeatureSize': 39,
60 |     'BondFeatureSize': 10,
61 | 
62 |     'GCNReadout': 'Add',
63 | 
64 | 
65 |     'SplitValidSeed': 8,
66 |     'SplitTestSeed': 8,
67 |     'BatchSize': 32,
68 | 
69 | }
70 | AdjustableParamList = {}
71 | SpecificParamList = {
72 |     'DropRate':[0.2],
73 |     'WeightDecay':[4.5],
74 |     'lr':[3],
75 |     'GCNInputSize': [64],
76 |     'GCNHiddenSize': [128],
77 |     'GCNLayers': [3],
78 |     'FPSize':[64],
79 |     'DNNLayers':[[]],
80 | }
81 | 
82 | expcontroller = ACExperimentProcessController(ExpOptions, [BasicParamList, AdjustableParamList, SpecificParamList])
83 | 
84 | expcontroller.ExperimentStart()
85 | 


--------------------------------------------------------------------------------
/ACNet/GINLarge.py:
--------------------------------------------------------------------------------
 1 | from ACComponents.ACProcessControllers import *
 2 | 
 3 | ExpOptions = {
 4 |     'Search': 'greedy',
 5 |     'SeedPerOpt': 3,
 6 |     'SubsetsNum': 3,
 7 |     'OnlyEval': False,
 8 | }
 9 | 
10 | BasicParamList = {
11 |     'ExpName': 'ACLarge',
12 |     'MainMetric': 'AUC',
13 |     'DataPath': './ACComponents/ACDataset/data_files/generated_datasets/MMP_AC_Large.json',
14 |     'RootPath': './TestExp/Large/GIN/',
15 |     'CUDA_VISIBLE_DEVICES': '1',
16 |     'TaskNum': 1,
17 |     'ClassNum': 2,
18 |     'OutputSize': 2,
19 |     'Feature': 'PyGGIN',
20 |     'Model': 'PyGGIN',
21 | 
22 |     'OnlySpecific': True,
23 |     'Weight': True,
24 |     'AC': True,
25 |     'PyG': True,
26 | 
27 |     'ValidRate': 4000,
28 |     'PrintRate': 5,
29 |     'UpdateRate': 1,
30 |     'SplitRate': [0.8, 0.1],
31 |     'Splitter': 'Random',
32 |     'MaxEpoch': 300,
33 |     'LowerThanMaxLimit': 30,
34 |     'DecreasingLimit': 12,
35 | 
36 |     # if OnlyEval == True:
37 |     'EvalModelPath': None,
38 |     'EvalDatasetPath': None,
39 |     'EvalLogAllPreds': None,
40 | 
41 |     'Scheduler': 'PolynomialDecayLR',
42 | 
43 |     # Params for PolynomialDecayLR only
44 |     'WarmupEpoch': 2,
45 |     'LRMaxEpoch':300,
46 |     'EndLR':1e-9,
47 |     'Power':1.0,
48 |     # Params for StepLR only
49 |     'LRStep': 30,
50 |     'LRGamma': 0.1,
51 |     ##########
52 | 
53 |     'WeightIniter': 'XavierNorm',
54 | 
55 |     # Params for NormWeightIniter only
56 |     'InitMean' : 0,
57 |     'InitStd' : 1,
58 | 
59 |     'AtomFeatureSize': 39,
60 |     'BondFeatureSize': 10,
61 | 
62 |     'GCNReadout': 'Add',
63 | 
64 | 
65 |     'SplitValidSeed': 8,
66 |     'SplitTestSeed': 8,
67 |     'BatchSize': 200,
68 | 
69 | }
70 | AdjustableParamList = {}
71 | SpecificParamList = {
72 |     'DropRate':[0.2],
73 |     'WeightDecay':[4.5],
74 |     'lr':[3],
75 |     'GINInputSize': [64],
76 |     'GINHiddenSize': [64],
77 |     'GINLayers': [3],
78 |     'GINEps': [0],
79 |     'FPSize':[32],
80 |     'DNNLayers':[[]],
81 | }
82 | 
83 | expcontroller = ACExperimentProcessController(ExpOptions, [BasicParamList, AdjustableParamList, SpecificParamList])
84 | 
85 | expcontroller.ExperimentStart()
86 | 


--------------------------------------------------------------------------------
/ACNet/GINMedium.py:
--------------------------------------------------------------------------------
 1 | from ACComponents.ACProcessControllers import *
 2 | 
 3 | ExpOptions = {
 4 |     'Search': 'greedy',
 5 |     'SeedPerOpt': 3,
 6 |     'SubsetsNum': 64,
 7 |     'OnlyEval': False,
 8 | }
 9 | 
10 | BasicParamList = {
11 |     'ExpName': 'ACMedium',
12 |     'MainMetric': 'AUC',
13 |     'DataPath': './ACComponents/ACDataset/data_files/generated_datasets/MMP_AC_Medium.json',
14 |     'RootPath': './TestExp/Medium/GIN/',
15 |     'CUDA_VISIBLE_DEVICES': '1',
16 |     'TaskNum': 1,
17 |     'ClassNum': 2,
18 |     'OutputSize': 2,
19 |     'Feature': 'PyGGIN',
20 |     'Model': 'PyGGIN',
21 | 
22 |     'OnlySpecific': True,
23 |     'Weight': True,
24 |     'AC': True,
25 |     'PyG': True,
26 | 
27 |     'ValidRate': 4000,
28 |     'PrintRate': 5,
29 |     'UpdateRate': 1,
30 |     'SplitRate': [0.8, 0.1],
31 |     'Splitter': 'Random',
32 |     'MaxEpoch': 300,
33 |     'LowerThanMaxLimit': 30,
34 |     'DecreasingLimit': 12,
35 | 
36 |     # if OnlyEval == True:
37 |     'EvalModelPath': None,
38 |     'EvalDatasetPath': None,
39 |     'EvalLogAllPreds': None,
40 | 
41 |     'Scheduler': 'PolynomialDecayLR',
42 | 
43 |     # Params for PolynomialDecayLR only
44 |     'WarmupEpoch': 2,
45 |     'LRMaxEpoch':300,
46 |     'EndLR':1e-9,
47 |     'Power':1.0,
48 |     # Params for StepLR only
49 |     'LRStep': 30,
50 |     'LRGamma': 0.1,
51 |     ##########
52 | 
53 |     'WeightIniter': 'XavierNorm',
54 | 
55 |     # Params for NormWeightIniter only
56 |     'InitMean' : 0,
57 |     'InitStd' : 1,
58 | 
59 |     'AtomFeatureSize': 39,
60 |     'BondFeatureSize': 10,
61 | 
62 |     'GCNReadout': 'Add',
63 | 
64 | 
65 |     'SplitValidSeed': 8,
66 |     'SplitTestSeed': 8,
67 |     'BatchSize': 200,
68 | 
69 | }
70 | AdjustableParamList = {}
71 | SpecificParamList = {
72 |     'DropRate':[0.2],
73 |     'WeightDecay':[4.5],
74 |     'lr':[3],
75 |     'GINInputSize': [64],
76 |     'GINHiddenSize': [128],
77 |     'GINLayers': [3],
78 |     'GINEps': [0],
79 |     'FPSize':[64],
80 |     'DNNLayers':[[]],
81 | }
82 | 
83 | expcontroller = ACExperimentProcessController(ExpOptions, [BasicParamList, AdjustableParamList, SpecificParamList])
84 | 
85 | expcontroller.ExperimentStart()
86 | 


--------------------------------------------------------------------------------
/ACNet/GINSmall.py:
--------------------------------------------------------------------------------
 1 | from ACComponents.ACProcessControllers import *
 2 | 
 3 | ExpOptions = {
 4 |     'Search': 'greedy',
 5 |     'SeedPerOpt': 3,
 6 |     'SubsetsNum': 110,
 7 |     'OnlyEval': False,
 8 | }
 9 | 
10 | BasicParamList = {
11 |     'ExpName': 'ACSmall',
12 |     'MainMetric': 'AUC',
13 |     'DataPath': './ACComponents/ACDataset/data_files/generated_datasets/MMP_AC_Small.json',
14 |     'RootPath': './TestExp/Small/GIN/',
15 |     'CUDA_VISIBLE_DEVICES': '1',
16 |     'TaskNum': 1,
17 |     'ClassNum': 2,
18 |     'OutputSize': 2,
19 |     'Feature': 'PyGGIN',
20 |     'Model': 'PyGGIN',
21 | 
22 |     'OnlySpecific': True,
23 |     'Weight': True,
24 |     'AC': True,
25 |     'PyG': True,
26 | 
27 |     'ValidRate': 4000,
28 |     'PrintRate': 5,
29 |     'UpdateRate': 1,
30 |     'SplitRate': [0.8, 0.1],
31 |     'Splitter': 'Random',
32 |     'MaxEpoch': 300,
33 |     'LowerThanMaxLimit': 30,
34 |     'DecreasingLimit': 12,
35 | 
36 |     # if OnlyEval == True:
37 |     'EvalModelPath': None,
38 |     'EvalDatasetPath': None,
39 |     'EvalLogAllPreds': None,
40 | 
41 |     'Scheduler': 'PolynomialDecayLR',
42 | 
43 |     # Params for PolynomialDecayLR only
44 |     'WarmupEpoch': 2,
45 |     'LRMaxEpoch':300,
46 |     'EndLR':1e-9,
47 |     'Power':1.0,
48 |     # Params for StepLR only
49 |     'LRStep': 30,
50 |     'LRGamma': 0.1,
51 |     ##########
52 | 
53 |     'WeightIniter': 'XavierNorm',
54 | 
55 |     # Params for NormWeightIniter only
56 |     'InitMean' : 0,
57 |     'InitStd' : 1,
58 | 
59 |     'AtomFeatureSize': 39,
60 |     'BondFeatureSize': 10,
61 | 
62 |     'GCNReadout': 'Add',
63 | 
64 | 
65 |     'SplitValidSeed': 8,
66 |     'SplitTestSeed': 8,
67 |     'BatchSize': 32,
68 | 
69 | }
70 | AdjustableParamList = {}
71 | SpecificParamList = {
72 |     'DropRate':[0.2],
73 |     'WeightDecay':[4.5],
74 |     'lr':[3],
75 |     'GINInputSize': [64],
76 |     'GINHiddenSize': [64],
77 |     'GINLayers': [3],
78 |     'GINEps': [0],
79 |     'FPSize':[32],
80 |     'DNNLayers':[[]],
81 | }
82 | 
83 | expcontroller = ACExperimentProcessController(ExpOptions, [BasicParamList, AdjustableParamList, SpecificParamList])
84 | 
85 | expcontroller.ExperimentStart()
86 | 


--------------------------------------------------------------------------------
/ACNet/GROVERFew.py:
--------------------------------------------------------------------------------
 1 | from ACComponents.ACProcessControllers import *
 2 | 
 3 | ExpOptions = {
 4 |     'Search': 'greedy',
 5 |     'SeedPerOpt': 3,
 6 |     'SubsetsNum': 13,
 7 |     'OnlyEval': False,
 8 |     'Finetune':True,
 9 | }
10 | 
11 | BasicParamList = {
12 |     'ExpName': 'ACFew',
13 |     'MainMetric': 'AUC',
14 |     'DataPath': './ACComponents/ACDataset/data_files/MMP_AC_Few_representation/GROVER.npz',
15 |     'RootPath': './TestExp/Few/GROVER/',
16 |     'CUDA_VISIBLE_DEVICES': '2',
17 |     'TaskNum': 1,
18 |     'ClassNum': 2,
19 |     'OutputSize': 2,
20 |     'Feature': 'Raw',
21 |     'Model': 'MLP',
22 | 
23 |     # if Feature == Raw
24 |     'RawFeatureSize': 3400,
25 | 
26 |     'OnlySpecific': True,
27 |     'Weight': True,
28 |     'AC': True,
29 |     'PyG': False,
30 | 
31 |     'ValidRate': 40000,
32 |     'PrintRate': 5,
33 |     'UpdateRate': 1,
34 |     'ValidBalance': False,
35 |     'TestBalance': False,
36 |     'SplitRate': [0.8, 0.1],
37 |     'Splitter': 'Random',
38 |     'MaxEpoch': 300,
39 |     'LowerThanMaxLimit': 12,
40 |     'DecreasingLimit': 8,
41 | 
42 |     # if OnlyEval == True:
43 |     'EvalModelPath': None,
44 |     'EvalDatasetPath': None,
45 |     'EvalLogAllPreds': None,
46 | 
47 |     'Scheduler': 'PolynomialDecayLR',
48 | 
49 |     # Params for PolynomialDecayLR only
50 |     'WarmupEpoch': 2,
51 |     'LRMaxEpoch':300,
52 |     'EndLR':1e-9,
53 |     'Power':1.0,
54 |     # Params for StepLR only
55 |     'LRStep': 30,
56 |     'LRGamma': 0.1,
57 |     ##########
58 | 
59 |     'WeightIniter': None,
60 | 
61 |     # Params for NormWeightIniter only
62 |     'InitMean' : 0,
63 |     'InitStd' : 1,
64 | 
65 |     # Training Params to be adujsted. If the param is not needed to be adjusted, set the value here.
66 |     'SplitValidSeed': 8,
67 |     'SplitTestSeed': 8,
68 |     'BatchSize': 8,
69 | 
70 | }
71 | AdjustableParamList = {}
72 | SpecificParamList = {
73 |     'DropRate':[0.2],
74 |     'WeightDecay':[5],
75 |     'lr':[4],
76 |     'DNNLayers':[[256,64]],
77 | }
78 | 
79 | 
80 | expcontroller = ACExperimentProcessController(ExpOptions, [BasicParamList, AdjustableParamList, SpecificParamList])
81 | 
82 | expcontroller.ExperimentStart()
83 | 
84 | 


--------------------------------------------------------------------------------
/ACNet/GRULarge.py:
--------------------------------------------------------------------------------
 1 | from ACComponents.ACProcessControllers import *
 2 | 
 3 | ExpOptions = {
 4 |     'Search': 'greedy',
 5 |     'SeedPerOpt': 3,
 6 |     'SubsetsNum': 3,
 7 |     'OnlyEval': False,
 8 | }
 9 | 
10 | BasicParamList = {
11 |     'ExpName': 'ACLarge',
12 |     'MainMetric': 'AUC',
13 |     'DataPath': './ACComponents/ACDataset/data_files/generated_datasets/MMP_AC_Large.json',
14 |     'RootPath': './TestExp/Large/GRU/',
15 |     'CUDA_VISIBLE_DEVICES': '0',
16 |     'TaskNum': 1,
17 |     'ClassNum': 2,
18 |     'OutputSize': 2,
19 |     'Feature': 'SMILES',
20 |     'Model': 'GRU',
21 | 
22 |     'OnlySpecific': True,
23 |     'Weight': True,
24 |     'AC': True,
25 |     'PyG': False,
26 | 
27 |     'ValidRate': 40000,
28 |     'PrintRate': 5,
29 |     'UpdateRate': 1,
30 |     'SplitRate': [0.8, 0.1],
31 |     'Splitter': 'Random',
32 |     'MaxEpoch': 300,
33 |     'LowerThanMaxLimit': 12,
34 |     'DecreasingLimit': 8,
35 | 
36 |     # if OnlyEval == True:
37 |     'EvalModelPath': None,
38 |     'EvalDatasetPath': None,
39 |     'EvalLogAllPreds': None,
40 | 
41 |     'Scheduler': 'PolynomialDecayLR',
42 | 
43 |     # Params for PolynomialDecayLR only
44 |     'WarmupEpoch': 2,
45 |     'LRMaxEpoch':300,
46 |     'EndLR':1e-9,
47 |     'Power':1.0,
48 |     # Params for StepLR only
49 |     'LRStep': 30,
50 |     'LRGamma': 0.1,
51 |     ##########
52 | 
53 |     'WeightIniter': None,
54 | 
55 |     # Params for NormWeightIniter only
56 |     'InitMean' : 0,
57 |     'InitStd' : 1,
58 | 
59 |     'SplitValidSeed': 8,
60 |     'SplitTestSeed': 8,
61 |     'BatchSize': 256,
62 | 
63 | }
64 | AdjustableParamList = {}
65 | SpecificParamList = {
66 |     'DropRate':[0.2],
67 |     'WeightDecay':[5],
68 |     'lr':[4],
69 |     'GRULayers':[3],
70 |     'FPSize':[256],
71 |     'DNNLayers':[[512, 128, 32]],
72 | }
73 | 
74 | 
75 | expcontroller = ACExperimentProcessController(ExpOptions, [BasicParamList, AdjustableParamList, SpecificParamList])
76 | 
77 | expcontroller.ExperimentStart()
78 | 
79 | 


--------------------------------------------------------------------------------
/ACNet/GRUMedium.py:
--------------------------------------------------------------------------------
 1 | from ACComponents.ACProcessControllers import *
 2 | 
 3 | ExpOptions = {
 4 |     'Search': 'greedy',
 5 |     'SeedPerOpt': 3,
 6 |     'SubsetsNum': 64,
 7 |     'OnlyEval': False,
 8 | }
 9 | 
10 | BasicParamList = {
11 |     'ExpName': 'ACMedium',
12 |     'MainMetric': 'AUC',
13 |     'DataPath': './ACComponents/ACDataset/data_files/generated_datasets/MMP_AC_Medium.json',
14 |     'RootPath': './TestExp/Medium/GRU/',
15 |     'CUDA_VISIBLE_DEVICES': '0',
16 |     'TaskNum': 1,
17 |     'ClassNum': 2,
18 |     'OutputSize': 2,
19 |     'Feature': 'SMILES',
20 |     'Model': 'GRU',
21 | 
22 |     'OnlySpecific': True,
23 |     'Weight': True,
24 |     'AC': True,
25 |     'PyG': False,
26 | 
27 |     'ValidRate': 40000,
28 |     'PrintRate': 5,
29 |     'UpdateRate': 1,
30 |     'SplitRate': [0.8, 0.1],
31 |     'Splitter': 'Random',
32 |     'MaxEpoch': 300,
33 |     'LowerThanMaxLimit': 12,
34 |     'DecreasingLimit': 8,
35 | 
36 |     # if OnlyEval == True:
37 |     'EvalModelPath': None,
38 |     'EvalDatasetPath': None,
39 |     'EvalLogAllPreds': None,
40 | 
41 |     'Scheduler': 'PolynomialDecayLR',
42 | 
43 |     # Params for PolynomialDecayLR only
44 |     'WarmupEpoch': 2,
45 |     'LRMaxEpoch':300,
46 |     'EndLR':1e-9,
47 |     'Power':1.0,
48 |     # Params for StepLR only
49 |     'LRStep': 30,
50 |     'LRGamma': 0.1,
51 |     ##########
52 | 
53 |     'WeightIniter': None,
54 | 
55 |     # Params for NormWeightIniter only
56 |     'InitMean' : 0,
57 |     'InitStd' : 1,
58 | 
59 |     'SplitValidSeed': 8,
60 |     'SplitTestSeed': 8,
61 |     'BatchSize': 256,
62 | 
63 | }
64 | AdjustableParamList = {}
65 | SpecificParamList = {
66 |     'DropRate':[0.2],
67 |     'WeightDecay':[4.5],
68 |     'lr':[3],
69 |     'GRULayers':[2],
70 |     'FPSize':[64],
71 |     'DNNLayers':[[128]],
72 | }
73 | 
74 | 
75 | expcontroller = ACExperimentProcessController(ExpOptions, [BasicParamList, AdjustableParamList, SpecificParamList])
76 | 
77 | expcontroller.ExperimentStart()
78 | 
79 | 


--------------------------------------------------------------------------------
/ACNet/GRUMixRandom.py:
--------------------------------------------------------------------------------
 1 | from ACComponents.ACProcessControllers import *
 2 | 
 3 | ExpOptions = {
 4 |     'Search': 'greedy',
 5 |     'SeedPerOpt': 3,
 6 |     'SubsetsNum': 1,
 7 |     'OnlyEval': False,
 8 | }
 9 | 
10 | BasicParamList = {
11 |     'ExpName': 'ACMixRandom',
12 |     'MainMetric': 'AUC',
13 |     'DataPath': './ACComponents/ACDataset/data_files/generated_datasets/MMP_AC_Mixed_Screened.json',
14 |     'RootPath': './TestExp/Mix/GRU/',
15 |     'CUDA_VISIBLE_DEVICES': '0',
16 |     'TaskNum': 1,
17 |     'ClassNum': 2,
18 |     'OutputSize': 2,
19 |     'Feature': 'SMILES',
20 |     'Model': 'GRU',
21 | 
22 |     'OnlySpecific': True,
23 |     'Weight': True,
24 |     'AC': True,
25 |     'PyG': False,
26 | 
27 |     'ValidRate': 40000,
28 |     'PrintRate': 5,
29 |     'UpdateRate': 1,
30 |     'SplitRate': [0.8, 0.1],
31 |     'Splitter': 'Random',
32 |     'MaxEpoch': 300,
33 |     'LowerThanMaxLimit': 12,
34 |     'DecreasingLimit': 8,
35 | 
36 |     # if OnlyEval == True:
37 |     'EvalModelPath': None,
38 |     'EvalDatasetPath': None,
39 |     'EvalLogAllPreds': None,
40 | 
41 |     'Scheduler': 'PolynomialDecayLR',
42 | 
43 |     # Params for PolynomialDecayLR only
44 |     'WarmupEpoch': 2,
45 |     'LRMaxEpoch':300,
46 |     'EndLR':1e-9,
47 |     'Power':1.0,
48 |     # Params for StepLR only
49 |     'LRStep': 30,
50 |     'LRGamma': 0.1,
51 |     ##########
52 | 
53 |     'WeightIniter': None,
54 | 
55 |     # Params for NormWeightIniter only
56 |     'InitMean' : 0,
57 |     'InitStd' : 1,
58 | 
59 | 
60 |     # Training Params to be adujsted. If the param is not needed to be adjusted, set the value here.
61 |     'SplitValidSeed': 8,
62 |     'SplitTestSeed': 8,
63 |     'BatchSize': 256,
64 | 
65 | }
66 | AdjustableParamList = {}
67 | SpecificParamList = {
68 |     'DropRate':[0.2],
69 |     'WeightDecay':[4.5],
70 |     'lr':[3],
71 |     'GRULayers':[2],
72 |     'FPSize':[64],
73 |     'DNNLayers':[[128]],
74 | }
75 | 
76 | 
77 | expcontroller = ACExperimentProcessController(ExpOptions, [BasicParamList, AdjustableParamList, SpecificParamList])
78 | 
79 | expcontroller.ExperimentStart()
80 | 
81 | 


--------------------------------------------------------------------------------
/ACNet/GRUMixTarget.py:
--------------------------------------------------------------------------------
 1 | from ACComponents.ACProcessControllers import *
 2 | 
 3 | ExpOptions = {
 4 |     'Search': 'greedy',
 5 |     'SeedPerOpt': 3,
 6 |     'SubsetsNum': 1,
 7 |     'OnlyEval': False,
 8 | }
 9 | 
10 | BasicParamList = {
11 |     'ExpName': 'ACMixTarget',
12 |     'MainMetric': 'AUC',
13 |     'DataPath': './ACComponents/ACDataset/data_files/generated_datasets/MMP_AC_Mixed_Screened.json',
14 |     'RootPath': './TestExp/Mix/GRU/',
15 |     'CUDA_VISIBLE_DEVICES': '0',
16 |     'TaskNum': 1,
17 |     'ClassNum': 2,
18 |     'OutputSize': 2,
19 |     'Feature': 'SMILES',
20 |     'Model': 'GRU',
21 | 
22 |     'OnlySpecific': True,
23 |     'Weight': True,
24 |     'AC': True,
25 |     'PyG': False,
26 | 
27 |     'ValidRate': 40000,
28 |     'PrintRate': 5,
29 |     'UpdateRate': 1,
30 |     'SplitRate': [0.8, 0.1],
31 |     'Splitter': 'TargetRandom',
32 |     'MaxEpoch': 300,
33 |     'LowerThanMaxLimit': 12,
34 |     'DecreasingLimit': 8,
35 | 
36 |     # if OnlyEval == True:
37 |     'EvalModelPath': None,
38 |     'EvalDatasetPath': None,
39 |     'EvalLogAllPreds': None,
40 | 
41 |     'Scheduler': 'PolynomialDecayLR',
42 | 
43 |     # Params for PolynomialDecayLR only
44 |     'WarmupEpoch': 2,
45 |     'LRMaxEpoch':300,
46 |     'EndLR':1e-9,
47 |     'Power':1.0,
48 |     # Params for StepLR only
49 |     'LRStep': 30,
50 |     'LRGamma': 0.1,
51 |     ##########
52 | 
53 |     'WeightIniter': None,
54 | 
55 |     # Params for NormWeightIniter only
56 |     'InitMean' : 0,
57 |     'InitStd' : 1,
58 | 
59 | 
60 |     # Training Params to be adujsted. If the param is not needed to be adjusted, set the value here.
61 |     'SplitValidSeed': 8,
62 |     'SplitTestSeed': 8,
63 |     'BatchSize': 256,
64 | 
65 | }
66 | AdjustableParamList = {}
67 | SpecificParamList = {
68 |     'DropRate':[0.2],
69 |     'WeightDecay':[5],
70 |     'lr':[4],
71 |     'GRULayers':[3],
72 |     'FPSize':[128],
73 |     'DNNLayers':[[256,64]],
74 | }
75 | 
76 | 
77 | expcontroller = ACExperimentProcessController(ExpOptions, [BasicParamList, AdjustableParamList, SpecificParamList])
78 | 
79 | expcontroller.ExperimentStart()
80 | 
81 | 


--------------------------------------------------------------------------------
/ACNet/GRUSmall.py:
--------------------------------------------------------------------------------
 1 | from ACComponents.ACProcessControllers import *
 2 | 
 3 | ExpOptions = {
 4 |     'Search': 'greedy',
 5 |     'SeedPerOpt': 3,
 6 |     'SubsetsNum': 110,
 7 |     'OnlyEval': False,
 8 | }
 9 | 
10 | BasicParamList = {
11 |     'ExpName': 'ACSmall',
12 |     'MainMetric': 'AUC',
13 |     'DataPath': './ACComponents/ACDataset/data_files/generated_datasets/MMP_AC_Small.json',
14 |     'RootPath': './TestExp/Small/GRU/',
15 |     'CUDA_VISIBLE_DEVICES': '0',
16 |     'TaskNum': 1,
17 |     'ClassNum': 2,
18 |     'OutputSize': 2,
19 |     'Feature': 'SMILES',
20 |     'Model': 'GRU',
21 | 
22 |     'OnlySpecific': True,
23 |     'Weight': True,
24 |     'AC': True,
25 |     'PyG': False,
26 | 
27 |     'ValidRate': 40000,
28 |     'PrintRate': 5,
29 |     'UpdateRate': 1,
30 |     'SplitRate': [0.8, 0.1],
31 |     'Splitter': 'Random',
32 |     'MaxEpoch': 300,
33 |     'LowerThanMaxLimit': 12,
34 |     'DecreasingLimit': 8,
35 | 
36 |     # if OnlyEval == True:
37 |     'EvalModelPath': None,
38 |     'EvalDatasetPath': None,
39 |     'EvalLogAllPreds': None,
40 | 
41 |     'Scheduler': 'PolynomialDecayLR',
42 | 
43 |     # Params for PolynomialDecayLR only
44 |     'WarmupEpoch': 2,
45 |     'LRMaxEpoch':300,
46 |     'EndLR':1e-9,
47 |     'Power':1.0,
48 |     # Params for StepLR only
49 |     'LRStep': 30,
50 |     'LRGamma': 0.1,
51 |     ##########
52 | 
53 |     'WeightIniter': None,
54 | 
55 |     # Params for NormWeightIniter only
56 |     'InitMean' : 0,
57 |     'InitStd' : 1,
58 | 
59 |     'SplitValidSeed': 8,
60 |     'SplitTestSeed': 8,
61 |     'BatchSize': 32,
62 | 
63 | }
64 | AdjustableParamList = {}
65 | SpecificParamList = {
66 |     'DropRate':[0.2],
67 |     'WeightDecay':[4.5],
68 |     'lr':[3],
69 |     'GRULayers':[2],
70 |     'FPSize':[64],
71 |     'DNNLayers':[[128]],
72 | }
73 | 
74 | 
75 | expcontroller = ACExperimentProcessController(ExpOptions, [BasicParamList, AdjustableParamList, SpecificParamList])
76 | 
77 | expcontroller.ExperimentStart()
78 | 
79 | 


--------------------------------------------------------------------------------
/ACNet/GraphLoGFew.py:
--------------------------------------------------------------------------------
 1 | from ACComponents.ACProcessControllers import *
 2 | 
 3 | ExpOptions = {
 4 |     'Search': 'greedy',
 5 |     'SeedPerOpt': 3,
 6 |     'SubsetsNum': 13,
 7 |     'OnlyEval': False,
 8 |     'Finetune':True,
 9 | }
10 | 
11 | BasicParamList = {
12 |     'ExpName': 'ACFew',
13 |     'MainMetric': 'AUC',
14 |     'DataPath': './ACComponents/ACDataset/data_files/MMP_AC_Few_representation/GraphLoG.npz',
15 |     'RootPath': './TestExp/Few/GraphLoG/',
16 |     'CUDA_VISIBLE_DEVICES': '2',
17 |     'TaskNum': 1,
18 |     'ClassNum': 2,
19 |     'OutputSize': 2,
20 |     'Feature': 'Raw',
21 |     'Model': 'MLP',
22 | 
23 |     # if Feature == Raw
24 |     'RawFeatureSize': 300,
25 | 
26 |     'OnlySpecific': True,
27 |     'Weight': True,
28 |     'AC': True,
29 |     'PyG': False,
30 | 
31 |     'ValidRate': 40000,
32 |     'PrintRate': 5,
33 |     'UpdateRate': 1,
34 |     'ValidBalance': False,
35 |     'TestBalance': False,
36 |     'SplitRate': [0.8, 0.1],
37 |     'Splitter': 'Random',
38 |     'MaxEpoch': 300,
39 |     'LowerThanMaxLimit': 12,
40 |     'DecreasingLimit': 8,
41 | 
42 |     # if OnlyEval == True:
43 |     'EvalModelPath': None,
44 |     'EvalDatasetPath': None,
45 |     'EvalLogAllPreds': None,
46 | 
47 |     'Scheduler': 'PolynomialDecayLR',
48 | 
49 |     # Params for PolynomialDecayLR only
50 |     'WarmupEpoch': 2,
51 |     'LRMaxEpoch':300,
52 |     'EndLR':1e-9,
53 |     'Power':1.0,
54 |     # Params for StepLR only
55 |     'LRStep': 30,
56 |     'LRGamma': 0.1,
57 |     ##########
58 | 
59 |     'WeightIniter': None,
60 | 
61 |     # Params for NormWeightIniter only
62 |     'InitMean' : 0,
63 |     'InitStd' : 1,
64 | 
65 |     # Training Params to be adujsted. If the param is not needed to be adjusted, set the value here.
66 |     'SplitValidSeed': 8,
67 |     'SplitTestSeed': 8,
68 |     'BatchSize': 8,
69 | 
70 | }
71 | AdjustableParamList = {}
72 | SpecificParamList = {
73 |     'DropRate':[0.2],
74 |     'WeightDecay':[5],
75 |     'lr':[4],
76 |     'DNNLayers':[[256,64]],
77 | }
78 | 
79 | 
80 | expcontroller = ACExperimentProcessController(ExpOptions, [BasicParamList, AdjustableParamList, SpecificParamList])
81 | 
82 | expcontroller.ExperimentStart()
83 | 
84 | 


--------------------------------------------------------------------------------
/ACNet/GraphormerLarge.py:
--------------------------------------------------------------------------------
  1 | from ACComponents.ACProcessControllers import *
  2 | 
  3 | ExpOptions = {
  4 |     'Search': 'greedy',
  5 |     'SeedPerOpt': 3,
  6 |     'SubsetsNum': 3,
  7 |     'OnlyEval': False,
  8 | }
  9 | 
 10 | BasicParamList = {
 11 |     'ExpName': 'ACLarge',
 12 |     'MainMetric': 'AUC',
 13 |     'DataPath': './ACComponents/ACDataset/data_files/generated_datasets/MMP_AC_Large.json',
 14 |     'RootPath': './TestExp/Large/Graphormer/',
 15 |     'CUDA_VISIBLE_DEVICES': '3',
 16 |     'TaskNum': 1,
 17 |     'ClassNum': 2,
 18 |     'OutputSize': 2,
 19 |     'Feature': 'Graphormer',
 20 |     'Model': 'Graphormer',
 21 | 
 22 |     'OnlySpecific': True,
 23 |     'Weight': True,
 24 |     'AC': True,
 25 |     'PyG': False,
 26 | 
 27 |     'ValidRate': 4000,
 28 |     'PrintRate': 5,
 29 |     'UpdateRate': 1,
 30 |     'SplitRate': [0.8, 0.1],
 31 |     'Splitter': 'Random',
 32 |     'MaxEpoch': 300,
 33 |     'LowerThanMaxLimit': 30,
 34 |     'DecreasingLimit': 12,
 35 | 
 36 |     # if OnlyEval == True:
 37 |     'EvalModelPath': None,
 38 |     'EvalDatasetPath': None,
 39 |     'EvalLogAllPreds': None,
 40 | 
 41 |     'Scheduler': 'PolynomialDecayLR',
 42 |     # 'Scheduler': 'EmptyLRScheduler',
 43 | 
 44 | 
 45 |     # Params for PolynomialDecayLR only
 46 |     'WarmupEpoch': 2,
 47 |     'LRMaxEpoch':300,
 48 |     'EndLR':1e-9,
 49 |     'Power':1.0,
 50 |     # Params for StepLR only
 51 |     'LRStep': 30,
 52 |     'LRGamma': 0.1,
 53 |     ##########
 54 | 
 55 |     'WeightIniter': None,
 56 | 
 57 |     # Params for NormWeightIniter only
 58 |     'InitMean' : 0,
 59 |     'InitStd' : 1,
 60 | 
 61 |     'FeatureCategory': 'BaseOH',
 62 | 
 63 |     # Params for Graphormer only
 64 |     'num_offset': 16,
 65 |     'num_atoms': 16 * 39,      # offset * AtomFeatureNum
 66 |     'num_in_degree': 16,       # length of indegree dictionary
 67 |     'num_out_degree': 16,      # length of outdegree dictionary
 68 |     'num_edges': 16 * 10,      # offset * BondFeatureNum
 69 |     'num_spatial': 512,         # length of SPD dictionary, must be larger than the largest SPD
 70 |     'num_edge_dis': 30,         # must be larger than multi-hop-max-dist
 71 |     'dropout_rate': 0.1,
 72 |     'intput_dropout_rate': 0.1,
 73 |     'edge_type': 'multi_hop',
 74 |     'multi_hop_max_dist': 20,
 75 |     'flag': False,
 76 |     'spatial_pos_max': 20,
 77 |     'max_node': 512,
 78 | 
 79 |     # Training Params to be adujsted. If the param is not needed to be adjusted, set the value here.
 80 |     'SplitValidSeed': 8,
 81 |     'SplitTestSeed': 8,
 82 |     'BatchSize': 32,
 83 | 
 84 | }
 85 | AdjustableParamList = {}
 86 | SpecificParamList = {
 87 |     'DropRate':[0.2],
 88 |     'WeightDecay':[5],
 89 |     'lr':[4],
 90 |     'num_encoder_layers':[8],
 91 |     'num_attention_heads':[16],
 92 |     'embedding_dim':[128],
 93 |     'ffn_dim':[128],
 94 |     'attention_dropout_rate':[0.2],
 95 |     'DNNLayers':[[64]],
 96 | }
 97 | 
 98 | 
 99 | expcontroller = ACExperimentProcessController(ExpOptions, [BasicParamList, AdjustableParamList, SpecificParamList])
100 | 
101 | expcontroller.ExperimentStart()
102 | 
103 | 


--------------------------------------------------------------------------------
/ACNet/GraphormerMedium.py:
--------------------------------------------------------------------------------
  1 | from ACComponents.ACProcessControllers import *
  2 | 
  3 | ExpOptions = {
  4 |     'Search': 'greedy',
  5 |     'SeedPerOpt': 3,
  6 |     'SubsetsNum': 64,
  7 |     'OnlyEval': False,
  8 | }
  9 | 
 10 | BasicParamList = {
 11 |     'ExpName': 'ACMedium',
 12 |     'MainMetric': 'AUC',
 13 |     'DataPath': './ACComponents/ACDataset/data_files/generated_datasets/MMP_AC_Medium.json',
 14 |     'RootPath': './TestExp/Medium/Graphormer/',
 15 |     'CUDA_VISIBLE_DEVICES': '3',
 16 |     'TaskNum': 1,
 17 |     'ClassNum': 2,
 18 |     'OutputSize': 2,
 19 |     'Feature': 'Graphormer',
 20 |     'Model': 'Graphormer',
 21 | 
 22 |     'OnlySpecific': True,
 23 |     'Weight': True,
 24 |     'AC': True,
 25 |     'PyG': False,
 26 | 
 27 |     'ValidRate': 4000,
 28 |     'PrintRate': 5,
 29 |     'UpdateRate': 1,
 30 |     'SplitRate': [0.8, 0.1],
 31 |     'Splitter': 'Random',
 32 |     'MaxEpoch': 300,
 33 |     'LowerThanMaxLimit': 30,
 34 |     'DecreasingLimit': 12,
 35 | 
 36 |     # if OnlyEval == True:
 37 |     'EvalModelPath': None,
 38 |     'EvalDatasetPath': None,
 39 |     'EvalLogAllPreds': None,
 40 | 
 41 |     'Scheduler': 'PolynomialDecayLR',
 42 |     # 'Scheduler': 'EmptyLRScheduler',
 43 | 
 44 | 
 45 |     # Params for PolynomialDecayLR only
 46 |     'WarmupEpoch': 2,
 47 |     'LRMaxEpoch':300,
 48 |     'EndLR':1e-9,
 49 |     'Power':1.0,
 50 |     # Params for StepLR only
 51 |     'LRStep': 30,
 52 |     'LRGamma': 0.1,
 53 |     ##########
 54 | 
 55 |     'WeightIniter': None,
 56 | 
 57 |     # Params for NormWeightIniter only
 58 |     'InitMean' : 0,
 59 |     'InitStd' : 1,
 60 | 
 61 |     'FeatureCategory': 'BaseOH',
 62 | 
 63 |     # Params for Graphormer only
 64 |     'num_offset': 16,
 65 |     'num_atoms': 16 * 39,      # offset * AtomFeatureNum
 66 |     'num_in_degree': 16,       # length of indegree dictionary
 67 |     'num_out_degree': 16,      # length of outdegree dictionary
 68 |     'num_edges': 16 * 10,      # offset * BondFeatureNum
 69 |     'num_spatial': 512,         # length of SPD dictionary, must be larger than the largest SPD
 70 |     'num_edge_dis': 30,         # must be larger than multi-hop-max-dist
 71 |     'dropout_rate': 0.1,
 72 |     'intput_dropout_rate': 0.1,
 73 |     'edge_type': 'multi_hop',
 74 |     'multi_hop_max_dist': 20,
 75 |     'flag': False,
 76 |     'spatial_pos_max': 20,
 77 |     'max_node': 512,
 78 | 
 79 |     # Training Params to be adujsted. If the param is not needed to be adjusted, set the value here.
 80 |     'SplitValidSeed': 8,
 81 |     'SplitTestSeed': 8,
 82 |     'BatchSize': 32,
 83 | 
 84 | }
 85 | AdjustableParamList = {}
 86 | SpecificParamList = {
 87 |     'DropRate':[0.2],
 88 |     'WeightDecay':[4.5],
 89 |     'lr':[3],
 90 |     'num_encoder_layers':[4],
 91 |     'num_attention_heads':[8],
 92 |     'embedding_dim':[32],
 93 |     'ffn_dim':[32],
 94 |     'attention_dropout_rate':[0.1],
 95 |     'DNNLayers':[[]],
 96 | }
 97 | 
 98 | 
 99 | expcontroller = ACExperimentProcessController(ExpOptions, [BasicParamList, AdjustableParamList, SpecificParamList])
100 | 
101 | expcontroller.ExperimentStart()
102 | 
103 | 


--------------------------------------------------------------------------------
/ACNet/GraphormerMixRandom.py:
--------------------------------------------------------------------------------
  1 | from ACComponents.ACProcessControllers import *
  2 | 
  3 | ExpOptions = {
  4 |     'Search': 'greedy',
  5 |     'SeedPerOpt': 3,
  6 |     'SubsetsNum': 1,
  7 |     'OnlyEval': False,
  8 | }
  9 | 
 10 | BasicParamList = {
 11 |     'ExpName': 'ACMixRandom',
 12 |     'MainMetric': 'AUC',
 13 |     'DataPath': './ACComponents/ACDataset/data_files/generated_datasets/MMP_AC_Mixed_Screened.json',
 14 |     'RootPath': './TestExp/Mix/Graphormer/',
 15 |     'CUDA_VISIBLE_DEVICES': '3',
 16 |     'TaskNum': 1,
 17 |     'ClassNum': 2,
 18 |     'OutputSize': 2,
 19 |     'Feature': 'Graphormer',
 20 |     'Model': 'Graphormer',
 21 | 
 22 |     'OnlySpecific': True,
 23 |     'Weight': True,
 24 |     'AC': True,
 25 |     'PyG': False,
 26 | 
 27 |     'ValidRate': 40000,
 28 |     'PrintRate': 5,
 29 |     'UpdateRate': 1,
 30 |     'SplitRate': [0.8, 0.1],
 31 |     'Splitter': 'Random',
 32 |     'MaxEpoch': 300,
 33 |     'LowerThanMaxLimit': 12,
 34 |     'DecreasingLimit': 8,
 35 | 
 36 |     # if OnlyEval == True:
 37 |     'EvalModelPath': None,
 38 |     'EvalDatasetPath': None,
 39 |     'EvalLogAllPreds': None,
 40 | 
 41 |     'Scheduler': 'PolynomialDecayLR',
 42 |     # 'Scheduler': 'EmptyLRScheduler',
 43 | 
 44 | 
 45 |     # Params for PolynomialDecayLR only
 46 |     'WarmupEpoch': 2,
 47 |     'LRMaxEpoch':300,
 48 |     'EndLR':1e-9,
 49 |     'Power':1.0,
 50 |     # Params for StepLR only
 51 |     'LRStep': 30,
 52 |     'LRGamma': 0.1,
 53 |     ##########
 54 | 
 55 |     'WeightIniter': None,
 56 | 
 57 |     # Params for NormWeightIniter only
 58 |     'InitMean' : 0,
 59 |     'InitStd' : 1,
 60 | 
 61 |     'FeatureCategory': 'BaseED',
 62 | 
 63 |     # Params for Graphormer only
 64 |     'num_offset': 16,
 65 |     'num_atoms': 16 * 8,      # offset * AtomFeatureNum
 66 |     'num_in_degree': 16,       # length of indegree dictionary
 67 |     'num_out_degree': 16,      # length of outdegree dictionary
 68 |     'num_edges': 16 * 4,      # offset * BondFeatureNum
 69 |     'num_spatial': 512,         # length of SPD dictionary, must be larger than the largest SPD
 70 |     'num_edge_dis': 30,         # must be larger than multi-hop-max-dist
 71 |     'dropout_rate': 0.1,
 72 |     'intput_dropout_rate': 0.1,
 73 |     'edge_type': 'multi_hop',
 74 |     'multi_hop_max_dist': 20,
 75 |     'flag': False,
 76 |     'spatial_pos_max': 20,
 77 |     'max_node': 512,
 78 | 
 79 |     # Training Params to be adujsted. If the param is not needed to be adjusted, set the value here.
 80 |     'SplitValidSeed': 8,
 81 |     'SplitTestSeed': 8,
 82 |     'BatchSize': 32,
 83 | 
 84 | }
 85 | AdjustableParamList = {}
 86 | SpecificParamList = {
 87 |     'DropRate':[0.4],
 88 |     'WeightDecay':[5],
 89 |     'lr':[4],
 90 |     'num_encoder_layers':[10],
 91 |     'num_attention_heads':[32],
 92 |     'embedding_dim':[256],
 93 |     'ffn_dim':[256],
 94 |     'attention_dropout_rate':[0.4],
 95 |     'DNNLayers':[[64, 16]],
 96 | }
 97 | 
 98 | 
 99 | expcontroller = ACExperimentProcessController(ExpOptions, [BasicParamList, AdjustableParamList, SpecificParamList])
100 | 
101 | expcontroller.ExperimentStart()
102 | 
103 | 


--------------------------------------------------------------------------------
/ACNet/GraphormerMixTarget.py:
--------------------------------------------------------------------------------
  1 | from ACComponents.ACProcessControllers import *
  2 | 
  3 | ExpOptions = {
  4 |     'Search': 'greedy',
  5 |     'SeedPerOpt': 3,
  6 |     'SubsetsNum': 1,
  7 |     'OnlyEval': False,
  8 | }
  9 | 
 10 | BasicParamList = {
 11 |     'ExpName': 'ACMixTarget',
 12 |     'MainMetric': 'AUC',
 13 |     'DataPath': './ACComponents/ACDataset/data_files/generated_datasets/MMP_AC_Mixed_Screened.json',
 14 |     'RootPath': './TestExp/Mix/Graphormer/',
 15 |     'CUDA_VISIBLE_DEVICES': '3',
 16 |     'TaskNum': 1,
 17 |     'ClassNum': 2,
 18 |     'OutputSize': 2,
 19 |     'Feature': 'Graphormer',
 20 |     'Model': 'Graphormer',
 21 | 
 22 |     'OnlySpecific': True,
 23 |     'Weight': True,
 24 |     'AC': True,
 25 |     'PyG': False,
 26 | 
 27 |     'ValidRate': 40000,
 28 |     'PrintRate': 5,
 29 |     'UpdateRate': 1,
 30 |     'SplitRate': [0.8, 0.1],
 31 |     'Splitter': 'TargetRandom',
 32 |     'MaxEpoch': 300,
 33 |     'LowerThanMaxLimit': 12,
 34 |     'DecreasingLimit': 8,
 35 | 
 36 |     # if OnlyEval == True:
 37 |     'EvalModelPath': None,
 38 |     'EvalDatasetPath': None,
 39 |     'EvalLogAllPreds': None,
 40 | 
 41 |     'Scheduler': 'PolynomialDecayLR',
 42 |     # 'Scheduler': 'EmptyLRScheduler',
 43 | 
 44 | 
 45 |     # Params for PolynomialDecayLR only
 46 |     'WarmupEpoch': 2,
 47 |     'LRMaxEpoch':300,
 48 |     'EndLR':1e-9,
 49 |     'Power':1.0,
 50 |     # Params for StepLR only
 51 |     'LRStep': 30,
 52 |     'LRGamma': 0.1,
 53 |     ##########
 54 | 
 55 |     'WeightIniter': None,
 56 | 
 57 |     # Params for NormWeightIniter only
 58 |     'InitMean' : 0,
 59 |     'InitStd' : 1,
 60 | 
 61 |     'FeatureCategory': 'BaseED',
 62 | 
 63 |     # Params for Graphormer only
 64 |     'num_offset': 16,
 65 |     'num_atoms': 16 * 8,      # offset * AtomFeatureNum
 66 |     'num_in_degree': 16,       # length of indegree dictionary
 67 |     'num_out_degree': 16,      # length of outdegree dictionary
 68 |     'num_edges': 16 * 4,      # offset * BondFeatureNum
 69 |     'num_spatial': 512,         # length of SPD dictionary, must be larger than the largest SPD
 70 |     'num_edge_dis': 30,         # must be larger than multi-hop-max-dist
 71 |     'dropout_rate': 0.1,
 72 |     'intput_dropout_rate': 0.1,
 73 |     'edge_type': 'multi_hop',
 74 |     'multi_hop_max_dist': 20,
 75 |     'flag': False,
 76 |     'spatial_pos_max': 20,
 77 |     'max_node': 512,
 78 | 
 79 |     # Training Params to be adujsted. If the param is not needed to be adjusted, set the value here.
 80 |     'SplitValidSeed': 8,
 81 |     'SplitTestSeed': 8,
 82 |     'BatchSize': 32,
 83 | 
 84 | }
 85 | AdjustableParamList = {}
 86 | SpecificParamList = {
 87 |     'DropRate':[0.4],
 88 |     'WeightDecay':[5],
 89 |     'lr':[4],
 90 |     'num_encoder_layers':[10],
 91 |     'num_attention_heads':[32],
 92 |     'embedding_dim':[256],
 93 |     'ffn_dim':[256],
 94 |     'attention_dropout_rate':[0.4],
 95 |     'DNNLayers':[[64, 16]],
 96 | }
 97 | 
 98 | 
 99 | expcontroller = ACExperimentProcessController(ExpOptions, [BasicParamList, AdjustableParamList, SpecificParamList])
100 | 
101 | expcontroller.ExperimentStart()
102 | 
103 | 


--------------------------------------------------------------------------------
/ACNet/GraphormerSmall.py:
--------------------------------------------------------------------------------
  1 | from ACComponents.ACProcessControllers import *
  2 | 
  3 | ExpOptions = {
  4 |     'Search': 'greedy',
  5 |     'SeedPerOpt': 3,
  6 |     'SubsetsNum': 110,
  7 |     'OnlyEval': False,
  8 | }
  9 | 
 10 | BasicParamList = {
 11 |     'ExpName': 'ACSmall',
 12 |     'MainMetric': 'AUC',
 13 |     'DataPath': './ACComponents/ACDataset/data_files/generated_datasets/MMP_AC_Small.json',
 14 |     'RootPath': './TestExp/Small/Graphormer/',
 15 |     'CUDA_VISIBLE_DEVICES': '3',
 16 |     'TaskNum': 1,
 17 |     'ClassNum': 2,
 18 |     'OutputSize': 2,
 19 |     'Feature': 'Graphormer',
 20 |     'Model': 'Graphormer',
 21 | 
 22 |     'OnlySpecific': True,
 23 |     'Weight': True,
 24 |     'AC': True,
 25 |     'PyG': False,
 26 | 
 27 |     'ValidRate': 4000,
 28 |     'PrintRate': 5,
 29 |     'UpdateRate': 1,
 30 |     'SplitRate': [0.8, 0.1],
 31 |     'Splitter': 'Random',
 32 |     'MaxEpoch': 300,
 33 |     'LowerThanMaxLimit': 30,
 34 |     'DecreasingLimit': 12,
 35 | 
 36 |     # if OnlyEval == True:
 37 |     'EvalModelPath': None,
 38 |     'EvalDatasetPath': None,
 39 |     'EvalLogAllPreds': None,
 40 | 
 41 |     'Scheduler': 'PolynomialDecayLR',
 42 |     # 'Scheduler': 'EmptyLRScheduler',
 43 | 
 44 | 
 45 |     # Params for PolynomialDecayLR only
 46 |     'WarmupEpoch': 2,
 47 |     'LRMaxEpoch':300,
 48 |     'EndLR':1e-9,
 49 |     'Power':1.0,
 50 |     # Params for StepLR only
 51 |     'LRStep': 30,
 52 |     'LRGamma': 0.1,
 53 |     ##########
 54 | 
 55 |     'WeightIniter': None,
 56 | 
 57 |     # Params for NormWeightIniter only
 58 |     'InitMean' : 0,
 59 |     'InitStd' : 1,
 60 | 
 61 |     'FeatureCategory': 'BaseOH',
 62 | 
 63 |     # Params for Graphormer only
 64 |     'num_offset': 16,
 65 |     'num_atoms': 16 * 39,      # offset * AtomFeatureNum
 66 |     'num_in_degree': 16,       # length of indegree dictionary
 67 |     'num_out_degree': 16,      # length of outdegree dictionary
 68 |     'num_edges': 16 * 10,      # offset * BondFeatureNum
 69 |     'num_spatial': 512,         # length of SPD dictionary, must be larger than the largest SPD
 70 |     'num_edge_dis': 30,         # must be larger than multi-hop-max-dist
 71 |     'dropout_rate': 0.1,
 72 |     'intput_dropout_rate': 0.1,
 73 |     'edge_type': 'multi_hop',
 74 |     'multi_hop_max_dist': 20,
 75 |     'flag': False,
 76 |     'spatial_pos_max': 20,
 77 |     'max_node': 512,
 78 | 
 79 |     # Training Params to be adujsted. If the param is not needed to be adjusted, set the value here.
 80 |     'SplitValidSeed': 8,
 81 |     'SplitTestSeed': 8,
 82 |     'BatchSize': 32,
 83 | 
 84 | }
 85 | AdjustableParamList = {}
 86 | SpecificParamList = {
 87 |     'DropRate':[0.2],
 88 |     'WeightDecay':[5],
 89 |     'lr':[4],
 90 |     'num_encoder_layers':[8],
 91 |     'num_attention_heads':[16],
 92 |     'embedding_dim':[128],
 93 |     'ffn_dim':[128],
 94 |     'attention_dropout_rate':[0.2],
 95 |     'DNNLayers':[[64]],
 96 | }
 97 | 
 98 | 
 99 | expcontroller = ACExperimentProcessController(ExpOptions, [BasicParamList, AdjustableParamList, SpecificParamList])
100 | 
101 | expcontroller.ExperimentStart()
102 | 
103 | 


--------------------------------------------------------------------------------
/ACNet/LSTMLarge.py:
--------------------------------------------------------------------------------
 1 | from ACComponents.ACProcessControllers import *
 2 | 
 3 | ExpOptions = {
 4 |     'Search': 'greedy',
 5 |     'SeedPerOpt': 3,
 6 |     'SubsetsNum': 3,
 7 |     'OnlyEval': False,
 8 | }
 9 | 
10 | BasicParamList = {
11 |     'ExpName': 'ACLarge',
12 |     'MainMetric': 'AUC',
13 |     'DataPath': './ACComponents/ACDataset/data_files/generated_datasets/MMP_AC_Large.json',
14 |     'RootPath': './TestExp/Large/LSTM/',
15 |     'CUDA_VISIBLE_DEVICES': '0',
16 |     'TaskNum': 1,
17 |     'ClassNum': 2,
18 |     'OutputSize': 2,
19 |     'Feature': 'SMILES',
20 |     'Model': 'LSTM',
21 | 
22 |     'OnlySpecific': True,
23 |     'Weight': True,
24 |     'AC': True,
25 |     'PyG': False,
26 | 
27 |     'ValidRate': 40000,
28 |     'PrintRate': 5,
29 |     'UpdateRate': 1,
30 |     'SplitRate': [0.8, 0.1],
31 |     'Splitter': 'Random',
32 |     'MaxEpoch': 300,
33 |     'LowerThanMaxLimit': 12,
34 |     'DecreasingLimit': 8,
35 | 
36 |     # if OnlyEval == True:
37 |     'EvalModelPath': None,
38 |     'EvalDatasetPath': None,
39 |     'EvalLogAllPreds': None,
40 | 
41 |     'Scheduler': 'PolynomialDecayLR',
42 | 
43 |     # Params for PolynomialDecayLR only
44 |     'WarmupEpoch': 2,
45 |     'LRMaxEpoch':300,
46 |     'EndLR':1e-9,
47 |     'Power':1.0,
48 |     # Params for StepLR only
49 |     'LRStep': 30,
50 |     'LRGamma': 0.1,
51 |     ##########
52 | 
53 |     'WeightIniter': None,
54 | 
55 |     # Params for NormWeightIniter only
56 |     'InitMean' : 0,
57 |     'InitStd' : 1,
58 | 
59 |     'AtomFeatureSize': 39,
60 |     'BondFeatureSize': 10,
61 |     'MolFP': 'MorganFP',
62 |     'radius': 2,
63 |     'nBits': 1024,
64 | 
65 |     'SplitValidSeed': 8,
66 |     'SplitTestSeed': 8,
67 |     'BatchSize': 256,
68 | 
69 | }
70 | AdjustableParamList = {}
71 | SpecificParamList = {
72 |     'DropRate':[0.4],
73 |     'WeightDecay':[5],
74 |     'lr':[3],
75 |     'LSTMLayers': [3],
76 |     'FPSize':[512],
77 |     'DNNLayers':[[512, 128, 32]],
78 | }
79 | 
80 | 
81 | expcontroller = ACExperimentProcessController(ExpOptions, [BasicParamList, AdjustableParamList, SpecificParamList])
82 | 
83 | expcontroller.ExperimentStart()
84 | 
85 | 


--------------------------------------------------------------------------------
/ACNet/LSTMMedium.py:
--------------------------------------------------------------------------------
 1 | from ACComponents.ACProcessControllers import *
 2 | 
 3 | ExpOptions = {
 4 |     'Search': 'greedy',
 5 |     'SeedPerOpt': 3,
 6 |     'SubsetsNum': 64,
 7 |     'OnlyEval': False,
 8 | }
 9 | 
10 | BasicParamList = {
11 |     'ExpName': 'ACMedium',
12 |     'MainMetric': 'AUC',
13 |     'DataPath': './ACComponents/ACDataset/data_files/generated_datasets/MMP_AC_Medium.json',
14 |     'RootPath': './TestExp/Medium/LSTM/',
15 |     'CUDA_VISIBLE_DEVICES': '0',
16 |     'TaskNum': 1,
17 |     'ClassNum': 2,
18 |     'OutputSize': 2,
19 |     'Feature': 'SMILES',
20 |     'Model': 'LSTM',
21 | 
22 |     'OnlySpecific': True,
23 |     'Weight': True,
24 |     'AC': True,
25 |     'PyG': False,
26 | 
27 |     'ValidRate': 40000,
28 |     'PrintRate': 5,
29 |     'UpdateRate': 1,
30 |     'SplitRate': [0.8, 0.1],
31 |     'Splitter': 'Random',
32 |     'MaxEpoch': 300,
33 |     'LowerThanMaxLimit': 12,
34 |     'DecreasingLimit': 8,
35 | 
36 |     # if OnlyEval == True:
37 |     'EvalModelPath': None,
38 |     'EvalDatasetPath': None,
39 |     'EvalLogAllPreds': None,
40 | 
41 |     'Scheduler': 'PolynomialDecayLR',
42 | 
43 |     # Params for PolynomialDecayLR only
44 |     'WarmupEpoch': 2,
45 |     'LRMaxEpoch':300,
46 |     'EndLR':1e-9,
47 |     'Power':1.0,
48 |     # Params for StepLR only
49 |     'LRStep': 30,
50 |     'LRGamma': 0.1,
51 |     ##########
52 | 
53 |     'WeightIniter': None,
54 | 
55 |     # Params for NormWeightIniter only
56 |     'InitMean' : 0,
57 |     'InitStd' : 1,
58 | 
59 |     'AtomFeatureSize': 39,
60 |     'BondFeatureSize': 10,
61 |     'MolFP': 'MorganFP',
62 |     'radius': 2,
63 |     'nBits': 1024,
64 | 
65 |     'SplitValidSeed': 8,
66 |     'SplitTestSeed': 8,
67 |     'BatchSize': 256,
68 | 
69 | }
70 | AdjustableParamList = {}
71 | SpecificParamList = {
72 |     'DropRate':[0.2],
73 |     'WeightDecay':[4.5],
74 |     'lr':[3],
75 |     'LSTMLayers': [2],
76 |     'FPSize':[64],
77 |     'DNNLayers':[[]],
78 | }
79 | 
80 | 
81 | expcontroller = ACExperimentProcessController(ExpOptions, [BasicParamList, AdjustableParamList, SpecificParamList])
82 | 
83 | expcontroller.ExperimentStart()
84 | 
85 | 


--------------------------------------------------------------------------------
/ACNet/LSTMSmall.py:
--------------------------------------------------------------------------------
 1 | from ACComponents.ACProcessControllers import *
 2 | 
 3 | ExpOptions = {
 4 |     'Search': 'greedy',
 5 |     'SeedPerOpt': 3,
 6 |     'SubsetsNum': 110,
 7 |     'OnlyEval': False,
 8 | }
 9 | 
10 | BasicParamList = {
11 |     'ExpName': 'ACSmall',
12 |     'MainMetric': 'AUC',
13 |     'DataPath': './ACComponents/ACDataset/data_files/generated_datasets/MMP_AC_Small.json',
14 |     'RootPath': './TestExp/Small/LSTM/',
15 |     'CUDA_VISIBLE_DEVICES': '0',
16 |     'TaskNum': 1,
17 |     'ClassNum': 2,
18 |     'OutputSize': 2,
19 |     'Feature': 'SMILES',
20 |     'Model': 'LSTM',
21 | 
22 |     'OnlySpecific': True,
23 |     'Weight': True,
24 |     'AC': True,
25 |     'PyG': False,
26 | 
27 |     'ValidRate': 40000,
28 |     'PrintRate': 5,
29 |     'UpdateRate': 1,
30 |     'SplitRate': [0.8, 0.1],
31 |     'Splitter': 'Random',
32 |     'MaxEpoch': 300,
33 |     'LowerThanMaxLimit': 12,
34 |     'DecreasingLimit': 8,
35 | 
36 |     # if OnlyEval == True:
37 |     'EvalModelPath': None,
38 |     'EvalDatasetPath': None,
39 |     'EvalLogAllPreds': None,
40 | 
41 |     'Scheduler': 'PolynomialDecayLR',
42 | 
43 |     # Params for PolynomialDecayLR only
44 |     'WarmupEpoch': 2,
45 |     'LRMaxEpoch':300,
46 |     'EndLR':1e-9,
47 |     'Power':1.0,
48 |     # Params for StepLR only
49 |     'LRStep': 30,
50 |     'LRGamma': 0.1,
51 |     ##########
52 | 
53 |     'WeightIniter': None,
54 | 
55 |     # Params for NormWeightIniter only
56 |     'InitMean' : 0,
57 |     'InitStd' : 1,
58 | 
59 |     'AtomFeatureSize': 39,
60 |     'BondFeatureSize': 10,
61 |     'MolFP': 'MorganFP',
62 |     'radius': 2,
63 |     'nBits': 1024,
64 | 
65 |     'SplitValidSeed': 8,
66 |     'SplitTestSeed': 8,
67 |     'BatchSize': 32,
68 | 
69 | }
70 | AdjustableParamList = {}
71 | SpecificParamList = {
72 |     'DropRate':[0.2],
73 |     'WeightDecay':[4.5],
74 |     'lr':[3],
75 |     'LSTMLayers': [2],
76 |     'FPSize':[128],
77 |     'DNNLayers':[[128]],
78 | }
79 | 
80 | 
81 | expcontroller = ACExperimentProcessController(ExpOptions, [BasicParamList, AdjustableParamList, SpecificParamList])
82 | 
83 | expcontroller.ExperimentStart()
84 | 
85 | 


--------------------------------------------------------------------------------
/ACNet/MATFew.py:
--------------------------------------------------------------------------------
 1 | from ACComponents.ACProcessControllers import *
 2 | 
 3 | ExpOptions = {
 4 |     'Search': 'greedy',
 5 |     'SeedPerOpt': 3,
 6 |     'SubsetsNum': 13,
 7 |     'OnlyEval': False,
 8 |     'Finetune':True,
 9 | }
10 | 
11 | BasicParamList = {
12 |     'ExpName': 'ACFew',
13 |     'MainMetric': 'AUC',
14 |     'DataPath': './ACComponents/ACDataset/data_files/MMP_AC_Few_representation/MAT.npz',
15 |     'RootPath': './TestExp/Few/MAT/',
16 |     'CUDA_VISIBLE_DEVICES': '2',
17 |     'TaskNum': 1,
18 |     'ClassNum': 2,
19 |     'OutputSize': 2,
20 |     'Feature': 'Raw',
21 |     'Model': 'MLP',
22 | 
23 |     # if Feature == Raw
24 |     'RawFeatureSize': 1024,
25 | 
26 |     'OnlySpecific': True,
27 |     'Weight': True,
28 |     'AC': True,
29 |     'PyG': False,
30 | 
31 |     'ValidRate': 40000,
32 |     'PrintRate': 5,
33 |     'UpdateRate': 1,
34 |     'SplitRate': [0.8, 0.1],
35 |     'Splitter': 'Random',
36 |     'MaxEpoch': 300,
37 |     'LowerThanMaxLimit': 12,
38 |     'DecreasingLimit': 8,
39 | 
40 |     # if OnlyEval == True:
41 |     'EvalModelPath': None,
42 |     'EvalDatasetPath': None,
43 |     'EvalLogAllPreds': None,
44 | 
45 |     'Scheduler': 'PolynomialDecayLR',
46 | 
47 |     # Params for PolynomialDecayLR only
48 |     'WarmupEpoch': 2,
49 |     'LRMaxEpoch':300,
50 |     'EndLR':1e-9,
51 |     'Power':1.0,
52 |     # Params for StepLR only
53 |     'LRStep': 30,
54 |     'LRGamma': 0.1,
55 |     ##########
56 | 
57 |     'WeightIniter': None,
58 | 
59 |     # Params for NormWeightIniter only
60 |     'InitMean' : 0,
61 |     'InitStd' : 1,
62 | 
63 | 
64 |     # Training Params to be adujsted. If the param is not needed to be adjusted, set the value here.
65 |     'SplitValidSeed': 8,
66 |     'SplitTestSeed': 8,
67 |     'BatchSize': 8,
68 | 
69 | }
70 | AdjustableParamList = {}
71 | SpecificParamList = {
72 |     'DropRate':[0.2],
73 |     'WeightDecay':[5],
74 |     'lr':[4],
75 |     'DNNLayers':[[512, 128, 32]],
76 | }
77 | 
78 | 
79 | expcontroller = ACExperimentProcessController(ExpOptions, [BasicParamList, AdjustableParamList, SpecificParamList])
80 | 
81 | expcontroller.ExperimentStart()
82 | 
83 | 


--------------------------------------------------------------------------------
/ACNet/Models/BasicGNNs.py:
--------------------------------------------------------------------------------
  1 | import torch as t
  2 | import torch.nn as nn
  3 | from torch_geometric.nn import GCN, global_add_pool, global_mean_pool, global_max_pool, MLP, GIN, SGConv, MessagePassing
  4 | from torch_geometric.nn.models.basic_gnn import BasicGNN
  5 | 
  6 | 
  7 | 
  8 | class PyGGCN(nn.Module):
  9 |     def __init__(self, opt, FeatureExtractor = False):
 10 |         super(PyGGCN, self).__init__()
 11 |         self.opt = opt
 12 |         self.node_feat_size = opt.args['AtomFeatureSize']
 13 |         self.in_channel = opt.args['GCNInputSize']
 14 |         self.hidden_channel = opt.args['GCNHiddenSize']
 15 |         self.out_channel = opt.args['FPSize']
 16 |         self.num_layers = opt.args['GCNLayers']
 17 |         self.MLPChannels = opt.args['DNNLayers']
 18 |         self.MLPOutputSize = opt.args['OutputSize']
 19 |         self.dropout = opt.args['DropRate']
 20 |         self.FeatureExtractor = FeatureExtractor
 21 | 
 22 |         self.MLPChannels = [self.out_channel] + self.MLPChannels + [self.MLPOutputSize]
 23 | 
 24 |         self.GCN = GCN(in_channels = self.in_channel,
 25 |                        hidden_channels = self.hidden_channel,
 26 |                        out_channels = self.out_channel,
 27 |                        num_layers = self.num_layers,
 28 |                        dropout = self.dropout)
 29 |         self.NodeFeatEmbed = MLP([self.node_feat_size, self.in_channel], dropout = self.dropout)
 30 |         if not self.FeatureExtractor:
 31 |             self.TaskLayer = MLP(self.MLPChannels, dropout = self.dropout)
 32 | 
 33 |         self.ReadoutList = {
 34 |             'Add': global_add_pool,
 35 |             'Mean': global_mean_pool,
 36 |             'Max': global_max_pool
 37 |         }
 38 |         self.readout = self.ReadoutList[opt.args['GCNReadout']]
 39 | 
 40 |     def forward(self, Input):
 41 |         # Input: Batch data of PyG
 42 |         Input = Input.to(t.device(f"cuda:{self.opt.args['CUDA_VISIBLE_DEVICES']}" if t.cuda.is_available() else 'cpu'))
 43 |         x = self.NodeFeatEmbed(Input.x)
 44 |         x = self.GCN(x, Input.edge_index)
 45 |         x = self.readout(x, Input.batch)
 46 |         if not self.FeatureExtractor:
 47 |             x = self.TaskLayer(x)
 48 |         return x
 49 | 
 50 | class PyGGIN(nn.Module):
 51 |     def __init__(self, opt, FeatureExtractor = False):
 52 |         super(PyGGIN, self).__init__()
 53 |         self.opt = opt
 54 |         self.node_feat_size = opt.args['AtomFeatureSize']
 55 |         self.in_channel = opt.args['GINInputSize']
 56 |         self.hidden_channel = opt.args['GINHiddenSize']
 57 |         self.out_channel = opt.args['FPSize']
 58 |         self.eps = opt.args['GINEps']
 59 |         self.num_layers = opt.args['GINLayers']
 60 |         self.MLPChannels = opt.args['DNNLayers']
 61 |         self.MLPOutputSize = opt.args['OutputSize']
 62 |         self.dropout = opt.args['DropRate']
 63 |         self.FeatureExtractor = FeatureExtractor
 64 | 
 65 |         self.MLPChannels = [self.out_channel] + self.MLPChannels + [self.MLPOutputSize]
 66 | 
 67 |         self.GIN = GIN(in_channels = self.in_channel,
 68 |                        hidden_channels = self.hidden_channel,
 69 |                        out_channels = self.out_channel,
 70 |                        num_layers = self.num_layers,
 71 |                        dropout = self.dropout,
 72 |                        eps = self.eps)
 73 |         self.NodeFeatEmbed = MLP([self.node_feat_size, self.in_channel], dropout = self.dropout)
 74 |         if not self.FeatureExtractor:
 75 |             self.TaskLayer = MLP(self.MLPChannels, dropout = self.dropout)
 76 | 
 77 |         self.ReadoutList = {
 78 |             'Add': global_add_pool,
 79 |             'Mean': global_mean_pool,
 80 |             'Max': global_max_pool,
 81 |         }
 82 |         self.readout = self.ReadoutList[opt.args['GINReadout']]
 83 | 
 84 |     def forward(self, Input):
 85 |         # Input: Batch data of PyG
 86 |         Input = Input.to(t.device(f"cuda:{self.opt.args['CUDA_VISIBLE_DEVICES']}" if t.cuda.is_available() else 'cpu'))
 87 |         x = self.NodeFeatEmbed(Input.x)
 88 |         x = self.GIN(x, Input.edge_index)
 89 |         x = self.readout(x, Input.batch)
 90 |         if not self.FeatureExtractor:
 91 |             x = self.TaskLayer(x)
 92 |         return x
 93 | 
 94 | 
 95 | class SGC(BasicGNN):
 96 |     def init_conv(self, in_channels: int, out_channels: int, **kwargs) -> MessagePassing:
 97 |         return SGConv(in_channels, out_channels, **kwargs)
 98 | 
 99 | class PyGSGC(nn.Module):
100 |     def __init__(self, opt, FeatureExtractor = False):
101 |         super(PyGSGC, self).__init__()
102 |         self.opt = opt
103 |         self.node_feat_size = opt.args['AtomFeatureSize']
104 |         self.in_channel = opt.args['SGCInputSize']
105 |         self.hidden_channel = opt.args['SGCHiddenSize']
106 |         self.out_channel = opt.args['FPSize']
107 |         self.K = opt.args['SGCK']
108 |         self.num_layers = opt.args['SGCLayers']
109 |         self.MLPChannels = opt.args['DNNLayers']
110 |         self.MLPOutputSize = opt.args['OutputSize']
111 |         self.dropout = opt.args['DropRate']
112 |         self.FeatureExtractor = FeatureExtractor
113 | 
114 | 
115 |         self.MLPChannels = [self.out_channel] + self.MLPChannels + [self.MLPOutputSize]
116 | 
117 |         self.SGC = SGC(in_channels = self.in_channel,
118 |                        hidden_channels = self.hidden_channel,
119 |                        out_channels = self.out_channel,
120 |                        num_layers = self.num_layers,
121 |                        dropout = self.dropout,
122 |                        K = self.K)
123 |         self.NodeFeatEmbed = MLP([self.node_feat_size, self.in_channel], dropout = self.dropout)
124 |         self.TaskLayer = MLP(self.MLPChannels, dropout = self.dropout)
125 | 
126 |         self.ReadoutList = {
127 |             'Add': global_add_pool,
128 |             'Mean': global_mean_pool,
129 |             'Max': global_max_pool
130 |         }
131 |         self.readout = self.ReadoutList[opt.args['SGCReadout']]
132 | 
133 |     def forward(self, Input):
134 |         # Input: Batch data of PyG
135 |         Input = Input.to(t.device(f"cuda:{self.opt.args['CUDA_VISIBLE_DEVICES']}" if t.cuda.is_available() else 'cpu'))
136 |         x = self.NodeFeatEmbed(Input.x)
137 |         x = self.SGC(x, Input.edge_index)
138 |         x = self.readout(x, Input.batch)
139 |         if not self.FeatureExtractor:
140 |             x = self.TaskLayer(x)
141 |         return x


--------------------------------------------------------------------------------
/ACNet/Models/CMPNN/CMPNNModel.py:
--------------------------------------------------------------------------------
  1 | import torch as t
  2 | import torch.nn as nn
  3 | import torch.nn.functional as F
  4 | from Models.CMPNN.nn_utils import get_activation_function, index_select_ND
  5 | from Models.CMPNN.CMPNNFeaturizer import mol2graph,get_atom_fdim, get_bond_fdim
  6 | import math
  7 | 
  8 | class CommunicateKernel(nn.Module):
  9 |     def __init__(self, opt):
 10 |         super(CommunicateKernel, self).__init__()
 11 |         self.opt = opt
 12 |         self.kernel = self.opt.args['CommunicateKernel']
 13 | 
 14 |         # if self.kernel == 'MultilayerPerception':
 15 |         #     self.linear = nn.Linear()
 16 | 
 17 |     def forward(self, hidden, agg_message):
 18 |         # hidden: h^{k-1} (v)
 19 |         # agg_message: m^k (v)
 20 | 
 21 |         if self.kernel == 'Add':
 22 |             return hidden + agg_message
 23 |         # elif self.opt.args['CommunicateKernel'] == 'MultilayerPerception':
 24 | 
 25 | class MPNLayer(nn.Module):
 26 |     def __init__(self, opt):
 27 |         super(MPNLayer, self).__init__()
 28 |         self.opt = opt
 29 |         self.hidden_size = self.opt.args['FPSize']
 30 |         self.W_bond = nn.Linear(self.hidden_size, self.hidden_size)
 31 |         self.dropout_layer = nn.Dropout(p=self.opt.args['DropRate'])
 32 |         self.act_func = get_activation_function(opt)
 33 |         self.communicate_kernel = CommunicateKernel(opt)
 34 | 
 35 |     def forward(self, message_atom, message_bond, a2b, b2a, b2revb, input_bond):
 36 |         # message_atom: h^{k-1} (v)
 37 |         # message_bond: h^{k-1} (e_{wv})
 38 |         # a2b, b2a, b2revb are index to find neighbors
 39 |         # input_bond: h^0 (e_{vw})
 40 |         # nodes
 41 |         agg_message = index_select_ND(message_bond, a2b)
 42 |         agg_message = self.MessageBooster(agg_message)
 43 |         message_atom = self.communicate_kernel(message_atom, agg_message)
 44 | 
 45 |         # edges
 46 |         rev_message = message_bond[b2revb]
 47 |         message_bond = message_atom[b2a] - rev_message
 48 |         message_bond = self.W_bond(message_bond)
 49 |         message_bond = self.dropout_layer(self.act_func(input_bond + message_bond))
 50 | 
 51 |         return message_atom, message_bond
 52 | 
 53 |     def MessageBooster(self, agg_message):
 54 |         return agg_message.sum(dim=1) * agg_message.max(dim=1)[0]
 55 | 
 56 | class BatchGRU(nn.Module):
 57 |     def __init__(self, hidden_size):
 58 |         super(BatchGRU, self).__init__()
 59 |         self.hidden_size = hidden_size
 60 |         self.gru = nn.GRU(self.hidden_size, self.hidden_size, batch_first = True,
 61 |                           bidirectional = True)
 62 |         self.bias = nn.Parameter(t.Tensor(self.hidden_size))
 63 |         self.bias.data.uniform_(-1.0 / math.sqrt(self.hidden_size),
 64 |                                 1.0 / math.sqrt(self.hidden_size))
 65 | 
 66 |     def forward(self, node, a_scope):
 67 |         # 输入：node为一个batch的大图中所有节点v的features， a_scope为这个batch的大图中，哪些节点隶属于一个mol
 68 |         #
 69 |         hidden = node
 70 |         message = F.relu(node + self.bias)  # 节点信息加了一个偏置以后过relu激活（线性系数为1）
 71 |         MAX_atom_len = max([a_size for a_start, a_size in a_scope])  # 最大的原子数量
 72 |         # padding
 73 |         message_lst = []
 74 |         hidden_lst = []
 75 | 
 76 |         for i, (a_start, a_size) in enumerate(a_scope):
 77 |             if a_size == 0:
 78 |                 assert 0
 79 |             cur_message = message.narrow(0, a_start, a_size)
 80 |             # torch.Tensor.narrow函数的功能是从第dimension维中，从start开始，选取length个，得到切片
 81 |             cur_hidden = hidden.narrow(0, a_start, a_size)
 82 |             # message和hidden的区别：hidden是K层以后得到的各个节点的feature，message是加了偏置并激活以后的feature
 83 | 
 84 |             hidden_lst.append(cur_hidden.max(0)[0].unsqueeze(0).unsqueeze(0))
 85 |             # cur_hidden的尺寸应该是[a_size, hidden_size]
 86 |             # cur_hidden.max(0)[0]的结果是返回cur_hidden中，feature各个元素在不同atom上的最大值，返回尺寸[hidden_size]
 87 |             # 两次unsquezze(0)以后的尺寸为[1,1,hidden_size]，append到list中
 88 | 
 89 |             cur_message = t.nn.ZeroPad2d((0, 0, 0, MAX_atom_len - cur_message.shape[0]))(cur_message)
 90 |             # 这句话就是简单的填充。把所有的cur_message，按照最大原子数填充一致
 91 |             # 从[a_size, hidden_size]填充为[max_atom_len, hidden_size]
 92 |             message_lst.append(cur_message.unsqueeze(0))
 93 |             # unsqueeze成[1,max_atom_len,hidden_size]后，append到list中
 94 | 
 95 |         message_lst = t.cat(message_lst, 0)
 96 |         hidden_lst = t.cat(hidden_lst, 1)
 97 |         # 把两个list转化为两个tensor。list的长度均为batch_size
 98 |         # message_lst的尺寸为[batch_size, max_atom_len, hidden_size]
 99 |         # hidden_lst的尺寸为[1,batch_size, hidden_size]
100 |         hidden_lst = hidden_lst.repeat(2, 1, 1)  # [2,batch_size,hidden_size]
101 |         cur_message, cur_hidden = self.gru(message_lst, hidden_lst)
102 |         # 这里，GRU的输入，input为message_lst，h_0为hidden_lst。也就是说，K层MPN提出来的node embedding是GRU的初始隐变量
103 |         # 而message_lst则是序列化的输入特征
104 |         # 这里gru只有一层,但是因为选择了bidirectional，所以要把h复制成2.
105 | 
106 |         # 所以可以明确，GRU的作用就是，将一个分子中的各个原子的feature，按照序列输入到GRU，GRU将融合这个序列前后的其它各个原子的信息，对这个原子的信息进行更新
107 |         # 这个GRU接收的序列，是一个分子中的各个原子组成的序列，因此并不是"不同层的信息更新"，也没有包含有拓扑信息，
108 |         # 因为这个序列只是按照原子序号进行组合的，并没有考虑拓扑
109 |         # 所以这个GRU的作用值得商榷
110 | 
111 |         # unpadding
112 |         cur_message_unpadding = []
113 |         for i, (a_start, a_size) in enumerate(a_scope):
114 |             cur_message_unpadding.append(cur_message[i, :a_size].view(-1, 2 * self.hidden_size))
115 |         cur_message_unpadding = t.cat(cur_message_unpadding, 0)
116 | 
117 |         message = t.cat([t.cat([message.narrow(0, 0, 1), message.narrow(0, 0, 1)], 1),
118 |                              cur_message_unpadding], 0)
119 |         return message
120 | 
121 | 
122 | #######################################################################################
123 | 
124 | class MPNEncoder(nn.Module):
125 |     def __init__(self, opt, atom_fdim, bond_fdim):
126 |         super(MPNEncoder, self).__init__()
127 |         self.opt = opt
128 |         self.atom_fdim = atom_fdim
129 |         self.bond_fdim = bond_fdim
130 |         self.hidden_size = opt.args['FPSize']
131 |         #self.bias = opt.args['bias']
132 |         self.depth = opt.args['CMPNNLayers']
133 | 
134 |         # print(f"atom dim:{self.atom_fdim}")
135 |         # print(f"hidden size: {self.hidden_size}")
136 |         self.W_i_atom = nn.Linear(self.atom_fdim, self.hidden_size)
137 |         self.W_i_bond = nn.Linear(self.bond_fdim, self.hidden_size)
138 | 
139 |         self.MPNLayers = nn.ModuleList()
140 |         for k in range(self.depth - 1):
141 |             self.MPNLayers.append(MPNLayer(opt))
142 | 
143 |         self.lr = nn.Linear(self.hidden_size*3, self.hidden_size)
144 |         self.gru = BatchGRU(hidden_size = self.hidden_size)
145 | 
146 |         self.W_o = nn.Linear(self.hidden_size*2, self.hidden_size)
147 |         self.act_func = get_activation_function(opt)
148 |         self.dropout_layer = nn.Dropout(p=self.opt.args['DropRate'])
149 | 
150 | 
151 |     def forward(self, input):
152 |         f_atoms, f_bonds, a2b, b2a, b2revb, a_scope, b_scope, bonds = self._unpack_inputs(input)
153 | 
154 |         # Input feature transform
155 |         #print(f_atoms[0])
156 |         #print(f_atoms.size())
157 |         input_atom = self.W_i_atom(f_atoms)
158 |         #print(input_atom.size())
159 |         input_atom = self.act_func(input_atom)
160 |         message_atom = input_atom.clone()
161 | 
162 |         #print(f_bonds)
163 |         #print(f_bonds.size())
164 |         #print(self.W_i_bond)
165 |         input_bond = self.W_i_bond(f_bonds)
166 |         input_bond = self.act_func(input_bond)
167 |         message_bond = input_bond.clone()
168 | 
169 |         # Message Passing
170 |         for layer in self.MPNLayers:
171 |             message_atom, message_bond = layer(message_atom, message_bond, a2b,b2a,b2revb,input_bond)
172 | 
173 |         agg_message = index_select_ND(message_bond, a2b)
174 |         agg_message = self.MessageBooster(agg_message)
175 | 
176 |         agg_message = self.lr(t.cat([agg_message, message_atom, input_atom], 1))
177 | 
178 |         agg_message = self.gru(agg_message, a_scope)
179 | 
180 |         atom_hiddens = self.act_func(self.W_o(agg_message))
181 |         atom_hiddens = self.dropout_layer(atom_hiddens)
182 | 
183 |         # Readout
184 |         mol_vecs = []
185 |         for i, (a_start, a_size) in enumerate(a_scope):
186 |             if a_size == 0:
187 |                 assert 0
188 |             cur_hiddens = atom_hiddens.narrow(0, a_start, a_size)
189 |             mol_vecs.append(cur_hiddens.mean(0))
190 |         mol_vecs = t.stack(mol_vecs, dim=0)
191 | 
192 |         return mol_vecs
193 | 
194 |     def MessageBooster(self, agg_message):
195 |         return agg_message.sum(dim=1) * agg_message.max(dim=1)[0]
196 | 
197 |     def _unpack_inputs(self, input):
198 |         f_atoms, f_bonds, a2b, b2a, b2revb, a_scope, b_scope, bonds = input.get_components()
199 |         #print(f_bonds)
200 |         #print(f_bonds.size())
201 |         #print(self.opt.args['CUDA_VISIBLE_DEVICES'])
202 |         f_atoms, f_bonds, a2b, b2a, b2revb = (
203 |                 f_atoms.to(t.device(f"cuda:{self.opt.args['CUDA_VISIBLE_DEVICES']}" if t.cuda.is_available() else 'cpu')),
204 |                 f_bonds.to(t.device(f"cuda:{self.opt.args['CUDA_VISIBLE_DEVICES']}" if t.cuda.is_available() else 'cpu')),
205 |                 a2b.to(t.device(f"cuda:{self.opt.args['CUDA_VISIBLE_DEVICES']}" if t.cuda.is_available() else 'cpu')),
206 |                 b2a.to(t.device(f"cuda:{self.opt.args['CUDA_VISIBLE_DEVICES']}" if t.cuda.is_available() else 'cpu')),
207 |                 b2revb.to(t.device(f"cuda:{self.opt.args['CUDA_VISIBLE_DEVICES']}" if t.cuda.is_available() else 'cpu')))
208 | 
209 |         return f_atoms, f_bonds, a2b, b2a, b2revb, a_scope, b_scope, bonds
210 | 
211 | class MPN(nn.Module):
212 |     def __init__(self, opt):
213 |         super(MPN, self).__init__()
214 |         self.opt = opt
215 |         self.atom_fdim = get_atom_fdim()
216 |         self.bond_fdim = get_bond_fdim() + (not opt.args['atom_messages']) * self.atom_fdim
217 |         self.encoder = MPNEncoder(self.opt, self.atom_fdim, self.bond_fdim)
218 | 
219 |     def forward(self, input):
220 |         input = mol2graph(input, self.opt)
221 |         output = self.encoder.forward(input)
222 | 
223 |         return output
224 | 
225 | ######################################################################################
226 | 
227 | 
228 | class CMPNNModel(nn.Module):
229 |     # A CMPNN Model includes a message passing network following by a FCN.
230 | 
231 |     def __init__(self, classification: bool, multiclass: bool, opt):
232 |         super(CMPNNModel, self).__init__()
233 | 
234 |         self.classification = classification
235 |         if self.classification:
236 |             self.sigmoid = nn.Sigmoid()
237 |         self.multiclass = multiclass
238 |         if self.multiclass:
239 |             self.multiclass_softmax = nn.Softmax(dim=2)
240 |         assert not (self.classification and self.multiclass)
241 | 
242 |         self.opt = opt
243 |         self.hidden_size = opt.args['FPSize']
244 |         self.num_classes = opt.args['ClassNum']
245 |         self.dataset_type = opt.args['dataset_type']
246 |         self.output_size = opt.args['OutputSize']
247 |         self.ffn_hidden_size = opt.args['ffn_hidden_size']
248 | 
249 |         self.only_extract_feature = opt.args['only_extract_feature']
250 | 
251 |         if self.dataset_type == 'multicalss':
252 |             self.multiclass == True
253 | 
254 |         self.create_encoder()
255 |         self.create_ffn()
256 | 
257 |     def create_encoder(self):
258 |         self.encoder = MPN(self.opt)
259 | 
260 |     def create_ffn(self):
261 |         first_linear_dim = self.hidden_size * 1
262 | 
263 |         dropout = nn.Dropout(self.opt.args['DropRate'])
264 |         activation = get_activation_function(self.opt)
265 | 
266 |         # Create FNN Layers
267 |         if self.opt.args['ffn_num_layers'] == 1:
268 |             ffn = [
269 |                 dropout,
270 |                 nn.Linear(first_linear_dim, self.output_size)
271 |             ]
272 |         else:
273 |             ffn = [
274 |                 dropout,
275 |                 nn.Linear(first_linear_dim, self.ffn_hidden_size)
276 |             ]
277 |             for _ in range(self.opt.args['ffn_num_layers'] - 2):
278 |                 ffn.extend([
279 |                     activation,
280 |                     dropout,
281 |                     nn.Linear(self.ffn_hidden_size, self.ffn_hidden_size)
282 |                 ])
283 |             ffn.extend([
284 |                 activation,
285 |                 dropout,
286 |                 nn.Linear(self.ffn_hidden_size, self.output_size)
287 |             ])
288 | 
289 |         self.ffn = nn.Sequential(*ffn)
290 | 
291 |     def forward(self, input):
292 |         # An encoder to extract information of a graph
293 |         # and a FCN as task layer to make prediction
294 |         # output = self.ffn(self.encoder(input))
295 |         # print(input)
296 |         output = self.encoder(input)
297 |         if self.only_extract_feature:
298 |             # print(f"size of output is: {output.size()}")
299 |             return output
300 | 
301 |         # self.ffn has the same function with the DNN classifier model
302 |         # If we only need to extract features, ffn is not needed.
303 |         output = self.ffn(output)
304 |         # output layer
305 |         if self.classification and not self.training:
306 |             output = self.sigmoid(output)
307 |         if self.multiclass:
308 |             output = output.reshape((output.size(0),-1, self.num_classes))
309 |             if not self.training:
310 |                 output = self.multiclass_softmax(output)
311 | 
312 |         return output
313 | 
314 | 


--------------------------------------------------------------------------------
/ACNet/Models/CMPNN/nn_utils.py:
--------------------------------------------------------------------------------
  1 | import math
  2 | from typing import List, Union
  3 | 
  4 | import numpy as np
  5 | import torch
  6 | import torch.nn as nn
  7 | from torch.optim import Optimizer
  8 | from torch.optim.lr_scheduler import _LRScheduler
  9 | 
 10 | 
 11 | class NoamLR(_LRScheduler):
 12 |     """
 13 |     Noam learning rate scheduler with piecewise linear increase and exponential decay.
 14 | 
 15 |     The learning rate increases linearly from init_lr to max_lr over the course of
 16 |     the first warmup_steps (where warmup_steps = warmup_epochs * steps_per_epoch).
 17 |     Then the learning rate decreases exponentially from max_lr to final_lr over the
 18 |     course of the remaining total_steps - warmup_steps (where total_steps =
 19 |     total_epochs * steps_per_epoch). This is roughly based on the learning rate
 20 |     schedule from Attention is All You Need, section 5.3 (https://arxiv.org/abs/1706.03762).
 21 |     """
 22 |     def __init__(self,
 23 |                  optimizer: Optimizer,
 24 |                  warmup_epochs: List[Union[float, int]],
 25 |                  total_epochs: List[int],
 26 |                  steps_per_epoch: int,
 27 |                  init_lr: List[float],
 28 |                  max_lr: List[float],
 29 |                  final_lr: List[float]):
 30 |         """
 31 |         Initializes the learning rate scheduler.
 32 | 
 33 |         :param optimizer: A PyTorch optimizer.
 34 |         :param warmup_epochs: The number of epochs during which to linearly increase the learning rate.
 35 |         :param total_epochs: The total number of epochs.
 36 |         :param steps_per_epoch: The number of steps (batches) per epoch.
 37 |         :param init_lr: The initial learning rate.
 38 |         :param max_lr: The maximum learning rate (achieved after warmup_epochs).
 39 |         :param final_lr: The final learning rate (achieved after total_epochs).
 40 |         """
 41 |         assert len(optimizer.param_groups) == len(warmup_epochs) == len(total_epochs) == len(init_lr) == \
 42 |                len(max_lr) == len(final_lr)
 43 | 
 44 |         self.num_lrs = len(optimizer.param_groups)
 45 | 
 46 |         self.optimizer = optimizer
 47 |         self.warmup_epochs = np.array(warmup_epochs)
 48 |         self.total_epochs = np.array(total_epochs)
 49 |         self.steps_per_epoch = steps_per_epoch
 50 |         self.init_lr = np.array(init_lr)
 51 |         self.max_lr = np.array(max_lr)
 52 |         self.final_lr = np.array(final_lr)
 53 | 
 54 |         self.current_step = 0
 55 |         self.lr = init_lr
 56 |         self.warmup_steps = (self.warmup_epochs * self.steps_per_epoch).astype(int)
 57 |         self.total_steps = self.total_epochs * self.steps_per_epoch
 58 |         self.linear_increment = (self.max_lr - self.init_lr) / self.warmup_steps
 59 | 
 60 |         self.exponential_gamma = (self.final_lr / self.max_lr) ** (1 / (self.total_steps - self.warmup_steps))
 61 | 
 62 |         super(NoamLR, self).__init__(optimizer)
 63 | 
 64 |     def get_lr(self) -> List[float]:
 65 |         """Gets a list of the current learning rates."""
 66 |         return list(self.lr)
 67 | 
 68 |     def step(self, current_step: int = None):
 69 |         """
 70 |         Updates the learning rate by taking a step.
 71 | 
 72 |         :param current_step: Optionally specify what step to set the learning rate to.
 73 |         If None, current_step = self.current_step + 1.
 74 |         """
 75 |         if current_step is not None:
 76 |             self.current_step = current_step
 77 |         else:
 78 |             self.current_step += 1
 79 | 
 80 |         for i in range(self.num_lrs):
 81 |             if self.current_step <= self.warmup_steps[i]:
 82 |                 self.lr[i] = self.init_lr[i] + self.current_step * self.linear_increment[i]
 83 |             elif self.current_step <= self.total_steps[i]:
 84 |                 self.lr[i] = self.max_lr[i] * (self.exponential_gamma[i] ** (self.current_step - self.warmup_steps[i]))
 85 |             else:  # theoretically this case should never be reached since training should stop at total_steps
 86 |                 self.lr[i] = self.final_lr[i]
 87 | 
 88 |             self.optimizer.param_groups[i]['lr'] = self.lr[i]
 89 | 
 90 | 
 91 | def get_activation_function(opt) -> nn.Module:
 92 |     """
 93 |     Gets an activation function module given the name of the activation.
 94 | 
 95 |     :param activation: The name of the activation function.
 96 |     :return: The activation function module.
 97 |     """
 98 |     activation = opt.args['activation']
 99 | 
100 |     if activation == 'ReLU':
101 |         return nn.ReLU()
102 |     elif activation == 'LeakyReLU':
103 |         return nn.LeakyReLU(0.1)
104 |     elif activation == 'PReLU':
105 |         return nn.PReLU()
106 |     elif activation == 'tanh':
107 |         return nn.Tanh()
108 |     elif activation == 'SELU':
109 |         return nn.SELU()
110 |     elif activation == 'ELU':
111 |         return nn.ELU()
112 |     else:
113 |         raise ValueError(f'Activation "{activation}" not supported.')
114 | 
115 | 
116 | def initialize_weights(model: nn.Module):
117 |     """
118 |     Initializes the weights of a model in place.
119 | 
120 |     :param model: An nn.Module.
121 |     """
122 |     for param in model.parameters():
123 |         if param.dim() == 1:
124 |             nn.init.constant_(param, 0)
125 |         else:
126 |             nn.init.xavier_normal_(param)
127 | 
128 | 
129 | def index_select_ND(source: torch.Tensor, index: torch.Tensor) -> torch.Tensor:
130 |     """
131 |     Selects the message features from source corresponding to the atom or bond indices in index.
132 | 
133 |     :param source: A tensor of shape (num_bonds, hidden_size) containing message features.
134 |     :param index: A tensor of shape (num_atoms/num_bonds, max_num_bonds) containing the atom or bond
135 |     indices to select from source.
136 |     :return: A tensor of shape (num_atoms/num_bonds, max_num_bonds, hidden_size) containing the message
137 |     features corresponding to the atoms/bonds specified in index.
138 |     """
139 |     index_size = index.size()  # (num_atoms/num_bonds, max_num_bonds)
140 |     suffix_dim = source.size()[1:]  # (hidden_size,)
141 |     final_size = index_size + suffix_dim  # (num_atoms/num_bonds, max_num_bonds, hidden_size)
142 | 
143 |     target = source.index_select(dim = 0, index = index.view(-1))  # (num_atoms/num_bonds * max_num_bonds, hidden_size)
144 |     target = target.view(final_size)  # (num_atoms/num_bonds, max_num_bonds, hidden_size)
145 | 
146 |     target[index == 0] = 0
147 |     return target


--------------------------------------------------------------------------------
/ACNet/Models/ClassifierModel.py:
--------------------------------------------------------------------------------
 1 | import torch.nn as nn
 2 | 
 3 | class DNN(nn.Module):
 4 |     def __init__(self, input_size, layer_sizes, output_size, opt):
 5 |         super(DNN, self).__init__()
 6 |         self.output_size = output_size
 7 |         self.opt = opt
 8 |         self.LayerList = nn.ModuleList()
 9 |         if len(layer_sizes) == 0:
10 |             self.FC = nn.Linear(input_size, output_size)
11 |         else:
12 |             for i in range(len(layer_sizes)):
13 |                 if i == 0:
14 |                     self.LayerList.append(nn.Linear(input_size, layer_sizes[i]))
15 |                 else:
16 |                     self.LayerList.append(nn.Linear(layer_sizes[i-1], layer_sizes[i]))
17 |                 self.LayerList.append(nn.ReLU())
18 |             self.Output = nn.Linear(layer_sizes[-1], output_size)
19 |         self.layer_sizes = layer_sizes
20 |         self.Drop = nn.Dropout(p=self.opt.args['DropRate'])
21 |         self.Softmax = nn.Softmax(dim=1)
22 | 
23 |     def forward(self, x):
24 |         if len(self.layer_sizes) == 0:
25 |             x = self.FC(x)
26 |             if self.opt.args['ClassNum'] != 1:
27 |                 if not self.training:
28 |                     # print(f"x size: {x.size()}")
29 |                     x = self.Softmax(x)
30 |         else:
31 |             for layer in self.LayerList:
32 |                 x = layer(x)
33 |             x = self.Drop(x)
34 |             x = self.Output(x)
35 |             if self.opt.args['ClassNum'] != 1:
36 |                 if not self.training:
37 |                     # print(f"x size: {x.size()}")
38 |                     x = self.Softmax(x)
39 | 
40 |         return x


--------------------------------------------------------------------------------
/ACNet/Models/Graphormer/algos.cpython-37m-x86_64-linux-gnu.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DrugAI/ACNet/1a4902c46f8a0bf97a0f8494d45989b81e053faa/ACNet/Models/Graphormer/algos.cpython-37m-x86_64-linux-gnu.so


--------------------------------------------------------------------------------
/ACNet/Models/Graphormer/algos.cpython-38-x86_64-linux-gnu.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DrugAI/ACNet/1a4902c46f8a0bf97a0f8494d45989b81e053faa/ACNet/Models/Graphormer/algos.cpython-38-x86_64-linux-gnu.so


--------------------------------------------------------------------------------
/ACNet/Models/Graphormer/algos.pyx:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Microsoft Corporation.
 2 | # Licensed under the MIT License.
 3 | 
 4 | import cython
 5 | from cython.parallel cimport prange, parallel
 6 | cimport numpy
 7 | import numpy
 8 | 
 9 | def floyd_warshall(adjacency_matrix):
10 | 
11 |     (nrows, ncols) = adjacency_matrix.shape
12 |     assert nrows == ncols
13 |     cdef unsigned int n = nrows
14 | 
15 |     adj_mat_copy = adjacency_matrix.astype(long, order='C', casting='safe', copy=True)
16 |     assert adj_mat_copy.flags['C_CONTIGUOUS']
17 |     cdef numpy.ndarray[long, ndim=2, mode='c'] M = adj_mat_copy
18 |     cdef numpy.ndarray[long, ndim=2, mode='c'] path = numpy.zeros([n, n], dtype=numpy.int64)
19 | 
20 |     cdef unsigned int i, j, k
21 |     cdef long M_ij, M_ik, cost_ikkj
22 |     cdef long* M_ptr = &M[0,0]
23 |     cdef long* M_i_ptr
24 |     cdef long* M_k_ptr
25 | 
26 |     # set unreachable nodes distance to 510
27 |     for i in range(n):
28 |         for j in range(n):
29 |             if i == j:
30 |                 M[i][j] = 0
31 |             elif M[i][j] == 0:
32 |                 M[i][j] = 510
33 | 
34 |     # floyed algo
35 |     for k in range(n):
36 |         M_k_ptr = M_ptr + n*k
37 |         for i in range(n):
38 |             M_i_ptr = M_ptr + n*i
39 |             M_ik = M_i_ptr[k]
40 |             for j in range(n):
41 |                 cost_ikkj = M_ik + M_k_ptr[j]
42 |                 M_ij = M_i_ptr[j]
43 |                 if M_ij > cost_ikkj:
44 |                     M_i_ptr[j] = cost_ikkj
45 |                     path[i][j] = k
46 |                     # Path[i][j] means, if want go from i to j, traveler should go to k first. Then, from k to j.
47 | 
48 |     # set unreachable path to 510
49 |     for i in range(n):
50 |         for j in range(n):
51 |             if M[i][j] >= 510:
52 |                 path[i][j] = 510
53 |                 M[i][j] = 510
54 | 
55 |     return M, path
56 | 
57 | 
58 | def get_all_edges(path, i, j):
59 |     cdef unsigned int k = path[i][j]
60 |     if k == 0:
61 |         return []
62 |     else:
63 |         return get_all_edges(path, i, k) + [k] + get_all_edges(path, k, j)
64 | # returns a list with all passing nodes from i to j in the SP
65 | 
66 | def gen_edge_input(max_dist, path, edge_feat):
67 | 
68 |     (nrows, ncols) = path.shape
69 |     assert nrows == ncols
70 |     cdef unsigned int n = nrows
71 |     cdef unsigned int max_dist_copy = max_dist
72 | 
73 |     path_copy = path.astype(long, order='C', casting='safe', copy=True)
74 |     edge_feat_copy = edge_feat.astype(long, order='C', casting='safe', copy=True)
75 |     assert path_copy.flags['C_CONTIGUOUS']
76 |     assert edge_feat_copy.flags['C_CONTIGUOUS']
77 | 
78 |     cdef numpy.ndarray[long, ndim=4, mode='c'] edge_fea_all = -1 * numpy.ones([n, n, max_dist_copy, edge_feat.shape[-1]], dtype=numpy.int64)
79 |     cdef unsigned int i, j, k, num_path, cur
80 | 
81 |     for i in range(n):
82 |         for j in range(n):
83 |             if i == j:
84 |                 continue
85 |             if path_copy[i][j] == 510:
86 |                 continue
87 |             path = [i] + get_all_edges(path_copy, i, j) + [j]
88 |             # path: [i, k1, k2, k3, ..., j]
89 |             # so the number of edges passing in this path is len(path) - 1
90 |             num_path = len(path) - 1
91 |             for k in range(num_path):
92 |                 edge_fea_all[i, j, k, :] = edge_feat_copy[path[k], path[k+1], :]
93 | 
94 |     # edge_fea_all: [node_num, node_num, max_distance(N), edge_feat_num]
95 |     # it stores all of the edge_attr of edges passing from node i to node j.
96 |     return edge_fea_all
97 | 


--------------------------------------------------------------------------------
/ACNet/Models/Graphormer/build/temp.linux-x86_64-3.7/algos.o:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DrugAI/ACNet/1a4902c46f8a0bf97a0f8494d45989b81e053faa/ACNet/Models/Graphormer/build/temp.linux-x86_64-3.7/algos.o


--------------------------------------------------------------------------------
/ACNet/Models/Graphormer/build/temp.linux-x86_64-3.8/algos.o:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DrugAI/ACNet/1a4902c46f8a0bf97a0f8494d45989b81e053faa/ACNet/Models/Graphormer/build/temp.linux-x86_64-3.8/algos.o


--------------------------------------------------------------------------------
/ACNet/Models/Graphormer/collator.py:
--------------------------------------------------------------------------------
  1 | # Copyright (c) Microsoft Corporation.
  2 | # Licensed under the MIT License.
  3 | 
  4 | import torch
  5 | 
  6 | 
  7 | def pad_1d_unsqueeze(x, padlen):
  8 |     x = x + 1  # pad id = 0
  9 |     xlen = x.size(0)
 10 |     if xlen < padlen:
 11 |         new_x = x.new_zeros([padlen], dtype=x.dtype)
 12 |         new_x[:xlen] = x
 13 |         x = new_x
 14 |     return x.unsqueeze(0)
 15 | 
 16 | 
 17 | def pad_2d_unsqueeze(x, padlen):
 18 |     x = x + 1  # pad id = 0
 19 |     xlen, xdim = x.size()
 20 |     if xlen < padlen:
 21 |         new_x = x.new_zeros([padlen, xdim], dtype=x.dtype)
 22 |         new_x[:xlen, :] = x
 23 |         x = new_x
 24 |     return x.unsqueeze(0)
 25 | 
 26 | 
 27 | def pad_attn_bias_unsqueeze(x, padlen):
 28 |     xlen = x.size(0)
 29 |     if xlen < padlen:
 30 |         new_x = x.new_zeros(
 31 |             [padlen, padlen], dtype=x.dtype).fill_(float('-inf'))
 32 |         new_x[:xlen, :xlen] = x
 33 |         new_x[xlen:, :xlen] = 0
 34 |         x = new_x
 35 |     return x.unsqueeze(0)
 36 | 
 37 | 
 38 | def pad_edge_type_unsqueeze(x, padlen):
 39 |     xlen = x.size(0)
 40 |     if xlen < padlen:
 41 |         new_x = x.new_zeros([padlen, padlen, x.size(-1)], dtype=x.dtype)
 42 |         new_x[:xlen, :xlen, :] = x
 43 |         x = new_x
 44 |     return x.unsqueeze(0)
 45 | 
 46 | 
 47 | def pad_spatial_pos_unsqueeze(x, padlen):
 48 |     x = x + 1
 49 |     xlen = x.size(0)
 50 |     if xlen < padlen:
 51 |         new_x = x.new_zeros([padlen, padlen], dtype=x.dtype)
 52 |         new_x[:xlen, :xlen] = x
 53 |         x = new_x
 54 |     return x.unsqueeze(0)
 55 | 
 56 | 
 57 | def pad_3d_unsqueeze(x, padlen1, padlen2, padlen3):
 58 |     x = x + 1
 59 |     xlen1, xlen2, xlen3, xlen4 = x.size()
 60 |     if xlen1 < padlen1 or xlen2 < padlen2 or xlen3 < padlen3:
 61 |         new_x = x.new_zeros([padlen1, padlen2, padlen3, xlen4], dtype=x.dtype)
 62 |         new_x[:xlen1, :xlen2, :xlen3, :] = x
 63 |         x = new_x
 64 |     return x.unsqueeze(0)
 65 | 
 66 | 
 67 | class Batch():
 68 |     def __init__(self, idx, attn_bias, attn_edge_type, spatial_pos, in_degree, out_degree, x, edge_input, y):
 69 |         super(Batch, self).__init__()
 70 |         self.idx = idx
 71 |         self.in_degree, self.out_degree = in_degree, out_degree
 72 |         self.x, self.y = x, y
 73 |         self.attn_bias, self.attn_edge_type, self.spatial_pos = attn_bias, attn_edge_type, spatial_pos
 74 |         self.edge_input = edge_input
 75 | 
 76 |     def to(self, device):
 77 |         self.idx = self.idx.to(device)
 78 |         self.in_degree, self.out_degree = self.in_degree.to(
 79 |             device), self.out_degree.to(device)
 80 |         self.x, self.y = self.x.to(device), self.y.to(device)
 81 |         self.attn_bias, self.attn_edge_type, self.spatial_pos = self.attn_bias.to(
 82 |             device), self.attn_edge_type.to(device), self.spatial_pos.to(device)
 83 |         self.edge_input = self.edge_input.to(device)
 84 |         return self
 85 | 
 86 |     def __len__(self):
 87 |         return self.in_degree.size(0)
 88 | 
 89 | def collator(items, max_node, multi_hop_max_dist, spatial_pos_max):
 90 |     data = []
 91 |     for item in items:
 92 |         [x,
 93 |          adj,
 94 |          attn_bias,
 95 |          attn_edge_type,
 96 |          spatial_pos,
 97 |          in_degree,
 98 |          out_degree,
 99 |          edge_input,
100 |          label,
101 |          idx]= item
102 |         if item is not None and x.size(0) <= max_node:
103 |             edge_input = edge_input[:, :, :multi_hop_max_dist, :]
104 |             item = [attn_bias,
105 |                     attn_edge_type,
106 |                     spatial_pos,
107 |                     in_degree,
108 |                     out_degree,
109 |                     x,
110 |                     edge_input,
111 |                     label,
112 |                     idx]
113 |             data.append(item)
114 |     attn_biases, attn_edge_types, spatial_poses, in_degrees, out_degrees, xs, edge_inputs, ys, idxs = zip(
115 |         *data)
116 |     for idx, _ in enumerate(attn_biases):
117 |         attn_biases[idx][1:, 1:][spatial_poses[idx] >= spatial_pos_max] = float('-inf')
118 |     max_node_num = max(i.size(0) for i in xs)
119 |     max_dist = max(i.size(-2) for i in edge_inputs)
120 | 
121 |     y = torch.cat(ys)
122 | 
123 |     x = torch.cat([pad_2d_unsqueeze(i, max_node_num) for i in xs])
124 |     edge_input = torch.cat([pad_3d_unsqueeze(
125 |         i, max_node_num, max_node_num, max_dist) for i in edge_inputs])
126 |     attn_bias = torch.cat([pad_attn_bias_unsqueeze(
127 |         i, max_node_num + 1) for i in attn_biases])
128 |     attn_edge_type = torch.cat(
129 |         [pad_edge_type_unsqueeze(i, max_node_num) for i in attn_edge_types])
130 |     spatial_pos = torch.cat([pad_spatial_pos_unsqueeze(i, max_node_num)
131 |                         for i in spatial_poses])
132 |     in_degree = torch.cat([pad_1d_unsqueeze(i, max_node_num)
133 |                           for i in in_degrees])
134 |     out_degree = torch.cat([pad_1d_unsqueeze(i, max_node_num)
135 |                            for i in out_degrees])
136 | 
137 |     #generate batch_data
138 |     return Batch(
139 |         idx=torch.LongTensor(idxs),
140 |         attn_bias=attn_bias,
141 |         attn_edge_type=attn_edge_type,
142 |         spatial_pos=spatial_pos,
143 |         in_degree=in_degree,
144 |         out_degree=out_degree,
145 |         x=x,
146 |         edge_input=edge_input,
147 |         y=y,
148 |     )
149 | 


--------------------------------------------------------------------------------
/ACNet/Models/Graphormer/data.py:
--------------------------------------------------------------------------------
  1 | # Copyright (c) Microsoft Corporation.
  2 | # Licensed under the MIT License.
  3 | 
  4 | from Models.Graphormer.collator import collator
  5 | from Models.Graphormer.wrapper import MyPygPCQM4MDataset, MyGraphPropPredDataset, MyZINCDataset
  6 | 
  7 | from pytorch_lightning import LightningDataModule
  8 | import torch
  9 | from torch.nn import functional as F
 10 | from torch.utils.data import DataLoader
 11 | import ogb
 12 | import ogb.lsc
 13 | import ogb.graphproppred
 14 | from functools import partial
 15 | 
 16 | 
 17 | dataset = None
 18 | 
 19 | 
 20 | def get_dataset(dataset_name='abaaba'):
 21 |     global dataset
 22 |     if dataset is not None:
 23 |         return dataset
 24 | 
 25 |     # max_node is set to max(max(num_val_graph_nodes), max(num_test_graph_nodes))
 26 |     if dataset_name == 'ogbg-molpcba':
 27 |         dataset = {
 28 |             'num_class': 128,
 29 |             'loss_fn': F.binary_cross_entropy_with_logits,
 30 |             'metric': 'ap',
 31 |             'metric_mode': 'max',
 32 |             'evaluator': ogb.graphproppred.Evaluator('ogbg-molpcba'),
 33 |             'dataset': MyGraphPropPredDataset('ogbg-molpcba', root='../../dataset'),
 34 |             'max_node': 128,
 35 |         }
 36 |     elif dataset_name == 'ogbg-molhiv':
 37 |         dataset = {
 38 |             'num_class': 1,
 39 |             'loss_fn': F.binary_cross_entropy_with_logits,
 40 |             'metric': 'rocauc',
 41 |             'metric_mode': 'max',
 42 |             'evaluator': ogb.graphproppred.Evaluator('ogbg-molhiv'),
 43 |             'dataset': MyGraphPropPredDataset('ogbg-molhiv', root='../../dataset'),
 44 |             'max_node': 128,
 45 |         }
 46 |     elif dataset_name == 'PCQM4M-LSC':
 47 |         dataset = {
 48 |             'num_class': 1,
 49 |             'loss_fn': F.l1_loss,
 50 |             'metric': 'mae',
 51 |             'metric_mode': 'min',
 52 |             'evaluator': ogb.lsc.PCQM4MEvaluator(),
 53 |             'dataset': MyPygPCQM4MDataset(root='../../dataset'),
 54 |             'max_node': 128,
 55 |         }
 56 |     elif dataset_name == 'ZINC':
 57 |         dataset = {
 58 |             'num_class': 1,
 59 |             'loss_fn': F.l1_loss,
 60 |             'metric': 'mae',
 61 |             'metric_mode': 'min',
 62 |             'evaluator': ogb.lsc.PCQM4MEvaluator(),  # same objective function, so reuse it
 63 |             'train_dataset': MyZINCDataset(subset=True, root='../../dataset/pyg_zinc', split='train'),
 64 |             'valid_dataset': MyZINCDataset(subset=True, root='../../dataset/pyg_zinc', split='val'),
 65 |             'test_dataset': MyZINCDataset(subset=True, root='../../dataset/pyg_zinc', split='test'),
 66 |             'max_node': 128,
 67 |         }
 68 | 
 69 |     else:
 70 |         raise NotImplementedError
 71 | 
 72 |     print(f' > {dataset_name} loaded!')
 73 |     print(dataset)
 74 |     print(f' > dataset info ends')
 75 |     return dataset
 76 | 
 77 | 
 78 | class GraphDataModule(LightningDataModule):
 79 |     name = "OGB-GRAPH"
 80 | 
 81 |     def __init__(
 82 |         self,
 83 |         dataset_name: str = 'ogbg-molpcba',
 84 |         num_workers: int = 4,
 85 |         batch_size: int = 128,
 86 |         seed: int = 42,
 87 |         multi_hop_max_dist: int = 5,
 88 |         spatial_pos_max: int = 1024,
 89 |         *args,
 90 |         **kwargs,
 91 |     ):
 92 |         super().__init__(*args, **kwargs)
 93 |         self.dataset_name = dataset_name
 94 |         self.dataset = get_dataset(self.dataset_name)
 95 | 
 96 |         self.num_workers = num_workers
 97 |         self.batch_size = batch_size
 98 |         self.dataset_train = ...
 99 |         self.dataset_val = ...
100 |         self.multi_hop_max_dist = multi_hop_max_dist
101 |         self.spatial_pos_max = spatial_pos_max
102 | 
103 |     def setup(self, stage: str = None):
104 |         if self.dataset_name == 'ZINC':
105 |             self.dataset_train = self.dataset['train_dataset']
106 |             self.dataset_val = self.dataset['valid_dataset']
107 |             self.dataset_test = self.dataset['test_dataset']
108 |         else:
109 |             split_idx = self.dataset['dataset'].get_idx_split()
110 |             self.dataset_train = self.dataset['dataset'][split_idx["train"]]
111 |             self.dataset_val = self.dataset['dataset'][split_idx["valid"]]
112 |             self.dataset_test = self.dataset['dataset'][split_idx["test"]]
113 | 
114 |     def train_dataloader(self):
115 |         loader = DataLoader(
116 |             self.dataset_train,
117 |             batch_size=self.batch_size,
118 |             shuffle=True,
119 |             num_workers=self.num_workers,
120 |             pin_memory=True,
121 |             collate_fn=partial(collator, max_node=get_dataset(self.dataset_name)[
122 |                                'max_node'], multi_hop_max_dist=self.multi_hop_max_dist, spatial_pos_max=self.spatial_pos_max),
123 |         )
124 |         print('len(train_dataloader)', len(loader))
125 |         return loader
126 | 
127 |     def val_dataloader(self):
128 |         loader = DataLoader(
129 |             self.dataset_val,
130 |             batch_size=self.batch_size,
131 |             shuffle=False,
132 |             num_workers=self.num_workers,
133 |             pin_memory=False,
134 |             collate_fn=partial(collator, max_node=get_dataset(self.dataset_name)[
135 |                                'max_node'], multi_hop_max_dist=self.multi_hop_max_dist, spatial_pos_max=self.spatial_pos_max),
136 |         )
137 |         print('len(val_dataloader)', len(loader))
138 |         return loader
139 | 
140 |     def test_dataloader(self):
141 |         loader = DataLoader(
142 |             self.dataset_test,
143 |             batch_size=self.batch_size,
144 |             shuffle=False,
145 |             num_workers=self.num_workers,
146 |             pin_memory=False,
147 |             collate_fn=partial(collator, max_node=get_dataset(self.dataset_name)[
148 |                                'max_node'], multi_hop_max_dist=self.multi_hop_max_dist, spatial_pos_max=self.spatial_pos_max),
149 |         )
150 |         print('len(test_dataloader)', len(loader))
151 |         return loader
152 | 


--------------------------------------------------------------------------------
/ACNet/Models/Graphormer/setup.py:
--------------------------------------------------------------------------------
1 | from distutils.core import setup
2 | from Cython.Build import cythonize
3 | import numpy
4 | setup(name='Alogs app',
5 |       ext_modules=cythonize('algos.pyx'),
6 |       include_dirs=[numpy.get_include()])
7 | 


--------------------------------------------------------------------------------
/ACNet/Models/Graphormer/wrapper.py:
--------------------------------------------------------------------------------
  1 | # Copyright (c) Microsoft Corporation.
  2 | # Licensed under the MIT License.
  3 | 
  4 | import torch
  5 | import numpy as np
  6 | import torch_geometric.datasets
  7 | from ogb.lsc.pcqm4m_pyg import PygPCQM4MDataset
  8 | from ogb.graphproppred import PygGraphPropPredDataset
  9 | import pyximport
 10 | 
 11 | pyximport.install(setup_args={'include_dirs': np.get_include()})
 12 | from Models.Graphormer.algos import *
 13 | 
 14 | 
 15 | def convert_to_single_emb(x, offset=512):
 16 |     feature_num = x.size(1) if len(x.size()) > 1 else 1
 17 |     feature_offset = 1 + \
 18 |         torch.arange(0, feature_num * offset, offset, dtype=torch.long)
 19 |     x = x + feature_offset
 20 |     return x
 21 | 
 22 | def preprocess_item(item):
 23 |     # edge_index [2,num_edges]
 24 |     # edge_attr [num_nodes, 3]
 25 |     # x: node_feature [num_nodes,9]
 26 |     # y: graph_label [num_graphs,1]
 27 |     edge_attr, edge_index, x = item.edge_attr, item.edge_index, item.x
 28 |     N = x.size(0)
 29 |     x = convert_to_single_emb(x)
 30 | 
 31 |     # node adj matrix [N, N] bool
 32 |     adj = torch.zeros([N, N], dtype=torch.bool)
 33 |     adj[edge_index[0, :], edge_index[1, :]] = True
 34 | 
 35 |     # edge feature here
 36 |     if len(edge_attr.size()) == 1:
 37 |         edge_attr = edge_attr[:, None]
 38 |     attn_edge_type = torch.zeros([N, N, edge_attr.size(-1)], dtype=torch.long)
 39 |     attn_edge_type[edge_index[0, :], edge_index[1, :]
 40 |                    ] = convert_to_single_emb(edge_attr) + 1
 41 | 
 42 |     shortest_path_result, path = algos.floyd_warshall(adj.numpy())
 43 |     max_dist = np.amax(shortest_path_result)
 44 |     edge_input = algos.gen_edge_input(max_dist, path, attn_edge_type.numpy())
 45 |     spatial_pos = torch.from_numpy((shortest_path_result)).long()
 46 |     attn_bias = torch.zeros(
 47 |         [N + 1, N + 1], dtype=torch.float)  # with graph token
 48 | 
 49 |     # combine
 50 |     item.x = x
 51 |     item.adj = adj
 52 |     item.attn_bias = attn_bias
 53 |     item.attn_edge_type = attn_edge_type
 54 |     item.spatial_pos = spatial_pos
 55 |     item.in_degree = adj.long().sum(dim=1).view(-1)
 56 |     item.out_degree = adj.long().sum(dim=0).view(-1)
 57 |     item.edge_input = torch.from_numpy(edge_input).long()
 58 | 
 59 |     return item
 60 | 
 61 | 
 62 | class MyGraphPropPredDataset(PygGraphPropPredDataset):
 63 |     def download(self):
 64 |         super(MyGraphPropPredDataset, self).download()
 65 | 
 66 |     def process(self):
 67 |         super(MyGraphPropPredDataset, self).process()
 68 | 
 69 |     def __getitem__(self, idx):
 70 |         if isinstance(idx, int):
 71 |             item = self.get(self.indices()[idx])
 72 |             item.idx = idx
 73 |             return preprocess_item(item)
 74 |         else:
 75 |             return self.index_select(idx)
 76 | 
 77 | 
 78 | class MyPygPCQM4MDataset(PygPCQM4MDataset):
 79 |     def download(self):
 80 |         super(MyPygPCQM4MDataset, self).download()
 81 | 
 82 |     def process(self):
 83 |         super(MyPygPCQM4MDataset, self).process()
 84 | 
 85 |     def __getitem__(self, idx):
 86 |         if isinstance(idx, int):
 87 |             item = self.get(self.indices()[idx])
 88 |             item.idx = idx
 89 |             return preprocess_item(item)
 90 |         else:
 91 |             return self.index_select(idx)
 92 | 
 93 | 
 94 | class MyZINCDataset(torch_geometric.datasets.ZINC):
 95 |     def download(self):
 96 |         super(MyZINCDataset, self).download()
 97 | 
 98 |     def process(self):
 99 |         super(MyZINCDataset, self).process()
100 | 
101 |     def __getitem__(self, idx):
102 |         if isinstance(idx, int):
103 |             item = self.get(self.indices()[idx])
104 |             item.idx = idx
105 |             return preprocess_item(item)
106 |         else:
107 |             return self.index_select(idx)
108 | 


--------------------------------------------------------------------------------
/ACNet/Pretrain8Few.py:
--------------------------------------------------------------------------------
 1 | from ACComponents.ACProcessControllers import *
 2 | 
 3 | ExpOptions = {
 4 |     'Search': 'greedy',
 5 |     'SeedPerOpt': 3,
 6 |     'SubsetsNum': 13,
 7 |     'OnlyEval': False,
 8 |     'Finetune':True,
 9 | }
10 | 
11 | BasicParamList = {
12 |     'ExpName': 'ACFew',
13 |     'MainMetric': 'AUC',
14 |     'DataPath': './ACComponents/ACDataset/data_files/MMP_AC_Few_representation/Pretrain8.npz',
15 |     'RootPath': './TestExp/Few/Pretrain8/',
16 |     'CUDA_VISIBLE_DEVICES': '3',
17 |     'TaskNum': 1,
18 |     'ClassNum': 2,
19 |     'OutputSize': 2,
20 |     'Feature': 'Raw',
21 |     'Model': 'MLP',
22 | 
23 |     # if Feature == Raw
24 |     'RawFeatureSize': 512,
25 | 
26 |     'OnlySpecific': True,
27 |     'Weight': True,
28 |     'AC': True,
29 |     'PyG': False,
30 | 
31 |     'ValidRate': 40000,
32 |     'PrintRate': 5,
33 |     'UpdateRate': 1,
34 |     'SplitRate': [0.8, 0.1],
35 |     'Splitter': 'Random',
36 |     'MaxEpoch': 300,
37 |     'LowerThanMaxLimit': 12,
38 |     'DecreasingLimit': 8,
39 | 
40 |     # if OnlyEval == True:
41 |     'EvalModelPath': None,
42 |     'EvalDatasetPath': None,
43 |     'EvalLogAllPreds': None,
44 | 
45 |     'Scheduler': 'PolynomialDecayLR',
46 | 
47 |     # Params for PolynomialDecayLR only
48 |     'WarmupEpoch': 2,
49 |     'LRMaxEpoch':300,
50 |     'EndLR':1e-9,
51 |     'Power':1.0,
52 |     # Params for StepLR only
53 |     'LRStep': 30,
54 |     'LRGamma': 0.1,
55 |     ##########
56 | 
57 |     'WeightIniter': None,
58 | 
59 |     # Params for NormWeightIniter only
60 |     'InitMean' : 0,
61 |     'InitStd' : 1,
62 | 
63 |     # Training Params to be adujsted. If the param is not needed to be adjusted, set the value here.
64 |     'SplitValidSeed': 8,
65 |     'SplitTestSeed': 8,
66 |     'BatchSize': 8,
67 | }
68 | AdjustableParamList = {}
69 | SpecificParamList = {
70 |     'DropRate':[0.4],
71 |     'WeightDecay':[5],
72 |     'lr':[3],
73 |     'DNNLayers':[[256,64]],
74 | }
75 | 
76 | 
77 | expcontroller = ACExperimentProcessController(ExpOptions, [BasicParamList, AdjustableParamList, SpecificParamList])
78 | 
79 | expcontroller.ExperimentStart()
80 | 
81 | 


--------------------------------------------------------------------------------
/ACNet/PretrainGNNsFew.py:
--------------------------------------------------------------------------------
 1 | from ACComponents.ACProcessControllers import *
 2 | 
 3 | ExpOptions = {
 4 |     'Search': 'greedy',
 5 |     'SeedPerOpt': 3,
 6 |     'SubsetsNum': 13,
 7 |     'OnlyEval': False,
 8 |     'Finetune':True,
 9 | }
10 | 
11 | BasicParamList = {
12 |     'ExpName': 'ACFew',
13 |     'MainMetric': 'AUC',
14 |     'DataPath': './ACComponents/ACDataset/data_files/MMP_AC_Few_representation/PretrainGNNs.npz',
15 |     'RootPath': './TestExp/Few/PretrainGNNs/',
16 |     'CUDA_VISIBLE_DEVICES': '3',
17 |     'TaskNum': 1,
18 |     'ClassNum': 2,
19 |     'OutputSize': 2,
20 |     'Feature': 'Raw',
21 |     'Model': 'MLP',
22 | 
23 |     # if Feature == Raw
24 |     'RawFeatureSize': 300,
25 | 
26 |     'OnlySpecific': True,
27 |     'Weight': True,
28 |     'AC': True,
29 |     'PyG': False,
30 | 
31 |     'ValidRate': 40000,
32 |     'PrintRate': 5,
33 |     'UpdateRate': 1,
34 |     'SplitRate': [0.8, 0.1],
35 |     'Splitter': 'Random',
36 |     'MaxEpoch': 300,
37 |     'LowerThanMaxLimit': 12,
38 |     'DecreasingLimit': 8,
39 | 
40 |     # if OnlyEval == True:
41 |     'EvalModelPath': None,
42 |     'EvalDatasetPath': None,
43 |     'EvalLogAllPreds': None,
44 | 
45 |     'Scheduler': 'PolynomialDecayLR',
46 | 
47 |     # Params for PolynomialDecayLR only
48 |     'WarmupEpoch': 2,
49 |     'LRMaxEpoch':300,
50 |     'EndLR':1e-9,
51 |     'Power':1.0,
52 |     # Params for StepLR only
53 |     'LRStep': 30,
54 |     'LRGamma': 0.1,
55 |     ##########
56 | 
57 |     'WeightIniter': None,
58 | 
59 |     # Params for NormWeightIniter only
60 |     'InitMean' : 0,
61 |     'InitStd' : 1,
62 | 
63 |     # Training Params to be adujsted. If the param is not needed to be adjusted, set the value here.
64 |     'SplitValidSeed': 8,
65 |     'SplitTestSeed': 8,
66 |     'BatchSize': 8,
67 | 
68 | }
69 | AdjustableParamList = {}
70 | SpecificParamList = {
71 |     'DropRate':[0.2],
72 |     'WeightDecay':[4.5],
73 |     'lr':[3],
74 |     'DNNLayers':[[128]],
75 | }
76 | 
77 | 
78 | expcontroller = ACExperimentProcessController(ExpOptions, [BasicParamList, AdjustableParamList, SpecificParamList])
79 | 
80 | expcontroller.ExperimentStart()
81 | 
82 | 


--------------------------------------------------------------------------------
/ACNet/SGCLarge.py:
--------------------------------------------------------------------------------
 1 | from ACComponents.ACProcessControllers import *
 2 | 
 3 | ExpOptions = {
 4 |     'Search': 'greedy',
 5 |     'SeedPerOpt': 3,
 6 |     'SubsetsNum': 3,
 7 |     'OnlyEval': False,
 8 | }
 9 | 
10 | BasicParamList = {
11 |     'ExpName': 'ACLarge',
12 |     'MainMetric': 'AUC',
13 |     'DataPath': './ACComponents/ACDataset/data_files/generated_datasets/MMP_AC_Large.json',
14 |     'RootPath': './TestExp/Large/SGC/',
15 |     'CUDA_VISIBLE_DEVICES': '1',
16 |     'TaskNum': 1,
17 |     'ClassNum': 2,
18 |     'OutputSize': 2,
19 |     'Feature': 'PyGSGC',
20 |     'Model': 'PyGSGC',
21 | 
22 |     'OnlySpecific': True,
23 |     'Weight': True,
24 |     'AC': True,
25 |     'PyG': True,
26 | 
27 |     'ValidRate': 4000,
28 |     'PrintRate': 5,
29 |     'UpdateRate': 1,
30 |     'SplitRate': [0.8, 0.1],
31 |     'Splitter': 'Random',
32 |     'MaxEpoch': 300,
33 |     'LowerThanMaxLimit': 30,
34 |     'DecreasingLimit': 12,
35 | 
36 |     # if OnlyEval == True:
37 |     'EvalModelPath': None,
38 |     'EvalDatasetPath': None,
39 |     'EvalLogAllPreds': None,
40 | 
41 |     'Scheduler': 'PolynomialDecayLR',
42 | 
43 |     # Params for PolynomialDecayLR only
44 |     'WarmupEpoch': 2,
45 |     'LRMaxEpoch':300,
46 |     'EndLR':1e-9,
47 |     'Power':1.0,
48 |     # Params for StepLR only
49 |     'LRStep': 30,
50 |     'LRGamma': 0.1,
51 |     ##########
52 | 
53 |     'WeightIniter': 'XavierNorm',
54 | 
55 |     # Params for NormWeightIniter only
56 |     'InitMean' : 0,
57 |     'InitStd' : 1,
58 | 
59 |     'AtomFeatureSize': 39,
60 |     'BondFeatureSize': 10,
61 | 
62 |     'GCNReadout': 'Add',
63 | 
64 | 
65 |     'SplitValidSeed': 8,
66 |     'SplitTestSeed': 8,
67 |     'BatchSize': 200,
68 | 
69 | }
70 | AdjustableParamList = {}
71 | SpecificParamList = {
72 |     'DropRate':[0.4],
73 |     'WeightDecay':[5],
74 |     'lr':[4],
75 |     'SGCInputSize': [128],
76 |     'SGCHiddenSize': [256],
77 |     'SGCK': [2],
78 |     'SGCLayers': [4],
79 |     'FPSize':[128],
80 |     'DNNLayers':[[64]],
81 | }
82 | 
83 | expcontroller = ACExperimentProcessController(ExpOptions, [BasicParamList, AdjustableParamList, SpecificParamList])
84 | 
85 | expcontroller.ExperimentStart()
86 | 


--------------------------------------------------------------------------------
/ACNet/SGCMedium.py:
--------------------------------------------------------------------------------
 1 | from ACComponents.ACProcessControllers import *
 2 | 
 3 | ExpOptions = {
 4 |     'Search': 'greedy',
 5 |     'SeedPerOpt': 3,
 6 |     'SubsetsNum': 64,
 7 |     'OnlyEval': False,
 8 | }
 9 | 
10 | BasicParamList = {
11 |     'ExpName': 'ACMedium',
12 |     'MainMetric': 'AUC',
13 |     'DataPath': './ACComponents/ACDataset/data_files/generated_datasets/MMP_AC_Medium.json',
14 |     'RootPath': './TestExp/Medium/SGC/',
15 |     'CUDA_VISIBLE_DEVICES': '1',
16 |     'TaskNum': 1,
17 |     'ClassNum': 2,
18 |     'OutputSize': 2,
19 |     'Feature': 'PyGSGC',
20 |     'Model': 'PyGSGC',
21 | 
22 |     'OnlySpecific': True,
23 |     'Weight': True,
24 |     'AC': True,
25 |     'PyG': True,
26 | 
27 |     'ValidRate': 4000,
28 |     'PrintRate': 5,
29 |     'UpdateRate': 1,
30 |     'SplitRate': [0.8, 0.1],
31 |     'Splitter': 'Random',
32 |     'MaxEpoch': 300,
33 |     'LowerThanMaxLimit': 30,
34 |     'DecreasingLimit': 12,
35 | 
36 |     # if OnlyEval == True:
37 |     'EvalModelPath': None,
38 |     'EvalDatasetPath': None,
39 |     'EvalLogAllPreds': None,
40 | 
41 |     'Scheduler': 'PolynomialDecayLR',
42 | 
43 |     # Params for PolynomialDecayLR only
44 |     'WarmupEpoch': 2,
45 |     'LRMaxEpoch':300,
46 |     'EndLR':1e-9,
47 |     'Power':1.0,
48 |     # Params for StepLR only
49 |     'LRStep': 30,
50 |     'LRGamma': 0.1,
51 |     ##########
52 | 
53 |     'WeightIniter': 'XavierNorm',
54 | 
55 |     # Params for NormWeightIniter only
56 |     'InitMean' : 0,
57 |     'InitStd' : 1,
58 | 
59 |     'AtomFeatureSize': 39,
60 |     'BondFeatureSize': 10,
61 | 
62 |     'GCNReadout': 'Add',
63 | 
64 | 
65 |     'SplitValidSeed': 8,
66 |     'SplitTestSeed': 8,
67 |     'BatchSize': 200,
68 | 
69 | }
70 | AdjustableParamList = {}
71 | SpecificParamList = {
72 |     'DropRate':[0.2],
73 |     'WeightDecay':[4.5],
74 |     'lr':[3],
75 |     'SGCInputSize': [64],
76 |     'SGCHiddenSize': [64],
77 |     'SGCK': [2],
78 |     'SGCLayers': [3],
79 |     'FPSize':[32],
80 |     'DNNLayers':[[]],
81 | }
82 | 
83 | expcontroller = ACExperimentProcessController(ExpOptions, [BasicParamList, AdjustableParamList, SpecificParamList])
84 | 
85 | expcontroller.ExperimentStart()
86 | 


--------------------------------------------------------------------------------
/ACNet/SGCSmall.py:
--------------------------------------------------------------------------------
 1 | from ACComponents.ACProcessControllers import *
 2 | 
 3 | ExpOptions = {
 4 |     'Search': 'greedy',
 5 |     'SeedPerOpt': 3,
 6 |     'SubsetsNum': 110,
 7 |     'OnlyEval': False,
 8 | }
 9 | 
10 | BasicParamList = {
11 |     'ExpName': 'ACSmall',
12 |     'MainMetric': 'AUC',
13 |     'DataPath': './ACComponents/ACDataset/data_files/generated_datasets/MMP_AC_Small.json',
14 |     'RootPath': './TestExp/Small/SGC/',
15 |     'CUDA_VISIBLE_DEVICES': '1',
16 |     'TaskNum': 1,
17 |     'ClassNum': 2,
18 |     'OutputSize': 2,
19 |     'Feature': 'PyGSGC',
20 |     'Model': 'PyGSGC',
21 | 
22 |     'OnlySpecific': True,
23 |     'Weight': True,
24 |     'AC': True,
25 |     'PyG': True,
26 | 
27 |     'ValidRate': 4000,
28 |     'PrintRate': 5,
29 |     'UpdateRate': 1,
30 |     'SplitRate': [0.8, 0.1],
31 |     'Splitter': 'Random',
32 |     'MaxEpoch': 300,
33 |     'LowerThanMaxLimit': 30,
34 |     'DecreasingLimit': 12,
35 | 
36 |     # if OnlyEval == True:
37 |     'EvalModelPath': None,
38 |     'EvalDatasetPath': None,
39 |     'EvalLogAllPreds': None,
40 | 
41 |     'Scheduler': 'PolynomialDecayLR',
42 | 
43 |     # Params for PolynomialDecayLR only
44 |     'WarmupEpoch': 2,
45 |     'LRMaxEpoch':300,
46 |     'EndLR':1e-9,
47 |     'Power':1.0,
48 |     # Params for StepLR only
49 |     'LRStep': 30,
50 |     'LRGamma': 0.1,
51 |     ##########
52 | 
53 |     'WeightIniter': 'XavierNorm',
54 | 
55 |     # Params for NormWeightIniter only
56 |     'InitMean' : 0,
57 |     'InitStd' : 1,
58 | 
59 |     'AtomFeatureSize': 39,
60 |     'BondFeatureSize': 10,
61 | 
62 |     'GCNReadout': 'Add',
63 | 
64 | 
65 |     'SplitValidSeed': 8,
66 |     'SplitTestSeed': 8,
67 |     'BatchSize': 32,
68 | 
69 | }
70 | AdjustableParamList = {}
71 | SpecificParamList = {
72 |     'DropRate':[0.2],
73 |     'WeightDecay':[4.5],
74 |     'lr':[3],
75 |     'SGCInputSize': [64],
76 |     'SGCHiddenSize': [128],
77 |     'SGCK': [1],
78 |     'SGCLayers': [3],
79 |     'FPSize':[64],
80 |     'DNNLayers':[[]],
81 | }
82 | 
83 | expcontroller = ACExperimentProcessController(ExpOptions, [BasicParamList, AdjustableParamList, SpecificParamList])
84 | 
85 | expcontroller.ExperimentStart()
86 | 


--------------------------------------------------------------------------------
/ACNet/SMILESTransformerFew.py:
--------------------------------------------------------------------------------
 1 | from ACComponents.ACProcessControllers import *
 2 | 
 3 | ExpOptions = {
 4 |     'Search': 'greedy',
 5 |     'SeedPerOpt': 3,
 6 |     'SubsetsNum': 13,
 7 |     'OnlyEval': False,
 8 |     'Finetune':True,
 9 | }
10 | 
11 | BasicParamList = {
12 |     'ExpName': 'ACFew',
13 |     'MainMetric': 'AUC',
14 |     'DataPath': './ACComponents/ACDataset/data_files/MMP_AC_Few_representation/SMILESTransformer.npz',
15 |     'RootPath': './TestExp/Few/SMILESTransformer/',
16 |     'CUDA_VISIBLE_DEVICES': '3',
17 |     'TaskNum': 1,
18 |     'ClassNum': 2,
19 |     'OutputSize': 2,
20 |     'Feature': 'Raw',
21 |     'Model': 'MLP',
22 | 
23 |     # if Feature == Raw
24 |     'RawFeatureSize': 1024,
25 | 
26 |     'OnlySpecific': True,
27 |     'Weight': True,
28 |     'AC': True,
29 |     'PyG': False,
30 | 
31 |     'ValidRate': 40000,
32 |     'PrintRate': 5,
33 |     'UpdateRate': 1,
34 |     'SplitRate': [0.8, 0.1],
35 |     'Splitter': 'Random',
36 |     'MaxEpoch': 300,
37 |     'LowerThanMaxLimit': 12,
38 |     'DecreasingLimit': 8,
39 | 
40 |     # if OnlyEval == True:
41 |     'EvalModelPath': None,
42 |     'EvalDatasetPath': None,
43 |     'EvalLogAllPreds': None,
44 | 
45 |     'Scheduler': 'PolynomialDecayLR',
46 | 
47 |     # Params for PolynomialDecayLR only
48 |     'WarmupEpoch': 2,
49 |     'LRMaxEpoch':300,
50 |     'EndLR':1e-9,
51 |     'Power':1.0,
52 |     # Params for StepLR only
53 |     'LRStep': 30,
54 |     'LRGamma': 0.1,
55 |     ##########
56 | 
57 |     'WeightIniter': None,
58 | 
59 |     # Params for NormWeightIniter only
60 |     'InitMean' : 0,
61 |     'InitStd' : 1,
62 | 
63 |     # Training Params to be adujsted. If the param is not needed to be adjusted, set the value here.
64 |     'SplitValidSeed': 8,
65 |     'SplitTestSeed': 8,
66 |     'BatchSize': 8,
67 | }
68 | AdjustableParamList = {}
69 | SpecificParamList = {
70 |     'DropRate':[0.2],
71 |     'WeightDecay':[4.5],
72 |     'lr':[3],
73 |     'DNNLayers':[[128]],
74 | }
75 | 
76 | 
77 | expcontroller = ACExperimentProcessController(ExpOptions, [BasicParamList, AdjustableParamList, SpecificParamList])
78 | 
79 | expcontroller.ExperimentStart()
80 | 
81 | 


--------------------------------------------------------------------------------
/ACNet/TrainingFramework/Dataset.py:
--------------------------------------------------------------------------------
 1 | from TrainingFramework.FileUtils import *
 2 | from TrainingFramework.Splitter import *
 3 | from TrainingFramework.Featurizer import *
 4 | from torch.utils import data
 5 | from torch_geometric.data import InMemoryDataset
 6 | import os
 7 | 
 8 | class PyGMolDataset(InMemoryDataset):
 9 |     def __init__(self, graphdataset, opt, mode):
10 |         self.graph_dataset = graphdataset
11 |         self.opt = opt
12 |         # todo(zqzhang): updated in ACv7
13 |         self.dataset_path_root = self.opt.args['ExpDir'] + 'Dataset/'
14 |         if not os.path.exists(self.dataset_path_root):
15 |             os.mkdir(self.dataset_path_root)
16 |         self.mode = mode
17 |         if os.path.exists(self.dataset_path_root + 'processed/' + self.processed_file_names[0]):
18 |             os.remove(self.dataset_path_root + 'processed/' + self.processed_file_names[0])
19 |         super(PyGMolDataset, self).__init__(root = self.dataset_path_root)
20 |         self.data, self.slices = t.load(self.processed_paths[0])
21 | 
22 |     @property
23 |     def raw_file_names(self):
24 |         return [self.opt.args['DataPath']]
25 | 
26 |     @property
27 |     def processed_file_names(self):
28 |         return [self.opt.args['ExpName'] + '_' + self.mode + '.pt']
29 | 
30 |     def download(self):
31 |         pass
32 | 
33 |     def process(self):
34 |         data_list = self.graph_dataset
35 |         data, slices = self.collate(data_list)
36 |         # print("Processed without saving complete.")
37 |         print("Saving processed files...")
38 |         t.save((data, slices), self.processed_paths[0])
39 |         print('Saving complete!')
40 | 
41 |     # def __len__(self):
42 |     #     return len(self.graph_dataset)
43 | 


--------------------------------------------------------------------------------
/ACNet/TrainingFramework/FileUtils.py:
--------------------------------------------------------------------------------
  1 | import re
  2 | import json
  3 | 
  4 | class FileLoader(object):
  5 |     def __init__(self, file_path):
  6 |         super(FileLoader, self).__init__()
  7 |         self.path = file_path
  8 | 
  9 |     def load(self):
 10 |         with open(self.path, 'r') as f:
 11 |             raw_data = f.readlines()
 12 |         return raw_data
 13 | 
 14 | class JsonFileLoader(object):
 15 |     def __init__(self, file_path):
 16 |         super(JsonFileLoader, self).__init__()
 17 |         self.path = file_path
 18 | 
 19 |     def load(self):
 20 |         with open(self.path, 'r') as f:
 21 |             raw_dataset = json.load(f)
 22 |         return raw_dataset
 23 | 
 24 | # todo(zqzhang): updated in TPv7
 25 | class PTFileLoader(object):
 26 |     def __init__(self, file_path):
 27 |         super(PTFileLoader, self).__init__()
 28 |         self.path = file_path
 29 | 
 30 |     def load(self):
 31 |         import torch as t
 32 |         import numpy as np
 33 |         Content = t.load(self.path, map_location = 'cpu')
 34 |         if Content.__class__ == t.Tensor:
 35 |             Content = Content.cpu()
 36 |         Content = np.array(Content)
 37 |         return Content
 38 | 
 39 | class NpyFileLoader(object):
 40 |     def __init__(self, file_path):
 41 |         super(NpyFileLoader, self).__init__()
 42 |         self.path = file_path
 43 | 
 44 |     def load(self):
 45 |         import numpy as np
 46 |         Content = np.load(self.path)
 47 |         return Content
 48 | 
 49 | class NpzFileLoader(object):
 50 |     def __init__(self, file_path):
 51 |         super(NpzFileLoader, self).__init__()
 52 |         self.path = file_path
 53 | 
 54 |     def load(self):
 55 |         import numpy as np
 56 |         Content = np.load(self.path)
 57 |         return Content
 58 | 
 59 | ##############################################
 60 | # Parse files for different dataset files
 61 | ##############################################
 62 | class BasicFileParser(object):
 63 |     def __init__(self):
 64 |         super(BasicFileParser, self).__init__()
 65 | 
 66 |     def _parse_line(self, line):
 67 |         raise NotImplementedError(
 68 |             "Line parser not implemented."
 69 |         )
 70 | 
 71 |     def parse_file(self, raw_data):
 72 |         Dataset = []
 73 |         for line in raw_data:
 74 |             data = self._parse_line(line)
 75 |             Dataset.append(data)
 76 |         return Dataset
 77 | 
 78 | class HIVFileParser(BasicFileParser):
 79 |     def __init__(self):
 80 |         super(HIVFileParser, self).__init__()
 81 | 
 82 |     def _parse_line(self, line):
 83 |         data = re.split(',', line)
 84 |         SMILES = data[0]
 85 |         Value = data[1]
 86 |         Value = re.split('\n', Value)[0]
 87 |         return {'SMILES': SMILES, 'Value': Value}
 88 | 
 89 | 
 90 | class BBBPFileParser(BasicFileParser):
 91 |     def __init__(self):
 92 |         super(BBBPFileParser, self).__init__()
 93 | 
 94 |     def _parse_line(self, line):
 95 |         data = re.split(',', line)
 96 |         SMILES = data[0]
 97 |         Value = data[1]
 98 |         Value = re.split('\n', Value)[0]
 99 |         return {'SMILES': SMILES, 'Value': Value}
100 | 
101 | 
102 | class BACEFileParser(BasicFileParser):
103 |     def __init__(self):
104 |         super(BACEFileParser, self).__init__()
105 | 
106 |     def _parse_line(self, line):
107 |         data = re.split(',', line)
108 |         SMILES = data[0]
109 |         Value = data[1]
110 |         Value = re.split('\n', Value)[0]
111 |         return {'SMILES': SMILES, 'Value': Value}
112 | 
113 | 
114 | class QM9FileParser(BasicFileParser):
115 |     def __init__(self):
116 |         super(QM9FileParser, self).__init__()
117 | 
118 |     def _parse_line(self, line):
119 |         data = re.split(',', line)
120 |         SMILES = data[0]
121 |         Value = data[1:]
122 |         Value[-1] = re.split('\n', Value[-1])[0]
123 |         return {'SMILES': SMILES, 'Value': Value}
124 | 
125 | 
126 | class FreeSolvFileParser(BasicFileParser):
127 |     def __init__(self):
128 |         super(FreeSolvFileParser, self).__init__()
129 | 
130 |     def _parse_line(self, line):
131 |         data = re.split(',', line)
132 |         SMILES = data[0]
133 |         Value = data[1]
134 |         Value = re.split('\n', Value)[0]
135 |         return {'SMILES': SMILES, 'Value': Value}
136 | 
137 | 
138 | class LipopFileParser(BasicFileParser):
139 |     def __init__(self):
140 |         super(LipopFileParser, self).__init__()
141 | 
142 |     def _parse_line(self, line):
143 |         data = re.split(',', line)
144 |         SMILES = data[0]
145 |         Value = data[1]
146 |         Value = re.split('\n', Value)[0]
147 |         return {'SMILES': SMILES, 'Value': Value}
148 | 
149 | 
150 | class MalariaFileParser(BasicFileParser):
151 |     def __init__(self):
152 |         super(MalariaFileParser, self).__init__()
153 | 
154 |     def _parse_line(self, line):
155 |         data = re.split(',', line)
156 |         SMILES = data[0]
157 |         Value = data[1]
158 |         Value = re.split('\n', Value)[0]
159 |         return {'SMILES': SMILES, 'Value': Value}
160 | 
161 | 
162 | class CEPFileParser(BasicFileParser):
163 |     def __init__(self):
164 |         super(CEPFileParser, self).__init__()
165 | 
166 |     def _parse_line(self, line):
167 |         data = re.split(',', line)
168 |         SMILES = data[0]
169 |         Value = data[1]
170 |         Value = re.split('\n', Value)[0]
171 |         return {'SMILES': SMILES, 'Value': Value}
172 | 
173 | 
174 | class SHP2FileParser(BasicFileParser):
175 |     def __init__(self):
176 |         super(SHP2FileParser, self).__init__()
177 | 
178 |     def _parse_line(self, line):
179 |         data = re.split(',', line)
180 |         SMILES = data[0]
181 |         Value = data[1]
182 |         Value = re.split('\n', Value)[0]
183 |         return {'SMILES': SMILES, 'Value': Value}
184 | 
185 | 
186 | class Tox21FileParser(BasicFileParser):
187 |     def __init__(self):
188 |         super(Tox21FileParser, self).__init__()
189 | 
190 |     def _parse_line(self, line):
191 |         data = re.split(',', line)
192 |         SMILES = data[0]
193 |         Value = data[1:]
194 |         Value[-1] = re.split('\n', Value[-1])[0]
195 |         for i in range(len(Value)):
196 |             value = Value[i]
197 |             if value == '':
198 |                 Value[i] = '-1'
199 |         return {'SMILES': SMILES, 'Value': Value}
200 | 
201 | 
202 | class ToxcastFileParser(BasicFileParser):
203 |     def __init__(self):
204 |         super(ToxcastFileParser, self).__init__()
205 | 
206 |     def _parse_line(self, line):
207 |         # Convert '1.0/0.0' to '1/0'
208 |         # Convert missing value '' to '-1'
209 |         data = re.split(',', line)
210 |         SMILES = data[0]
211 |         Value = data[1:]
212 |         Value[-1] = re.split('\n', Value[-1])[0]
213 |         for i in range(len(Value)):
214 |             value = Value[i]
215 |             if value == '':
216 |                 Value[i] = '-1'
217 |             elif value == '0.0':
218 |                 Value[i] = '0'
219 |             elif value == '1.0':
220 |                 Value[i] = '1'
221 |         return {'SMILES': SMILES, 'Value': Value}
222 | 
223 | 
224 | class MUVFileParser(BasicFileParser):
225 |     def __init__(self):
226 |         super(MUVFileParser, self).__init__()
227 | 
228 |     def _parse_line(self, line):
229 |         data = re.split(',', line)
230 |         SMILES = data[0]
231 |         Value = data[1:]
232 |         Value[-1] = re.split('\n', Value[-1])[0]
233 |         for i in range(len(Value)):
234 |             value = Value[i]
235 |             if value == '':
236 |                 Value[i] = '-1'
237 |         return {"SMILES": SMILES, 'Value': Value}
238 | 
239 | 
240 | class ClinToxFileParser(BasicFileParser):
241 |     def __init__(self):
242 |         super(ClinToxFileParser, self).__init__()
243 | 
244 |     def _parse_line(self, line):
245 |         data = re.split(',', line)
246 |         SMILES = data[0]
247 |         Value = data[1:]
248 |         Value[-1] = re.split('\n', Value[-1])[0]
249 |         return {'SMILES': SMILES, 'Value': Value}
250 | 
251 | 
252 | class SIDERFileParser(BasicFileParser):
253 |     def __init__(self):
254 |         super(SIDERFileParser, self).__init__()
255 | 
256 |     def _parse_line(self, line):
257 |         data = re.split(',', line)
258 |         SMILES = data[0]
259 |         Value = data[1:]
260 |         Value[-1] = re.split('\n', Value[-1])[0]
261 |         return {'SMILES': SMILES, 'Value': Value}
262 | 
263 | 
264 | class ESOLFileParser(BasicFileParser):
265 |     def __init__(self):
266 |         super(ESOLFileParser, self).__init__()
267 | 
268 |     def _parse_line(self, line):
269 |         data = re.split(',', line)
270 |         SMILES = data[0]
271 |         Value = data[1]
272 |         Value = re.split('\n', Value)[0]
273 |         return {'SMILES': SMILES, 'Value': Value}
274 | ################################################


--------------------------------------------------------------------------------
/ACNet/TrainingFramework/Initializer.py:
--------------------------------------------------------------------------------
 1 | import torch as t
 2 | import torch.nn as nn
 3 | 
 4 | class Initializer(object):
 5 |     def __init__(self):
 6 |         super(Initializer, self).__init__()
 7 | 
 8 |     def WeightInit(self, tensor):
 9 |         self._init_func(tensor)
10 | 
11 |     def _init_func(self, tensor):
12 |         raise NotImplementedError("Weight Initialization Function is not implemented.")
13 | 
14 | 
15 | class NormalInitializer(Initializer):
16 |     def __init__(self, opt):
17 |         self.opt = opt
18 |         super(NormalInitializer, self).__init__()
19 | 
20 |     def _init_func(self, tensor):
21 |         mean = self.opt.args['InitMean']
22 |         std = self.opt.args['InitStd']
23 |         nn.init.normal_(tensor, mean, std)
24 | 
25 | 
26 | class XavierNormalInitializer(Initializer):
27 |     def __init__(self):
28 |         super(XavierNormalInitializer, self).__init__()
29 | 
30 |     def _init_func(self, tensor):
31 |         if tensor.dim() == 1:
32 |             nn.init.constant_(tensor, 0)
33 |         else:
34 |             nn.init.xavier_normal_(tensor)


--------------------------------------------------------------------------------
/ACNet/TrainingFramework/Metrics.py:
--------------------------------------------------------------------------------
 1 | import torch as t
 2 | from sklearn.metrics import roc_auc_score
 3 | import torch.nn.functional as F
 4 | 
 5 | class ACC(object):
 6 |     def __init__(self):
 7 |         super(ACC, self).__init__()
 8 |         self.name = 'ACC'
 9 | 
10 |     def compute(self, answer, label):
11 |         assert len(answer) == len(label)
12 |         total = len(answer)
13 |         answer = t.Tensor(answer)
14 |         label = t.Tensor(label)
15 |         pred = t.argmax(answer, dim=1)
16 |         correct = sum(pred == label).float()
17 |         acc = correct / total
18 |         return acc.item()
19 | 
20 | 
21 | class AUC(object):
22 |     def __init__(self):
23 |         super(AUC, self).__init__()
24 |         self.name = 'AUC'
25 | 
26 |     def compute(self, answer, label):
27 |         assert len(answer) == len(label)
28 |         answer = t.Tensor(answer)
29 |         answer = answer[:,1]
30 |         answer = answer.tolist()
31 |         result = roc_auc_score(y_true = label, y_score= answer)
32 |         return result
33 | 
34 | 
35 | class MAE(object):
36 |     def __init__(self):
37 |         super(MAE, self).__init__()
38 |         self.name = 'MAE'
39 | 
40 |     def compute(self, answer, label):
41 |         assert len(answer) == len(label)
42 |         answer = t.Tensor(answer).squeeze(-1)
43 |         label = t.Tensor(label)
44 |         MAE = F.l1_loss(answer, label, reduction = 'mean')
45 |         return MAE.item()
46 | 
47 | class RMSE(object):
48 |     def __init__(self):
49 |         super(RMSE, self).__init__()
50 |         self.name = 'RMSE'
51 | 
52 |     def compute(self, answer, label):
53 |         assert len(answer) == len(label)
54 |         answer = t.Tensor(answer).squeeze(-1)
55 |         label = t.Tensor(label)
56 |         RMSE = F.mse_loss(answer, label, reduction = 'mean').sqrt()
57 |         return RMSE.item()
58 | 


--------------------------------------------------------------------------------
/ACNet/TrainingFramework/Scheduler.py:
--------------------------------------------------------------------------------
 1 | 
 2 | from torch.optim.lr_scheduler import _LRScheduler
 3 | 
 4 | 
 5 | class PolynomialDecayLR(_LRScheduler):
 6 | 
 7 |     def __init__(self, optimizer, warmup_updates, tot_updates, lr, end_lr, power, last_epoch=-1, verbose=False):
 8 |         self.warmup_updates = warmup_updates
 9 |         self.tot_updates = tot_updates
10 |         self.lr = lr
11 |         self.end_lr = end_lr
12 |         self.power = power
13 |         super(PolynomialDecayLR, self).__init__(optimizer, last_epoch, verbose)
14 | 
15 |     def get_lr(self):
16 |         if self._step_count <= self.warmup_updates:
17 |             self.warmup_factor = self._step_count / float(self.warmup_updates)
18 |             lr = self.warmup_factor * self.lr
19 |         elif self._step_count >= self.tot_updates:
20 |             lr = self.end_lr
21 |         else:
22 |             warmup = self.warmup_updates
23 |             lr_range = self.lr - self.end_lr
24 |             pct_remaining = 1 - (self._step_count - warmup) / (
25 |                 self.tot_updates - warmup
26 |             )
27 |             lr = lr_range * pct_remaining ** (self.power) + self.end_lr
28 | 
29 |         # todo(zqzhang): updated in TPv7
30 |         print(f"lr: {lr}")
31 |         return [lr for group in self.optimizer.param_groups]
32 | 
33 |     def _get_closed_form_lr(self):
34 |         assert False
35 | 
36 | 
37 | class EmptyLRSchedular(_LRScheduler):
38 | 
39 |     def __init__(self, optimizer, lr, last_epoch=-1, verbose=False):
40 |         self.lr = lr
41 |         super(EmptyLRSchedular, self).__init__(optimizer, last_epoch, verbose)
42 | 
43 | 
44 |     def get_lr(self):
45 |         lr = self.lr
46 |         return [lr for group in self.optimizer.param_groups]


--------------------------------------------------------------------------------
/ACNet/TrainingFramework/Splitter.py:
--------------------------------------------------------------------------------
  1 | import random
  2 | import torch
  3 | from TrainingFramework.ChemUtils import *
  4 | #from ProcessControllers import *
  5 | 
  6 | class BasicSplitter(object):
  7 |     # A splitter module is used to split a dataset
  8 |     # with a entire dataset given, the splitter will return the index of the samples of different subsets,
  9 |     # or return the subsets directly.
 10 |     # return: (sets), (sets_index)
 11 |     def __init__(self):
 12 |         super(BasicSplitter, self).__init__()
 13 | 
 14 |     def split(self, dataset, opt):
 15 |         raise NotImplementedError(
 16 |             'Dataset splitter not implemented.'
 17 |         )
 18 | 
 19 | class RandomSplitter(BasicSplitter):
 20 |     # Module for randomly splitting dataset
 21 |     def __init__(self):
 22 |         super(RandomSplitter, self).__init__()
 23 | 
 24 |     def CheckClass(self, dataset, tasknum):
 25 |         # To check whether both classes of samples appear in the dataset.
 26 |         c0cnt = np.zeros(tasknum)
 27 |         c1cnt = np.zeros(tasknum)
 28 |         for data in dataset:
 29 |             value = data['Value']
 30 |             assert tasknum == len(value)
 31 |             for task in range(tasknum):
 32 |                 # todo(zqzhang): updated in TPv7
 33 |                 if (value[task] == '0') or (value[task] == 0):
 34 |                     c0cnt[task] += 1
 35 |                 elif (value[task] == '1') or (value[task] == 1):
 36 |                     c1cnt[task] += 1
 37 |         if 0 in c0cnt:
 38 |             print("Invalid splitting.")
 39 |             return False
 40 |         elif 0 in c1cnt:
 41 |             print("Invalid splitting.")
 42 |             return False
 43 |         else:
 44 |             return True
 45 | 
 46 |     def split(self, dataset, opt):
 47 |         rate = opt.args['SplitRate']
 48 |         validseed = opt.args['SplitValidSeed']
 49 |         testseed = opt.args['SplitTestSeed']
 50 |         total_num = len(dataset)
 51 |         np_dataset = np.array(dataset)
 52 |         index = np.arange(total_num)
 53 | 
 54 |         if len(rate) == 1:
 55 |             train_num = int(total_num * rate[0])
 56 |             valid_num = total_num - train_num
 57 |             endflag = 0
 58 | 
 59 |             while not endflag:
 60 |                 random.seed(validseed)
 61 |                 random.shuffle(index)
 62 |                 set1_idx = index[:train_num]
 63 |                 set2_idx = index[train_num:]
 64 | 
 65 |                 assert len(set1_idx) == train_num
 66 |                 assert len(set2_idx) == valid_num
 67 | 
 68 |                 set1 = np_dataset[set1_idx]
 69 |                 set2 = np_dataset[set2_idx]
 70 |                 if opt.args['ClassNum'] == 2:
 71 |                     endflag = self.CheckClass(set2, opt.args['TaskNum'])
 72 |                     validseed += 1
 73 |                 else:
 74 |                     endflag = 1
 75 |             return (set1, set2), (set1_idx, set2_idx)
 76 | 
 77 |         if len(rate) == 2:
 78 |             train_num = int(total_num * rate[0])
 79 |             valid_num = int(total_num * rate[1])
 80 |             test_num = total_num - train_num - valid_num
 81 |             endflag = 0
 82 | 
 83 |             while not endflag:
 84 |                 random.seed(testseed)
 85 |                 random.shuffle(index)
 86 |                 set3_idx = index[(train_num + valid_num):]
 87 |                 set3 = np_dataset[set3_idx]
 88 | 
 89 |                 if opt.args['ClassNum'] == 2:
 90 |                     endflag = self.CheckClass(set3, opt.args['TaskNum'])
 91 |                     testseed += 1
 92 |                 else:
 93 |                     endflag = 1
 94 | 
 95 |             set_idx_remain = index[:(train_num + valid_num)]
 96 |             endflag = 0
 97 |             while not endflag:
 98 |                 random.seed(validseed)
 99 |                 random.shuffle(set_idx_remain)
100 | 
101 |                 set1_idx = set_idx_remain[:train_num]
102 |                 set2_idx = set_idx_remain[train_num:]
103 |                 set1 = np_dataset[set1_idx]
104 |                 set2 = np_dataset[set2_idx]
105 | 
106 |                 if opt.args['ClassNum'] == 2:
107 |                     endflag = self.CheckClass(set2, opt.args['TaskNum'])
108 |                     validseed += 1
109 |                 else:
110 |                     endflag = 1
111 | 
112 |                 assert len(set1) == train_num
113 |                 assert len(set2) == valid_num
114 |                 assert len(set3) == test_num
115 | 
116 |             return (set1, set2, set3), (set1_idx, set2_idx, set3_idx)
117 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Copyright 2022 Ziqiao Zhang, Yatao Bian 
2 |    
3 |     Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 
4 |    
5 |     The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 
6 |    
7 |     THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
8 | 
9 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # ACNet
  2 | 
  3 | The code repository of paper *ACNet: A Benchmark for Activity Cliff Prediction*
  4 | 
  5 | 
  6 | ## homepage
  7 | Introduction of this project: https://drugai.github.io/ACNet/
  8 | 
  9 | 
 10 | ## requirements
 11 | - pytorch >= 1.11
 12 | - numpy >= 1.21.2
 13 | - pandas >= 1.2.3
 14 | - rdkit >= 2020.09.5
 15 | - ogb >= 1.3.3
 16 | - pyg >= 2.0.4
 17 | - scikit-learn >= 1.0.2
 18 | 
 19 | ## Usage 
 20 | ### Clone the repository
 21 | Run the following command to clone the repository to your device.
 22 | 
 23 | ```
 24 | git clone https://github.com/DrugAI/ACNet.git
 25 | cd ACNet/ACNet
 26 | ```
 27 | 
 28 | **Note**: The current path `./` indicates `ACNet/` in the following part.
 29 | 
 30 | ### Create conda environment
 31 | Run the following command to create the environment.
 32 | 
 33 | `conda create -f ./ACNetEnviron.yml`
 34 | 
 35 | ### Download data files
 36 | Download data files from [here](https://drive.google.com/drive/folders/1JogBAg9AI0pUxY44w9_g8RHboLf7V5q7?usp=sharing)
 37 | 
 38 | Run the following command to put the data files into the directories
 39 | 
 40 | ```
 41 | mkdir ./ACComponents/ACDataset/data_files
 42 | mkdir ./ACComponents/ACDataset/data_files/raw_data
 43 | mkdir ./ACComponents/ACDataset/data_files/generated_datasets
 44 | mv all_smiles_target.csv ./ACComponents/ACDataset/data_files/raw_data/
 45 | mv mmp_ac_s_distinct.csv ./ACComponents/ACDataset/data_files/raw_data/
 46 | mv mmp_ac_s_neg_distinct.csv ./ACComponents/ACDataset/data_files/raw_data/
 47 | ```
 48 | 
 49 | ### Generate ACNet datasets
 50 | 
 51 | Run the following command to generate ACNet datasets with **Default Configuration**
 52 | 
 53 | ```
 54 | python ACNeet/ACComponents/ACDataset/GenerateACDatasets.py
 55 | ```
 56 | 
 57 | The genearated dataset files are in `./ACComponents/ACDataset/data_files/generated_datasets/`
 58 | 
 59 | The configuration can be customized in `./ACComponents/ACDataset/DataUtils.py`
 60 | 
 61 | 
 62 | ```
 63 | class Config(object):
 64 |     def __init__(self):
 65 |         super(Config, self).__init__()
 66 |         self.mixed = True                       # Whether to generate the Mix dataset, default True
 67 |         self.random_sample_negative = False     # Whether to use randomly sample the negative samples toward certain target, default False.
 68 |         self.random_sample_negative_seed = 8    # Random seed for sample negative samples if self.random_saple_negative == True.
 69 |         self.discard_extreme_imbalance = False  # Whether to discard the subsets that are extremely imbalanced, default False.
 70 |         self.pn_rate_threshold = 0.2            # The threshold of Pos/Neg for extremely imbalanced subsets if self.discard_extreme_imbalance == True.
 71 |         self.discard_few_pos = True             # Whether to discard the subsets that have only few positive samples. Default True.
 72 |         self.few_pos_threshold = 10             # The threshold to discard tasks with few positive samples, deault 10.
 73 |         self.large_thres = 20000                # Thresholds for grouping tasks into subsets.
 74 |         self.medium_thres = 1000
 75 |         self.small_thres = 100
 76 | ```
 77 | 
 78 | ### Reproducing
 79 | 
 80 | Our experimental results are obtained on RTX 3090 GPU, E5-2667 CPU, 256GB memory, and Ubuntu 18.04.5.
 81 | 
 82 | To reproduce the results, execute the following steps.
 83 | 
 84 | 
 85 | 
 86 | 1. Create the directories for model checkpoints.
 87 | 
 88 | ```
 89 | mkdir ./TestExp
 90 | mkdir ./TestExp/Large
 91 | mkdir ./TestExp/Medium
 92 | mkdir ./TestExp/Small
 93 | mkdir ./TestExp/Few
 94 | mkdir ./TestExp/Mix
 95 | ```
 96 | 
 97 | 2. Run the scripts in `./`
 98 | 
 99 | For instance, to run the ECFP+MLP on Large subset:
100 | 
101 | ```
102 | mkdir ./TestExp/Large/FPMLP
103 | python ./FPMLPLarge.py
104 | ```
105 | 
106 | To run the GRU on Small subset:
107 | 
108 | ```
109 | mkdir ./TestExp/Small/GRU
110 | python ./GRUSmall.py
111 | ```
112 | 
113 | 3. Run the following command before experiments of Graphormer model
114 | 
115 | ```
116 | cd ./Models/Graphormer
117 | python setup.py build_ext --inplace
118 | ```
119 | 
120 | 4. Molecular representations extracted by PTMs
121 | 
122 | Representations extracted by 7 PTMs for the Few subset can be downloaded [here](https://drive.google.com/drive/folders/1JogBAg9AI0pUxY44w9_g8RHboLf7V5q7?usp=sharing)
123 | 
124 | Run the following command to put them into the directory.
125 | 
126 | ```
127 | mv MMP_AC_Few_representation ./ACComponent/ACDatasets/data_files/
128 | ```
129 | 
130 | 
131 | **Note**:
132 | The GNNs (GCN, GIN, SGC) in the baseline experiments are implemented by PyG package, which uses `torch.scatter_` function.
133 | Remember that the `torch.scatter_` function is non-deterministic (See [here](https://pytorch.org/docs/stable/generated/torch.Tensor.scatter_.html#torch.Tensor.scatter_) ), so the results of the GNNs may be slightly different with our reported results in the manuscript.
134 | 
135 | 
136 | **Note**:
137 | The baseline experiments of ACNet are conducted by a *self-made* training framework.
138 | It is not as well-constructed as other training frameworks, e.g. *torchdrug*.
139 | It is just served as an example to show how our benchmark works and to show the reproducibility of our results reported in the manuscript.
140 | We can only guarantee that the experimental scripts can work to reproduce the results, but the stability of the training framework is not guaranteed when using other functions.
141 | And the illustration of this training framework is not our point.
142 | 
143 | 
144 | 
145 | 
146 | ## Illustration
147 | ### Data files
148 | 
149 | - `all_smiles_target.csv`
150 | Contains 142,307 activities screened from ChEMBL.
151 | 
152 | - `mmp_ac_s_distinct.csv`
153 | Contains 21,352 MMP-Cliffs.
154 | 
155 | - `mmp_ac_s_neg_distinct.csv`
156 | Contains 423,282 non-AC MMPs.
157 | 
158 | - `target_dictionary.xlsx`
159 | A dictionary that match target ids to the target names. Contains 1006 targets.
160 | 
161 | - `MMP_AC.json`
162 | All of the MMP-Cliffs and non-AC MMPs. Contains samples against 190 targets.
163 | 
164 | - `MMP_AC_Discarded.json`
165 | Discarded samples when organizing 21,352 positive samples and 423,282 negative samples.
166 | 
167 | - `MMP_AC_Large.json`, `MMP_AC_Medium.json`, `MMP_AC_Small.json`, `MMP_AC_Few.json`, `MMP_AC_Mixed_Screened.json`
168 | Five subsets of the ACNet benchmark generated based on the configuration file.
169 | 
170 | 
171 | ### Data structure
172 | Each json file corresponds to a subset of ACNet, structured as a dictionary.
173 | Keys are target ids, and values are datasets of the targets.
174 | The datasets of targets are lists of samples.
175 | And each sample is a dictionary with keys `SMILES1, SMILES2, Value`.
176 | 
177 | Using Large subset as an example:
178 | 
179 | ```
180 | >>> dataset.keys()
181 | dict_keys(['72','130','10102']
182 | >>> taskset = dataset['72']
183 | >>> len(taskset)
184 | 26376
185 | >>> data = taskset[0]
186 | >>> data
187 | {'SMILES1': 'OC1(c2ccc(Cl)cc2)CCN(Cc2c[nH]c3ccccc23)CC1', 'SMILES2': 'OCC1(c2ccc(Cl)cc2)CCN(Cc2c[nH]c3ccccc23)CC1', 'Value': '1'}
188 | ```
189 | 
190 | 


--------------------------------------------------------------------------------