├── LICENSE
├── README.md
├── code_submission
│   ├── data_space.py
│   ├── explore.py
│   ├── feat_engine.py
│   ├── model.py
│   ├── model_lib
│   │   ├── __init__.py
│   │   ├── appnp.py
│   │   ├── arma.py
│   │   ├── gat.py
│   │   ├── gatedgraph.py
│   │   ├── gcn.py
│   │   ├── gin.py
│   │   ├── graphconvnet.py
│   │   ├── graphsage.py
│   │   ├── incepgcn.py
│   │   ├── jkgcn.py
│   │   ├── resgcn.py
│   │   ├── sg.py
│   │   └── tag.py
│   ├── model_space.py
│   ├── timer.py
│   └── utils
│       ├── __init__.py
│       ├── eda.py
│       └── tools.py
├── data
│   └── demo
│       ├── test_label.tsv
│       └── train.data
│           ├── config.yml
│           ├── edge.tsv
│           ├── feature.tsv
│           ├── test_node_id.txt
│           ├── train_label.tsv
│           └── train_node_id.txt
├── ingestion
│   ├── __pycache__
│   │   ├── common.cpython-36.pyc
│   │   ├── dataset.cpython-36.pyc
│   │   └── timing.cpython-36.pyc
│   ├── common.py
│   ├── dataset.py
│   ├── ingestion.py
│   ├── metadata
│   └── timing.py
├── run_local_test.py
└── scoring
    ├── metadata
    └── score.py

/LICENSE:
--------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 | 
5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 | 
7 | 1. Definitions.
8 | 
9 | "License" shall mean the terms and conditions for use, reproduction,
10 | and distribution as defined by Sections 1 through 9 of this document.
11 | 
12 | "Licensor" shall mean the copyright owner or entity authorized by
13 | the copyright owner that is granting the License.
14 | 
15 | "Legal Entity" shall mean the union of the acting entity and all
16 | other entities that control, are controlled by, or are under common
17 | control with that entity. For the purposes of this definition,
18 | "control" means (i) the power, direct or indirect, to cause the
19 | direction or management of such entity, whether by contract or
20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 | outstanding shares, or (iii) beneficial ownership of such entity.
22 | 
23 | "You" (or "Your") shall mean an individual or Legal Entity
24 | exercising permissions granted by this License.
25 | 
26 | "Source" form shall mean the preferred form for making modifications,
27 | including but not limited to software source code, documentation
28 | source, and configuration files.
29 | 
30 | "Object" form shall mean any form resulting from mechanical
31 | transformation or translation of a Source form, including but
32 | not limited to compiled object code, generated documentation,
33 | and conversions to other media types.
34 | 
35 | "Work" shall mean the work of authorship, whether in Source or
36 | Object form, made available under the License, as indicated by a
37 | copyright notice that is included in or attached to the work
38 | (an example is provided in the Appendix below).
39 | 
40 | "Derivative Works" shall mean any work, whether in Source or Object
41 | form, that is based on (or derived from) the Work and for which the
42 | editorial revisions, annotations, elaborations, or other modifications
43 | represent, as a whole, an original work of authorship. For the purposes
44 | of this License, Derivative Works shall not include works that remain
45 | separable from, or merely link (or bind by name) to the interfaces of,
46 | the Work and Derivative Works thereof.
47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the
150 | appropriateness of using or redistributing the Work and assume any
151 | risks associated with Your exercise of permissions under this License.
152 | 
153 | 8. Limitation of Liability. In no event and under no legal theory,
154 | whether in tort (including negligence), contract, or otherwise,
155 | unless required by applicable law (such as deliberate and grossly
156 | negligent acts) or agreed to in writing, shall any Contributor be
157 | liable to You for damages, including any direct, indirect, special,
158 | incidental, or consequential damages of any character arising as a
159 | result of this License or out of the use or inability to use the
160 | Work (including but not limited to damages for loss of goodwill,
161 | work stoppage, computer failure or malfunction, or any and all
162 | other commercial damages or losses), even if such Contributor
163 | has been advised of the possibility of such damages.
164 | 
165 | 9. Accepting Warranty or Additional Liability. While redistributing
166 | the Work or Derivative Works thereof, You may choose to offer,
167 | and charge a fee for, acceptance of support, warranty, indemnity,
168 | or other liability obligations and/or rights consistent with this
169 | License. However, in accepting such obligations, You may act only
170 | on Your own behalf and on Your sole responsibility, not on behalf
171 | of any other Contributor, and only if You agree to indemnify,
172 | defend, and hold each Contributor harmless for any liability
173 | incurred by, or claims asserted against, such Contributor by reason
174 | of your accepting any such warranty or additional liability.
175 | 
176 | END OF TERMS AND CONDITIONS
177 | 
178 | APPENDIX: How to apply the Apache License to your work.
179 | 
180 | To apply the Apache License to your work, attach the following
181 | boilerplate notice, with the fields enclosed by brackets "[]"
182 | replaced with your own identifying information. (Don't include
183 | the brackets!) The text should be enclosed in the appropriate
184 | comment syntax for the file format. We also recommend that a
185 | file or class name and description of purpose be included on the
186 | same "printed page" as the copyright notice for easier
187 | identification within third-party archives.
188 | 
189 | Copyright [yyyy] [name of copyright owner]
190 | 
191 | Licensed under the Apache License, Version 2.0 (the "License");
192 | you may not use this file except in compliance with the License.
193 | You may obtain a copy of the License at
194 | 
195 | http://www.apache.org/licenses/LICENSE-2.0
196 | 
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
202 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # AutoGL
2 | 
3 | ## What is AutoGL?
4 | 
5 | AutoGL is a graph learning framework with automatic machine learning (AutoML) techniques. AutoGL currently focuses mainly on node classification problems, but it is easy to apply the program to other graph learning problems.
6 | 
7 | AutoGL is the 6th place solution for AutoGraph Challenge@KDD'20; the competition rules can be found [here](https://www.automl.ai/competitions/3).
We achieved 1st, 4th, 1st, 6th, and 27th place on the 5 final-phase datasets:
8 | | # | Dataset1 | Dataset2 | Dataset3 | Dataset4 | Dataset5 | Avg |
9 | | --- | -------- | ------- | -------- | ------ | ----------- | --- |
10 | | rank | 1 | 4 | 1 | 6 | 27 | 7.8 |
11 | 
12 | ## Usage
13 | Clone this repository to your machine:
14 | ```
15 | git clone https://github.com/JunweiSUN/AutoGL.git
16 | ```
17 | Download the datasets from [here](https://www.automl.ai/competitions/6?secret_key=c10be8ef-9a94-417d-bb7a-5711aa6c895b#learn_the_details). You can also create your own datasets in the required format.
18 | Once the download finishes, unzip the datasets and move them to the `data` folder, or simply use the demo dataset already in `data`.
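For reference, the demo dataset is laid out as follows. A custom dataset should mirror this structure; the comments are our reading of the demo files and of the columns `data_space.py` expects (`node_index` in `feature.tsv` and the label files; `src_idx`, `dst_idx`, `edge_weight` in `edge.tsv`), so treat this as a sketch rather than a formal spec:
```
data/demo
├── test_label.tsv          # held-out labels, read only by the scoring program
└── train.data
    ├── config.yml          # dataset metadata (e.g. time budget, number of classes)
    ├── edge.tsv            # src_idx, dst_idx, edge_weight
    ├── feature.tsv         # node_index plus one column per raw feature
    ├── test_node_id.txt    # ids of the nodes to predict
    ├── train_label.tsv     # node_index, label
    └── train_node_id.txt   # ids of the labeled training nodes
```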
19 | 
20 | AutoGL can be started easily with [docker](https://www.docker.com/):
21 | ```
22 | cd path/to/AutoGL/
23 | docker run --gpus=0 -it --rm -v "$(pwd):/app/autograph" -w /app/autograph nehzux/kddcup2020:v2
24 | python run_local_test.py --dataset_dir=./data/demo --code_dir=./code_submission
25 | ```
26 | You can point the `dataset_dir` argument at other datasets, and change `code_dir` to the directory containing your own sample code.
27 | 
28 | You can also run this program in your own Python environment, but in that case you must install all the necessary packages yourself, so we recommend running it with Docker.
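For a bare environment, the imports in `code_submission` plus the packages that `model.py` installs at runtime suggest at least the following. This is an unpinned sketch inferred from the code, not an official requirements list:
```
pip install torch torch-geometric scikit-learn pandas numpy scipy networkx
pip install nni seaborn cython sparsesvd   # model.py also installs these at runtime
```
Note that `torch-geometric` usually needs `torch-scatter`/`torch-sparse` wheels matching your torch and CUDA versions, which is exactly what the Docker image takes care of.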
29 | 
30 | ## Acknowledgements
31 | We referred to the following packages and code when developing this program:
32 | 
33 | [nni](https://github.com/microsoft/nni): An open source AutoML toolkit from Microsoft
34 | [AutoDL (tabular part)](https://github.com/DeepWisdom/AutoDL/tree/master/AutoDL_sample_code_submission/Auto_Tabular): Automated Deep Learning without ANY human intervention
35 | [pytorch_geometric](https://github.com/rusty1s/pytorch_geometric): Geometric Deep Learning Extension Library for PyTorch
36 | [sparsesvd](https://github.com/RaRe-Technologies/sparsesvd): a fast library for sparse Singular Value Decomposition
37 | [DropEdge](https://github.com/DropEdge/DropEdge): a PyTorch implementation of the paper "DropEdge: Towards Deep Graph Convolutional Networks on Node Classification"
38 | 
39 | ## Contact us
40 | If you have any questions or advice, please feel free to contact our team members:
41 | Junwei Sun: junweisun@bupt.edu.cn
42 | Ruifeng Kuang: kuangruifeng@bupt.edu.cn
43 | Wei Huang: 18262998091@163.com
44 | Changrui Mu: u3553427@connect.hku.hk
45 | Jiayan Wang: jiayanwangno1@gmail.com 46 | 47 | ## License 48 | [Apache License 2.0](https://github.com/JunweiSUN/AutoGL/blob/master/LICENSE) 49 | -------------------------------------------------------------------------------- /code_submission/data_space.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | from sklearn.model_selection import StratifiedShuffleSplit 4 | from sklearn.preprocessing import StandardScaler, OneHotEncoder 5 | import gc 6 | import torch 7 | from torch_geometric.data import Data 8 | import time 9 | import torch_geometric.transforms as T 10 | from torch_geometric.utils import dense_to_sparse 11 | 12 | class DataSpace: 13 | def __init__(self, info, data): 14 | """ 15 | Generating training / validation / testing data. 16 | Parameters: 17 | ---------- 18 | info: dict 19 | The eda infomation generated by AutoEDA 20 | data: dict 21 | The original data passed by the ingestion program. 22 | ---------- 23 | """ 24 | self.info = info 25 | 26 | self.y = data['train_label']['label'].to_numpy() 27 | self.pyg_data, self.all_train_idxs, self.test_idxs = self.generate_pyg_data(data) 28 | self.splits = {} 29 | self.n_splits = 5 30 | self.split_train_valid(ratio=0.1) 31 | self.update = False 32 | 33 | def split_train_valid(self, ratio=0.1): 34 | sss = StratifiedShuffleSplit(n_splits=self.n_splits, test_size=ratio, random_state=0) 35 | i = 0 36 | for train, val in sss.split(self.all_train_idxs, self.y): 37 | self.splits[i] = (self.all_train_idxs[train], self.all_train_idxs[val]) 38 | i += 1 39 | 40 | def get_data(self, round_num): 41 | train_idxs, val_idxs = self.splits[(round_num-1) % self.n_splits] 42 | print(f'Round {round_num}') 43 | 44 | train_mask = torch.zeros(self.pyg_data.num_nodes, dtype=torch.bool) 45 | train_mask[train_idxs] = 1 46 | self.pyg_data.train_mask = train_mask 47 | 48 | valid_mask = torch.zeros(self.pyg_data.num_nodes, dtype=torch.bool) 49 | valid_mask[val_idxs] = 1 50 | self.pyg_data.valid_mask = valid_mask 51 | 52 | return self.pyg_data 53 | 54 | def generate_pyg_data(self, data): 55 | x = data['fea_table'] 56 | x = x.drop('node_index', axis=1).to_numpy() 57 | x = torch.tensor(x, dtype=torch.float) 58 | 59 | df = data['edge_file'] 60 | edge_index = df[['src_idx', 'dst_idx']].to_numpy() 61 | edge_index = sorted(edge_index, key=lambda d: d[0]) 62 | edge_index = torch.tensor(edge_index, dtype=torch.long).transpose(0, 1) 63 | 64 | edge_weight = df['edge_weight'].to_numpy() 65 | edge_weight = torch.tensor(edge_weight, dtype=torch.float32) 66 | 67 | num_nodes = x.size(0) 68 | 69 | y = torch.zeros(num_nodes, dtype=torch.long) 70 | inds = data['train_label'][['node_index']].to_numpy() 71 | train_y = data['train_label'][['label']].to_numpy() 72 | y[inds] = torch.tensor(train_y, dtype=torch.long) 73 | 74 | all_train_idxs = np.array(data['train_indices'], dtype=int) 75 | test_idxs = np.array(data['test_indices'], dtype=int) 76 | 77 | data = Data(x=x, edge_index=edge_index, y=y, edge_weight=edge_weight) 78 | data.num_nodes = num_nodes 79 | 80 | data.test_idxs = test_idxs 81 | 82 | test_mask = torch.zeros(num_nodes, dtype=torch.bool) 83 | test_mask[test_idxs] = 1 84 | data.test_mask = test_mask 85 | 86 | data.label_weights = self.info['label_weights'] 87 | 88 | if self.info['normalize_features'] == 'row': 89 | print('Feature Normalized By Row') 90 | data.x = data.x / data.x.sum(1, keepdim=True).clamp(min=1) 91 | elif self.info['normalize_features'] == 'col': 92 | print('Feature 
Normalized By Column')
93 |             data.x = data.x / data.x.sum(0, keepdim=True).clamp(min=1)
94 | 
95 |         return data.to('cuda'), all_train_idxs, test_idxs
96 | 
--------------------------------------------------------------------------------
/code_submission/explore.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import gc
3 | import collections
4 | import torch
5 | import time
6 | 
7 | class Explore:
8 |     def __init__(self, info, model_space, data_space):
9 |         """
10 |         Training models and making predictions.
11 |         Parameters:
12 |         ----------
13 |         info: dict
14 |             The EDA information generated by AutoEDA
15 |         model_space: ModelSpace
16 |             Model space
17 |         data_space: DataSpace
18 |             Data space
19 |         ----------
20 |         """
21 |         self.info = info
22 |         self.model_space = model_space
23 |         self.data_space = data_space
24 | 
25 |         self.models = self.model_space.get_models()
26 |         self.model = None
27 |         self.model_prior = self.model_space.model_prior
28 |         self.model_idx = 0
29 | 
30 |         self.ensemble_threshold = self.info['ensemble_threshold']
31 | 
32 |         self.round_num = 1
33 | 
34 |         self.hist_info = {}
35 |         self.pyg_data = None
36 |         self.update_predict = True
37 | 
38 |     def explore_space(self):
39 |         if self.model_idx == 0:
40 |             print('Model Prior:', self.model_prior)
41 |         self.explore_data_space()
42 |         self.explore_model_space()
43 |         val_score = self.model.trial(self.pyg_data, self.round_num)
44 |         print('Model Name:', self.model.name, 'Round:', self.round_num, 'Val score:', val_score)
45 | 
46 |         self.update_model_hist(val_score)
47 | 
48 |     def explore_model_space(self):
49 |         self.model = self.models[self.model_prior[self.model_idx]]
50 |         self.model_idx += 1
51 | 
52 |     def explore_data_space(self):
53 |         if self.data_space.update or self.pyg_data is None:
54 |             self.pyg_data = self.data_space.get_data(round_num=self.round_num)
55 |             self.data_space.update = False
56 | 
57 |     def update_model_hist(self, val_score):
58 |         self.model.hist_score.append(val_score)
59 |         if val_score > self.model.best_score:
60 |             self.model.best_score = val_score
61 |             self.update_predict = True
62 |         else:
63 |             self.update_predict = False
64 | 
65 |     def sort_model_prior(self):
66 |         model_perform = collections.defaultdict(list)
67 |         for name, info in self.hist_info.items():
68 |             model_perform[name] = [e[0] for e in info]
69 | 
70 |         self.model_prior = sorted(self.model_prior, key=lambda x: np.mean(model_perform[x]), reverse=True)
71 |         self.model_idx = 0
72 |         self.round_num += 1
73 | 
74 |     def get_top_preds(self):
75 |         models_name = self.hist_info.keys()
76 |         top_score_and_preds_for_each_model = [sorted(self.hist_info[name], key=lambda e: e[0], reverse=True)[0] for name in models_name]
77 | 
78 |         models_name_sorted, models_score_and_preds_sorted = (list(i) for i in
79 |             zip(*sorted(zip(models_name, top_score_and_preds_for_each_model), key=lambda x: x[1][0], reverse=True)))
80 | 
81 |         models_score_sorted = [e[0] for e in models_score_and_preds_sorted]
82 |         models_preds_sorted = [e[1] for e in models_score_and_preds_sorted]
83 | 
84 |         max_score = max(models_score_sorted)
85 | 
86 |         # keep every model whose best score is within ensemble_threshold of the best one
87 |         for i in range(len(models_score_sorted), 0, -1):
88 |             top_num = i
89 |             if models_score_sorted[i-1] + self.ensemble_threshold >= max_score:
90 |                 break
91 | 
92 |         # sharpen score differences, clip at 0.01, then normalize into blending weights
93 |         top_score = np.array(models_score_sorted[:top_num])
94 |         top_score = top_score + 50 * (top_score - top_score.mean())
95 |         top_score = np.array([max(0.01, i) for i in top_score])
96 |         weights = top_score / top_score.sum()
97 |         print('Ensemble Models Weights:', weights)
98 | 
99 |         top_preds = []
100 |         for i in range(top_num):
101 |             name = models_name_sorted[i]
102 |             rank = i + 1
103 |             score = models_score_sorted[i]
104 |             weight = weights[i]
105 |             preds = models_preds_sorted[i]
106 |             top_preds.append((name, rank, score, weight, preds))
107 | 
108 |         return top_preds
109 | 
110 |     def predict(self):
111 |         if self.update_predict:
112 |             preds = self.model.predict()
113 |             self.model.best_preds = preds
114 |             if self.model.name in self.hist_info:
115 |                 self.hist_info[self.model.name].append((self.model.best_score, self.model.best_preds))
116 |             else:
117 |                 self.hist_info[self.model.name] = [(self.model.best_score, self.model.best_preds)]
118 |             self.update_predict = False
119 | 
120 |         if self.model_idx >= len(self.model_prior):
121 |             self.sort_model_prior()
122 |             self.data_space.update = True
123 | 
124 |         preds = self.blending_predict().argmax(1).flatten()
125 |         return preds
126 | 
127 |     def blending_predict(self):
128 |         top_preds = self.get_top_preds()
129 |         ensemble_models = []
130 |         ensemble_val_scores = []
131 |         ensemble_preds = 0
132 |         for name, rank, score, weight, preds in top_preds:
133 |             m = np.mean(preds)   # rescale each model's probabilities before weighting
134 |             ensemble_models.append(name)
135 |             ensemble_val_scores.append(score)
136 |             ensemble_preds += weight * preds / m
137 |         print('Ensemble Models Including:', ensemble_models)
138 |         print('Ensemble Models Val Score:', ensemble_val_scores)
139 |         return ensemble_preds
--------------------------------------------------------------------------------
/code_submission/feat_engine.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import numpy as np
3 | from sparsesvd import sparsesvd
4 | import scipy
5 | import time
6 | import torch_geometric.transforms as T
7 | from torch_geometric.nn import Node2Vec
8 | import networkx as nx
9 | import torch
10 | 
11 | class FeatEngine:
12 |     """
13 |     A toolbox for generating node features.
14 |     Feature types include: SVD / One Hot / Degree / Node2Vec / Adjacency Matrix.
15 |     These features can be concatenated.
16 |     Parameters:
17 |     ----------
18 |     info: dict
19 |         The EDA information generated by AutoEDA
20 |     ----------
21 |     """
22 |     def __init__(self, info):
23 |         self.info = info
24 | 
25 |     def fit_transform(self, data):
26 |         if 'original' in self.info['feature_type']:
27 |             print('Use Original Feature')
28 |         if 'one_hot' in self.info['feature_type']:
29 |             print('Use One Hot Feature')
30 |             data['fea_table'] = self.generate_one_hot_feature(data)
31 |         if 'svd' in self.info['feature_type']:
32 |             print('Use SVD Feature')
33 |             data['fea_table'] = self.generate_svd_feature(data, num_features=64)
34 |         if 'degree' in self.info['feature_type']:
35 |             print('Use Degree Feature')
36 |             data['fea_table'] = self.generate_degree_feature(data)
37 |         if 'node2vec' in self.info['feature_type']:
38 |             print('Use Node2Vec Feature')
39 |             data['fea_table'] = self.generate_node2vec_feature(data, epochs=20, num_features=64)
40 |         if 'adj' in self.info['feature_type']:
41 |             print('Use Adjacency Feature')
42 |             data['fea_table'] = self.generate_adj_feature(data, use_weight=False)
43 | 
44 |     def generate_svd_feature(self, data, num_features=64):
45 |         feat_df, edge_df = data['fea_table'], data['edge_file']
46 |         adj_matrix = np.zeros((self.info['num_nodes'], self.info['num_nodes']))
47 |         edges = edge_df.to_numpy(dtype=int)
48 |         for edge in edges:
49 |             adj_matrix[edge[0], edge[1]] = 1
50 |         sparse_adj_matrix = scipy.sparse.csc_matrix(adj_matrix)
51 |         ut, s, vt = sparsesvd(sparse_adj_matrix, num_features)
52 |         svd_feats = pd.DataFrame(np.dot(ut.T, np.diag(s)))
53 |         return pd.concat([feat_df, svd_feats], axis=1)
54 | 
55 |     def generate_adj_feature(self, data, use_weight=True):
56 |         feat_df, edge_df = data['fea_table'], data['edge_file']
57 |         adj_matrix = np.zeros((self.info['num_nodes'], self.info['num_nodes']))
58 |         edges = edge_df.to_numpy(dtype=int)
59 | 
60 |         if use_weight:
61 |             for edge in edges:
62 |                 adj_matrix[edge[0], edge[1]] = edge[2]
63 |         else:
64 |             for edge in edges:
65 |                 adj_matrix[edge[0], edge[1]] = 1
66 | 
67 |         adj_feats = pd.DataFrame(adj_matrix)
68 |         return pd.concat([feat_df, adj_feats], axis=1)
69 | 
70 |     def generate_one_hot_feature(self, data):
71 |         return pd.concat([data['fea_table'], pd.get_dummies(data['fea_table'].to_numpy().flatten())], axis=1)
72 | 
73 |     def generate_degree_feature(self, data):
74 |         g = nx.DiGraph()
75 |         edges = data['edge_file'].to_numpy().astype(int)
76 |         g.add_weighted_edges_from(edges)
77 | 
78 |         degree_feat = np.zeros((self.info['num_nodes'], 3))  # in-degree, out-degree, difference
79 |         for node_idx in range(self.info['num_nodes']):
80 |             in_degree, out_degree = g.in_degree(node_idx), g.out_degree(node_idx)
81 |             degree_feat[node_idx, 0], degree_feat[node_idx, 1] = in_degree, out_degree
82 |             # weighted degrees could be derived from g.in_edges(node_idx, data=True)
83 |             # and g.out_edges(node_idx, data=True), but only the unweighted
84 |             # degrees are used as features here;
85 |             # the third column is the node's degree imbalance
86 |             degree_feat[node_idx, 2] = in_degree - out_degree
87 | 
88 |         return pd.concat([data['fea_table'], pd.DataFrame(degree_feat)], axis=1)
89 | 
90 |     def generate_node2vec_feature(self, data, epochs=20, num_features=64):
91 |         edge_index = data['edge_file'][['src_idx', 'dst_idx']].to_numpy()
92 |         edge_index = sorted(edge_index, key=lambda d: d[0])
93 |         edge_index = torch.tensor(edge_index, dtype=torch.long).transpose(0, 1)
94 | 
95 |         model = Node2Vec(edge_index, embedding_dim=num_features, walk_length=20,
96 |                          context_size=10, walks_per_node=10, num_negative_samples=1, sparse=True).to('cuda')
97 | 
98 |         loader = model.loader(batch_size=128, shuffle=True,
num_workers=4) 99 | optimizer = torch.optim.SparseAdam(model.parameters(), lr=0.01) 100 | 101 | def train(): 102 | model.train() 103 | total_loss = 0 104 | for pos_rw, neg_rw in loader: 105 | optimizer.zero_grad() 106 | loss = model.loss(pos_rw.to('cuda'), neg_rw.to('cuda')) 107 | loss.backward() 108 | optimizer.step() 109 | total_loss += loss.item() 110 | return total_loss / len(loader) 111 | 112 | for epoch in range(1, epochs+1): 113 | loss = train() 114 | print(f'Epoch: {epoch:02d}, Loss: {loss:.4f}') 115 | 116 | return pd.concat([data['fea_table'], pd.DataFrame(model().detach().cpu().numpy())], axis=1) 117 | -------------------------------------------------------------------------------- /code_submission/model.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | import torch 4 | import time 5 | import random 6 | import os 7 | import signal 8 | os.system('pip install nni') 9 | os.system('pip install seaborn') 10 | os.system('pip install cython') 11 | os.system('pip install sparsesvd') 12 | from utils.eda import AutoEDA 13 | from utils.tools import fix_seed 14 | from explore import Explore 15 | from data_space import DataSpace 16 | from model_space import ModelSpace 17 | from feat_engine import FeatEngine 18 | 19 | fix_seed(1234) 20 | def timeout_handler(signum, frame): 21 | """ 22 | Signal handler 23 | Inform the main process when time runs out. 24 | """ 25 | raise Timeout 26 | signal.signal(signal.SIGTSTP, timeout_handler) 27 | 28 | class Timeout(Exception): 29 | """Timeout""" 30 | 31 | class Model: 32 | """ 33 | Main Class for training and predicting. 34 | """ 35 | def __init__(self): 36 | self.device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu') 37 | 38 | def predict(self): 39 | self.explore.explore_space() 40 | preds = self.explore.predict() 41 | return preds 42 | 43 | def train_predict(self, data, time_budget, n_class, schema): 44 | # start a timer for timing. 
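        # (watchdog details: timer.py is launched in the background with the
        # remaining budget and this process id; presumably it sleeps for that
        # long and then sends SIGTSTP to this pid, which the module-level
        # handler above turns into a Timeout exception, so the training loop
        # below can stop and return the best predictions found so far)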
45 | timer_abs_path = os.path.abspath(__file__).replace('/model.py', '/timer.py') 46 | pid = os.getpid() 47 | os.system(f'python {timer_abs_path} {time_budget - 1} {pid} &') 48 | 49 | start = time.time() 50 | self.auto_eda = AutoEDA(n_class) 51 | info = self.auto_eda.get_info(data) 52 | print('EDA Finished, Remaining', time_budget + start - time.time()) 53 | self.feat_engine = FeatEngine(info) 54 | self.feat_engine.fit_transform(data) 55 | print('Feature Engine Finished, Remaining', time_budget + start - time.time()) 56 | self.data_space = DataSpace(info, data) 57 | print('Data Space Constructed, Remaining', time_budget + start - time.time()) 58 | self.model_space = ModelSpace(info) 59 | print('Model Space Constructed, Remaining', time_budget + start - time.time()) 60 | self.explore = Explore(info, self.model_space, self.data_space) 61 | 62 | # start training 63 | while True: 64 | if time_budget + start - time.time() <= 0: 65 | return self.preds 66 | try: 67 | self.preds = self.predict() 68 | except Timeout: 69 | return self.preds 70 | 71 | return self.preds 72 | -------------------------------------------------------------------------------- /code_submission/model_lib/__init__.py: -------------------------------------------------------------------------------- 1 | from .gat import GAT 2 | from .gcn import GCN 3 | from .graphconvnet import GraphConvNet 4 | from .graphsage import GraphSAGE 5 | from .appnp import APPNPNet 6 | from .arma import ARMA 7 | from .gatedgraph import GatedGraphNet 8 | from .gin import GIN 9 | from .sg import SG 10 | from .tag import TAG 11 | from .incepgcn import IncepGCN 12 | from .resgcn import ResGCN 13 | from .jkgcn import JKGCN 14 | -------------------------------------------------------------------------------- /code_submission/model_lib/appnp.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | from torch.nn import Linear 4 | from torch_geometric.nn import APPNP 5 | import copy 6 | from sklearn.metrics import f1_score 7 | from utils.tools import fix_seed, AverageMeter 8 | from nni.hyperopt_tuner.hyperopt_tuner import HyperoptTuner 9 | from torch_geometric.utils import dropout_adj 10 | import random 11 | fix_seed(1234) 12 | 13 | 14 | class APPNPNet(torch.nn.Module): 15 | 16 | def __init__(self, info): 17 | super(APPNPNet, self).__init__() 18 | 19 | self.info = info 20 | 21 | self.best_score = 0 22 | self.hist_score = [] 23 | 24 | self.best_preds = None 25 | self.current_round_best_preds = None 26 | self.best_valid_score = 0 27 | self.max_patience = 100 28 | self.max_epochs = 1600 29 | 30 | self.name = 'APPNP' 31 | 32 | self.hyperparameters = { 33 | 'num_layers': self.info['num_layers'], 34 | 'lr': 0.005, 35 | 'K': 10, 36 | 'alpha': 0.15, 37 | 'dropedge_rate': self.info['dropedge_rate'], 38 | 'dropout_rate': self.info['dropout_rate'], 39 | 'hidden': self.info['init_hidden_size'] 40 | } 41 | self.best_hp = None 42 | self.tuner = HyperoptTuner(algorithm_name='tpe', optimize_mode='maximize') 43 | search_space = { 44 | "dropedge_rate": { 45 | "_type": "uniform", 46 | "_value": [0, 1] 47 | }, 48 | "dropout_rate": { 49 | "_type": "uniform", 50 | "_value": [0, 1] 51 | }, 52 | "num_layers": { 53 | "_type": "randint", 54 | "_value": [2, 3] 55 | }, 56 | "hidden": { 57 | "_type": "quniform", 58 | "_value": [4, 7, 1] 59 | }, 60 | "lr":{ 61 | "_type": "choice", 62 | "_value": [self.info['lr']] 63 | }, 64 | 'K' :{ 65 | "_type": "quniform", 66 | "_value": [1, 6, 1] 67 | }, 68 | 'alpha':{ 
69 |                 "_type": "uniform",
70 |                 "_value": [0, 1]
71 |             }
72 |         }
73 |         self.tuner.update_search_space(search_space)
74 | 
75 |     def init_model(self, n_class, feature_num):
76 |         hidden_size = int(2 ** self.hyperparameters['hidden'])
77 |         K = int(self.hyperparameters['K'])
78 |         self.lin1 = Linear(feature_num, hidden_size)
79 |         self.lin2 = Linear(hidden_size, n_class)
80 |         self.prop1 = APPNP(K=K, alpha=self.hyperparameters['alpha'])
81 | 
82 |         self.optimizer = torch.optim.Adam(self.parameters(), lr=self.hyperparameters['lr'], weight_decay=5e-4)
83 | 
84 |         self = self.to('cuda')
85 | 
86 |     def forward(self, data):
87 |         x, edge_index, edge_weight = data.x, data.edge_index, data.edge_weight
88 |         if self.hyperparameters['dropedge_rate'] is not None:
89 |             edge_index, edge_weight = dropout_adj(edge_index, edge_weight, p=self.hyperparameters['dropedge_rate'],\
90 |                 force_undirected=False, num_nodes=None, training=self.training)
91 | 
92 |         x = F.dropout(x, p=self.hyperparameters['dropout_rate'], training=self.training)
93 |         x = F.relu(self.lin1(x))
94 |         x = F.dropout(x, p=self.hyperparameters['dropout_rate'], training=self.training)
95 |         x = self.lin2(x)
96 |         x = self.prop1(x, edge_index, edge_weight)
97 |         return x
98 | 
99 |     def trial(self, data, round_num):
100 |         n_class, feature_num = self.info['n_class'], data.x.shape[1]
101 |         if round_num >= 2:
102 |             self.hyperparameters = self.tuner.generate_parameters(round_num-1)
103 |             print(self.hyperparameters)
104 | 
105 |         while True:
106 |             try:
107 |                 self.init_model(n_class, feature_num)
108 |                 val_score = self.train_valid(data, round_num)
109 |                 if round_num > 1:
110 |                     self.tuner.receive_trial_result(round_num-1, self.hyperparameters, val_score)
111 |                 if val_score > self.best_score:
112 |                     self.best_hp = copy.deepcopy(self.hyperparameters)
113 |                 break
114 |             except RuntimeError as e:
115 |                 print(self.name, e, 'OOM with Hidden Size', self.hyperparameters['hidden'])
116 |                 if round_num > 1:
117 |                     self.tuner.receive_trial_result(round_num-1, self.hyperparameters, 0)
118 |                 return 0
119 |         print("Best Hyperparameters of", self.name, self.best_hp)
120 |         return val_score
121 | 
122 |     def train_valid(self, data, round_num):
123 |         y, train_mask, valid_mask, test_mask, label_weights = data.y, data.train_mask, data.valid_mask, data.test_mask, data.label_weights
124 | 
125 |         patience = self.max_patience
126 |         best_valid_score = 0
127 |         valid_acc_meter = AverageMeter()
128 |         for epoch in range(self.max_epochs):
129 | 
130 |             # train
131 |             self.train()
132 |             self.optimizer.zero_grad()
133 |             preds = self.forward(data)
134 |             loss = F.cross_entropy(preds[train_mask], y[train_mask], label_weights)
135 |             loss.backward()
136 |             self.optimizer.step()
137 | 
138 |             # valid
139 |             self.eval()
140 |             with torch.no_grad():
141 |                 preds = F.softmax(self.forward(data), dim=-1)
142 |                 valid_preds, test_preds = preds[valid_mask], preds[test_mask]
143 |                 valid_score = f1_score(y[valid_mask].cpu(), valid_preds.max(1)[1].flatten().cpu(), average='micro')
144 | 
145 |             valid_acc_meter.update(valid_score)
146 | 
147 |             # patience
148 |             if valid_acc_meter.avg > best_valid_score:
149 |                 best_valid_score = valid_acc_meter.avg
150 |                 self.current_round_best_preds = test_preds
151 |                 patience = self.max_patience
152 |             else:
153 |                 patience -= 1
154 | 
155 |             if patience == 0:
156 |                 break
157 | 
158 |         return best_valid_score
159 | 
160 |     def predict(self):
161 |         return self.current_round_best_preds.cpu().numpy()
162 | 
163 |     def __repr__(self):
164 |         return self.__class__.__name__
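Every model in `model_lib` repeats the trial pattern shown above in `APPNPNet`: ask nni's `HyperoptTuner` for a hyperparameter set, train with patience-based early stopping on a running-average validation score, and feed the result back to the tuner. Below is a condensed, self-contained sketch of that loop; the `train_valid` stand-in here just scores a dummy objective and is not part of the repository:
```python
import random
from nni.hyperopt_tuner.hyperopt_tuner import HyperoptTuner

# same search-space format the models above pass to update_search_space
search_space = {
    "dropout_rate": {"_type": "uniform", "_value": [0, 1]},
    "hidden":       {"_type": "quniform", "_value": [4, 7, 1]},
}

tuner = HyperoptTuner(algorithm_name='tpe', optimize_mode='maximize')
tuner.update_search_space(search_space)

def train_valid(params):
    # stand-in objective; a real model trains a GNN and returns the best
    # running-average validation micro-F1, as in APPNPNet.train_valid
    return 1.0 - abs(params['dropout_rate'] - 0.5) + random.random() * 0.01

best_score, best_hp = 0.0, None
for trial_id in range(1, 11):
    params = tuner.generate_parameters(trial_id)         # TPE proposes a config
    score = train_valid(params)
    tuner.receive_trial_result(trial_id, params, score)  # report back to TPE
    if score > best_score:
        best_score, best_hp = score, params
print('best score:', best_score, 'best hyperparameters:', best_hp)
```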
-------------------------------------------------------------------------------- /code_submission/model_lib/arma.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | from torch.nn import Linear 4 | from torch_geometric.nn import ARMAConv 5 | import copy 6 | from sklearn.metrics import f1_score 7 | from utils.tools import fix_seed, AverageMeter 8 | from nni.hyperopt_tuner.hyperopt_tuner import HyperoptTuner 9 | from torch_geometric.utils import dropout_adj 10 | import random 11 | fix_seed(1234) 12 | 13 | class ARMA(torch.nn.Module): 14 | 15 | def __init__(self, info): 16 | super(ARMA, self).__init__() 17 | 18 | self.info = info 19 | 20 | self.best_score = 0 21 | self.hist_score = [] 22 | 23 | self.best_preds = None 24 | self.current_round_best_preds = None 25 | self.best_valid_score = 0 26 | self.max_patience = 100 27 | self.max_epochs = 1600 28 | 29 | self.name = 'ARMA' 30 | 31 | self.hidden = 16 32 | self.lr = 0.005 33 | self.hyperparameters = { 34 | 'num_layers': self.info['num_layers'], 35 | 'lr': self.info['lr'], 36 | 'num_stacks': 1, 37 | 'conv_layers': 1, 38 | 'dropedge_rate': self.info['dropedge_rate'], 39 | 'dropout_rate': 0.5, 40 | 'hidden': self.info['init_hidden_size'], 41 | 'use_linear': self.info['use_linear'] 42 | } 43 | self.best_hp = None 44 | self.tuner = HyperoptTuner(algorithm_name='tpe', optimize_mode='maximize') 45 | search_space = { 46 | "dropedge_rate": { 47 | "_type": "choice", 48 | "_value": [self.info['dropedge_rate']] 49 | }, 50 | "dropout_rate": { 51 | "_type": "choice", 52 | "_value": [self.info['dropout_rate']] 53 | }, 54 | "num_layers": { 55 | "_type": "quniform", 56 | "_value": [1, 3, 1] 57 | }, 58 | "hidden": { 59 | "_type": "quniform", 60 | "_value": [4, 7, 1] 61 | }, 62 | "lr":{ 63 | "_type": "choice", 64 | "_value": [0.005] 65 | }, 66 | 'num_stacks' : { 67 | "_type": "quniform", 68 | "_value": [1, 5, 1] 69 | }, 70 | 'conv_layers' : { 71 | "_type": "quniform", 72 | "_value": [1, 5, 1] 73 | }, 74 | 'use_linear': { 75 | "_type":"choice", 76 | "_value":[True, False] 77 | } 78 | } 79 | self.tuner.update_search_space(search_space) 80 | 81 | def init_model(self, n_class, feature_num): 82 | hidden_size = int(2 ** self.hyperparameters['hidden']) 83 | num_stacks = int(self.hyperparameters['num_stacks']) 84 | conv_layers = int(self.hyperparameters['conv_layers']) 85 | lr = self.hyperparameters['lr'] 86 | dropout = self.hyperparameters['dropout_rate'] 87 | num_layers = int(self.hyperparameters['num_layers']) 88 | if self.hyperparameters['use_linear']: 89 | self.input_lin = Linear(feature_num, hidden_size) 90 | self.convs = torch.nn.ModuleList() 91 | for i in range(num_layers): 92 | self.convs.append(ARMAConv(hidden_size, hidden_size, num_stacks=num_stacks, num_layers=conv_layers, dropout=dropout)) 93 | self.output_lin = Linear(hidden_size, n_class) 94 | else: 95 | if num_layers == 1: 96 | self.conv1 = ARMAConv(feature_num, n_class, num_stacks=num_stacks,\ 97 | num_layers=conv_layers, shared_weights=False, dropout=dropout) 98 | else: 99 | self.conv1 = ARMAConv(feature_num, hidden_size, num_stacks=num_stacks,\ 100 | num_layers=conv_layers, shared_weights=False, dropout=dropout) 101 | self.convs = torch.nn.ModuleList() 102 | for i in range(num_layers - 2): 103 | self.convs.append(ARMAConv(hidden_size, hidden_size, num_stacks=num_stacks,\ 104 | num_layers=conv_layers, shared_weights=False, dropout=dropout)) 105 | self.conv2 = ARMAConv(hidden_size, n_class, num_stacks=num_stacks,\ 
106 | num_layers=conv_layers, shared_weights=False, dropout=dropout) 107 | self.optimizer = torch.optim.Adam(self.parameters(), lr=lr, weight_decay=5e-4) 108 | 109 | self = self.to('cuda') 110 | 111 | torch.cuda.empty_cache() 112 | 113 | def forward(self, data): 114 | x, edge_index, edge_weight = data.x, data.edge_index, data.edge_weight 115 | if self.hyperparameters['dropedge_rate'] is not None: 116 | edge_index, edge_weight = dropout_adj(edge_index, edge_weight, p=self.hyperparameters['dropedge_rate'],\ 117 | force_undirected=False, num_nodes=None, training=self.training) 118 | 119 | if self.hyperparameters['use_linear']: 120 | x = F.relu(self.input_lin(x)) 121 | else: 122 | x = F.relu(self.conv1(x, edge_index,edge_weight)) 123 | if self.hyperparameters['num_layers'] == 1: 124 | return x 125 | 126 | x = F.dropout(x, p=self.hyperparameters['dropout_rate'], training=self.training) 127 | for conv in self.convs: 128 | x = F.relu(conv(x, edge_index, edge_weight=edge_weight)) 129 | if self.hyperparameters['use_linear']: 130 | x = self.output_lin(x) 131 | else: 132 | x = self.conv2(x, edge_index,edge_weight) 133 | return x 134 | 135 | def trial(self, data, round_num): 136 | n_class, feature_num = self.info['n_class'], data.x.shape[1] 137 | if round_num >= 2: 138 | self.hyperparameters = self.tuner.generate_parameters(round_num-1) 139 | print(self.hyperparameters) 140 | 141 | while True: 142 | try: 143 | self.init_model(n_class, feature_num) 144 | val_score = self.train_valid(data, round_num) 145 | if round_num > 1: 146 | self.tuner.receive_trial_result(round_num-1,self.hyperparameters,val_score) 147 | if val_score > self.best_score: 148 | self.best_hp = copy.deepcopy(self.hyperparameters) 149 | break 150 | except RuntimeError as e: 151 | print(self.name,e, 'OOM with Hidden Size', self.hyperparameters['hidden']) 152 | if round_num > 1: 153 | self.tuner.receive_trial_result(round_num-1,self.hyperparameters,0) 154 | return 0 155 | print("Best Hyperparameters of", self.name, self.best_hp) 156 | return val_score 157 | 158 | def train_valid(self, data, round_num): 159 | y, train_mask, valid_mask, test_mask, label_weights = data.y, data.train_mask, data.valid_mask, data.test_mask, data.label_weights 160 | 161 | 162 | patience = self.max_patience 163 | best_valid_score = 0 164 | valid_acc_meter = AverageMeter() 165 | for epoch in range(self.max_epochs): 166 | 167 | # train 168 | self.train() 169 | self.optimizer.zero_grad() 170 | preds = self.forward(data) 171 | loss = F.cross_entropy(preds[train_mask], y[train_mask], label_weights) 172 | loss.backward() 173 | self.optimizer.step() 174 | 175 | # valid 176 | self.eval() 177 | with torch.no_grad(): 178 | preds = F.softmax(self.forward(data), dim=-1) 179 | valid_preds, test_preds = preds[valid_mask], preds[test_mask] 180 | valid_score = f1_score(y[valid_mask].cpu(), valid_preds.max(1)[1].flatten().cpu(), average='micro') 181 | 182 | valid_acc_meter.update(valid_score) 183 | # patience 184 | if valid_acc_meter.avg > best_valid_score: 185 | best_valid_score = valid_acc_meter.avg 186 | self.current_round_best_preds = test_preds 187 | patience = self.max_patience 188 | else: 189 | patience -= 1 190 | 191 | if patience == 0: 192 | break 193 | 194 | return best_valid_score 195 | 196 | def predict(self): 197 | return self.current_round_best_preds.cpu().numpy() 198 | 199 | def __repr__(self): 200 | return self.__class__.__name__ -------------------------------------------------------------------------------- /code_submission/model_lib/gat.py: 
-------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | from torch.nn import Linear 4 | from torch_geometric.nn import GATConv 5 | import copy 6 | from sklearn.metrics import f1_score 7 | from utils.tools import fix_seed, AverageMeter 8 | from nni.hyperopt_tuner.hyperopt_tuner import HyperoptTuner 9 | from torch_geometric.utils import dropout_adj 10 | import random 11 | fix_seed(1234) 12 | 13 | class GAT(torch.nn.Module): 14 | 15 | def __init__(self, info): 16 | super(GAT, self).__init__() 17 | self.info = info 18 | 19 | self.best_score = 0 20 | self.hist_score = [] 21 | 22 | self.best_preds = None 23 | self.current_round_best_preds = None 24 | self.best_valid_score = 0 25 | self.max_patience = 100 26 | self.max_epochs = 1600 27 | 28 | self.name = 'GAT' 29 | self.hyperparameters = { 30 | 'num_layers': self.info['num_layers'], 31 | 'lr': self.info['lr'], 32 | 'heads': 5, 33 | 'dropedge_rate': self.info['dropedge_rate'], 34 | 'dropout_rate': self.info['dropout_rate'], 35 | 'hidden': 8 36 | } 37 | self.best_hp = None 38 | self.tuner = HyperoptTuner(algorithm_name='tpe', optimize_mode='maximize') 39 | search_space = { 40 | "dropedge_rate": { 41 | "_type": "choice", 42 | "_value": [self.info['dropedge_rate']] 43 | }, 44 | "dropout_rate": { 45 | "_type": "choice", 46 | "_value": [self.info['dropout_rate']] 47 | }, 48 | "num_layers": { 49 | "_type": "choice", 50 | "_value": [2] 51 | }, 52 | "hidden": { 53 | "_type": "quniform", 54 | "_value": [4, 7, 1] 55 | }, 56 | "lr":{ 57 | "_type": "choice", 58 | "_value": [0.005] 59 | }, 60 | 'heads' :{ 61 | "_type": "quniform", 62 | "_value": [1, 10, 1] 63 | } 64 | } 65 | self.tuner.update_search_space(search_space) 66 | 67 | def init_model(self, n_class, feature_num): 68 | heads = int(self.hyperparameters['heads']) 69 | hidden_size = int(2 ** self.hyperparameters['hidden']) 70 | 71 | self.conv1 = GATConv(feature_num, hidden_size, heads=heads, dropout=self.hyperparameters['dropout_rate']) 72 | self.conv2 = GATConv(hidden_size * heads, n_class, concat=False, dropout=self.hyperparameters['dropout_rate']) 73 | 74 | self.optimizer = torch.optim.Adam(self.parameters(), lr=self.hyperparameters['lr'], weight_decay=5e-4) 75 | 76 | self = self.to('cuda') 77 | 78 | torch.cuda.empty_cache() 79 | 80 | 81 | def forward(self, data): 82 | x, edge_index, edge_weight = data.x, data.edge_index, data.edge_weight 83 | if self.hyperparameters['dropedge_rate'] is not None: 84 | edge_index, edge_weight = dropout_adj(edge_index, edge_weight, p=self.hyperparameters['dropedge_rate'],\ 85 | force_undirected=False, num_nodes=None, training=self.training) 86 | 87 | x = F.dropout(x, p=self.hyperparameters['dropout_rate'], training=self.training) 88 | x = F.elu(self.conv1(x, edge_index)) 89 | x = F.dropout(x, p=self.hyperparameters['dropout_rate'], training=self.training) 90 | x = self.conv2(x, edge_index) 91 | return x 92 | 93 | def trial(self, data, round_num): 94 | n_class, feature_num = self.info['n_class'], data.x.shape[1] 95 | if round_num >= 2: 96 | self.hyperparameters = self.tuner.generate_parameters(round_num-1) 97 | print(self.hyperparameters) 98 | 99 | while True: 100 | try: 101 | self.init_model(n_class, feature_num) 102 | val_score = self.train_valid(data, round_num) 103 | if round_num > 1: 104 | self.tuner.receive_trial_result(round_num-1,self.hyperparameters,val_score) 105 | if val_score > self.best_score: 106 | self.best_hp = copy.deepcopy(self.hyperparameters) 107 | break 108 | except 
RuntimeError as e: 109 | print(self.name,e, 'OOM with Hidden Size', self.hyperparameters['hidden']) 110 | if round_num > 1: 111 | self.tuner.receive_trial_result(round_num-1,self.hyperparameters,0) 112 | return 0 113 | print("Best Hyperparameters of", self.name, self.best_hp) 114 | return val_score 115 | 116 | 117 | 118 | def train_valid(self, data, round_num): 119 | y, train_mask, valid_mask, test_mask, label_weights = data.y, data.train_mask, data.valid_mask, data.test_mask, data.label_weights 120 | 121 | patience = self.max_patience 122 | best_valid_score = 0 123 | valid_acc_meter = AverageMeter() 124 | for epoch in range(self.max_epochs): 125 | 126 | # train 127 | self.train() 128 | self.optimizer.zero_grad() 129 | preds = self.forward(data) 130 | loss = F.cross_entropy(preds[train_mask], y[train_mask], label_weights) 131 | loss.backward() 132 | self.optimizer.step() 133 | 134 | # valid 135 | self.eval() 136 | with torch.no_grad(): 137 | preds = F.softmax(self.forward(data), dim=-1) 138 | valid_preds, test_preds = preds[valid_mask], preds[test_mask] 139 | valid_score = f1_score(y[valid_mask].cpu(), valid_preds.max(1)[1].flatten().cpu(), average='micro') 140 | 141 | valid_acc_meter.update(valid_score) 142 | # patience 143 | if valid_acc_meter.avg > best_valid_score: 144 | best_valid_score = valid_acc_meter.avg 145 | self.current_round_best_preds = test_preds 146 | patience = self.max_patience 147 | else: 148 | patience -= 1 149 | 150 | if patience == 0: 151 | break 152 | 153 | return best_valid_score 154 | 155 | def epoch_train(self, data, run_num, info, time_remain): 156 | y, train_mask = data.y, data.train_mask 157 | self.train() 158 | self.optimizer.zero_grad() 159 | preds = self.forward(data) 160 | loss = F.cross_entropy(preds[train_mask], y[train_mask]) 161 | loss.backward() 162 | self.optimizer.step() 163 | 164 | 165 | def epoch_valid(self, data): 166 | y, valid_mask, test_mask = data.y, data.valid_mask, data.test_mask 167 | 168 | self.eval() 169 | with torch.no_grad(): 170 | preds = F.softmax(self.forward(data), dim=-1) 171 | valid_preds, test_preds = preds[valid_mask], preds[test_mask] 172 | self.current_preds = test_preds 173 | valid_score = f1_score(y[valid_mask].cpu(), valid_preds.max(1)[1].flatten().cpu(), average='micro') 174 | return valid_score 175 | 176 | def predict(self): 177 | return self.current_round_best_preds.cpu().numpy() 178 | 179 | def __repr__(self): 180 | return self.__class__.__name__ 181 | -------------------------------------------------------------------------------- /code_submission/model_lib/gatedgraph.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | from torch.nn import Linear 4 | from torch_geometric.nn import GatedGraphConv 5 | import copy 6 | from sklearn.metrics import f1_score 7 | from utils.tools import fix_seed, AverageMeter 8 | from nni.hyperopt_tuner.hyperopt_tuner import HyperoptTuner 9 | from torch_geometric.utils import dropout_adj 10 | import random 11 | 12 | fix_seed(1234) 13 | class GatedGraphNet(torch.nn.Module): 14 | 15 | def __init__(self, info): 16 | super(GatedGraphNet, self).__init__() 17 | 18 | self.info = info 19 | self.hyperparameters = { 20 | 'num_layers': self.info['num_layers'], 21 | 'lr': 0.005, 22 | 'gated_conv_layers': 3, 23 | 'dropedge_rate': self.info['dropedge_rate'], 24 | 'dropout_rate': self.info['dropout_rate'], 25 | 'hidden': self.info['init_hidden_size'] 26 | } 27 | 28 | self.best_score = 0 29 | self.hist_score = [] 30 | 31 
| self.best_preds = None 32 | self.current_round_best_preds = None 33 | self.best_valid_score = 0 34 | self.max_patience = 100 35 | self.max_epochs = 1600 36 | 37 | self.name = 'GatedGraph' 38 | 39 | self.best_hp = None 40 | self.tuner = HyperoptTuner(algorithm_name='tpe', optimize_mode='maximize') 41 | search_space = { 42 | "dropedge_rate": { 43 | "_type": "uniform", 44 | "_value": [0, 1] 45 | }, 46 | "dropout_rate": { 47 | "_type": "uniform", 48 | "_value": [0, 1] 49 | }, 50 | "num_layers": { 51 | "_type": "randint", 52 | "_value": [2, 4] 53 | }, 54 | "hidden": { 55 | "_type": "quniform", 56 | "_value": [4, 7, 1] 57 | }, 58 | "lr":{ 59 | "_type": "choice", 60 | "_value": [0.005] 61 | }, 62 | 'gated_conv_layers' :{ 63 | "_type": "quniform", 64 | "_value": [1, 10, 1] 65 | } 66 | } 67 | self.tuner.update_search_space(search_space) 68 | 69 | def init_model(self, n_class, feature_num): 70 | num_layers = self.hyperparameters['num_layers'] 71 | hidden_size = int(2 ** self.hyperparameters['hidden']) 72 | lr = self.hyperparameters['lr'] 73 | gated_conv_layers = int(self.hyperparameters['gated_conv_layers']) 74 | 75 | self.input_linear = Linear(feature_num, hidden_size) 76 | self.convs = torch.nn.ModuleList() 77 | for i in range(num_layers - 1): 78 | self.convs.append(GatedGraphConv(out_channels=hidden_size, num_layers=gated_conv_layers)) 79 | self.output_linear = Linear(hidden_size, n_class) 80 | self.optimizer = torch.optim.Adam(self.parameters(), lr=lr, weight_decay=5e-4) 81 | 82 | self = self.to('cuda') 83 | 84 | torch.cuda.empty_cache() 85 | 86 | def forward(self, data): 87 | x, edge_index, edge_weight = data.x, data.edge_index, data.edge_weight 88 | if self.hyperparameters['dropedge_rate'] is not None: 89 | edge_index, edge_weight = dropout_adj(edge_index, edge_weight, p=self.hyperparameters['dropedge_rate'],\ 90 | force_undirected=False, num_nodes=None, training=self.training) 91 | 92 | x = F.relu(self.input_linear(x)) 93 | x = F.dropout(x, p=self.hyperparameters['dropout_rate'], training=self.training) 94 | for conv in self.convs: 95 | x = F.relu(conv(x, edge_index,edge_weight)) 96 | x = F.dropout(x, p=self.hyperparameters['dropout_rate'], training=self.training) 97 | x = self.output_linear(x) 98 | return x 99 | 100 | def trial(self, data, round_num): 101 | n_class, feature_num = self.info['n_class'], data.x.shape[1] 102 | if round_num >= 2: 103 | self.hyperparameters = self.tuner.generate_parameters(round_num-1) 104 | print(self.hyperparameters) 105 | 106 | while True: 107 | try: 108 | self.init_model(n_class, feature_num) 109 | val_score = self.train_valid(data, round_num) 110 | if round_num > 1: 111 | self.tuner.receive_trial_result(round_num-1,self.hyperparameters,val_score) 112 | if val_score > self.best_score: 113 | self.best_hp = copy.deepcopy(self.hyperparameters) 114 | break 115 | except RuntimeError as e: 116 | print(self.name,e, 'OOM with Hidden Size', self.hyperparameters['hidden']) 117 | if round_num > 1: 118 | self.tuner.receive_trial_result(round_num-1,self.hyperparameters,0) 119 | return 0 120 | print("Best Hyperparameters of", self.name, self.best_hp) 121 | return val_score 122 | 123 | def train_valid(self, data, round_num): 124 | y, train_mask, valid_mask, test_mask, label_weights = data.y, data.train_mask, data.valid_mask, data.test_mask, data.label_weights 125 | 126 | patience = self.max_patience 127 | best_valid_score = 0 128 | valid_acc_meter = AverageMeter() 129 | for epoch in range(self.max_epochs): 130 | 131 | # train 132 | self.train() 133 | 
| self.optimizer.zero_grad()
134 |             preds = self.forward(data)
135 |             loss = F.cross_entropy(preds[train_mask], y[train_mask], label_weights)
136 |             loss.backward()
137 |             self.optimizer.step()
138 | 
139 |             # valid
140 |             self.eval()
141 |             with torch.no_grad():
142 |                 preds = F.softmax(self.forward(data), dim=-1)
143 |                 valid_preds, test_preds = preds[valid_mask], preds[test_mask]
144 |                 valid_score = f1_score(y[valid_mask].cpu(), valid_preds.max(1)[1].flatten().cpu(), average='micro')
145 |             valid_acc_meter.update(valid_score)
146 |             # patience
147 |             if valid_acc_meter.avg > best_valid_score:
148 |                 best_valid_score = valid_acc_meter.avg
149 |                 self.current_round_best_preds = test_preds
150 |                 patience = self.max_patience
151 |             else:
152 |                 patience -= 1
153 | 
154 |             if patience == 0:
155 |                 break
156 | 
157 |         return best_valid_score
158 | 
159 |     def predict(self):
160 |         return self.current_round_best_preds.cpu().numpy()
161 | 
162 |     def __repr__(self):
163 |         return self.__class__.__name__
--------------------------------------------------------------------------------
/code_submission/model_lib/gcn.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn.functional as F
3 | from torch.nn import Linear
4 | from torch_geometric.nn import GCNConv
5 | import copy
6 | from sklearn.metrics import f1_score
7 | from utils.tools import fix_seed, AverageMeter
8 | from nni.hyperopt_tuner.hyperopt_tuner import HyperoptTuner
9 | from torch_geometric.utils import dropout_adj
10 | import random
11 | 
12 | fix_seed(1234)
13 | class GCN(torch.nn.Module):
14 | 
15 |     def __init__(self, info):
16 |         super(GCN, self).__init__()
17 |         self.info = info
18 | 
19 |         self.hyperparameters = {
20 |             'num_layers': self.info['num_layers'],
21 |             'lr': self.info['lr'],
22 |             'dropedge_rate': self.info['dropedge_rate'],
23 |             'dropout_rate': self.info['dropout_rate'],
24 |             'hidden': self.info['init_hidden_size']
25 |         }
26 | 
27 |         self.best_score = 0
28 |         self.hist_score = []
29 | 
30 |         self.best_preds = None
31 |         self.current_round_best_preds = None
32 |         self.best_valid_score = 0
33 |         self.max_patience = 100
34 |         self.max_epochs = 1600
35 | 
36 |         self.name = 'GCN'
37 |         self.tuner = HyperoptTuner(algorithm_name='tpe', optimize_mode='maximize')
38 |         search_space = {
39 |             "dropedge_rate": {
40 |                 "_type": "choice",
41 |                 "_value": [self.info['dropedge_rate']]
42 |             },
43 |             "dropout_rate": {
44 |                 "_type": "choice",
45 |                 "_value": [self.info['dropout_rate']]
46 |             },
47 |             "num_layers": {
48 |                 "_type": "randint",
49 |                 "_value": [2, 4]
50 |             },
51 |             "hidden": {
52 |                 "_type": "quniform",
53 |                 "_value": [4, 7, 1]
54 |             },
55 |             "lr": {
56 |                 "_type": "choice",
57 |                 "_value": [0.005]
58 |             }
59 |         }
60 |         self.tuner.update_search_space(search_space)
61 |         self.best_hp = None
62 | 
63 |     def init_model(self, n_class, feature_num):
64 |         hidden_size = int(2 ** self.hyperparameters['hidden'])
65 |         if self.info['num_edges'] > 1000000:  # on very large graphs, fall back to a plain linear layer
66 |             self.conv1 = Linear(feature_num, hidden_size)
67 |         else:
68 |             self.conv1 = GCNConv(feature_num, hidden_size)
69 |         if self.hyperparameters['num_layers'] > 2:
70 |             self.convs = torch.nn.ModuleList()
71 |             for i in range(self.hyperparameters['num_layers'] - 2):
72 |                 self.convs.append(GCNConv(hidden_size, hidden_size))
73 |         self.conv2 = GCNConv(hidden_size, n_class)
74 | 
75 |         self.optimizer = torch.optim.Adam(self.parameters(), lr=self.hyperparameters['lr'], weight_decay=5e-4)
76 | 
77 |         self = self.to('cuda')
78 | 
79 |         torch.cuda.empty_cache()
80 | 
81 |     def forward(self, data):
82 |         x,
edge_index, edge_weight = data.x, data.edge_index, data.edge_weight 83 | if self.hyperparameters['dropedge_rate'] is not None: 84 | edge_index, edge_weight = dropout_adj(edge_index, edge_weight, p=self.hyperparameters['dropedge_rate'],\ 85 | force_undirected=False, num_nodes=None, training=self.training) 86 | if self.info['num_edges'] > 1000000: 87 | x = F.relu(self.conv1(x)) 88 | else: 89 | x = F.relu(self.conv1(x, edge_index,edge_weight)) 90 | x = F.dropout(x, p=self.hyperparameters['dropout_rate'], training=self.training) 91 | if self.hyperparameters['num_layers'] > 2: 92 | for conv in self.convs: 93 | x = F.relu(conv(x, edge_index,edge_weight)) 94 | x = F.dropout(x, p=self.hyperparameters['dropout_rate'], training=self.training) 95 | x = self.conv2(x, edge_index,edge_weight) 96 | return x 97 | 98 | def trial(self, data, round_num): 99 | n_class, feature_num = self.info['n_class'], data.x.shape[1] 100 | if round_num >= 2: 101 | self.hyperparameters = self.tuner.generate_parameters(round_num-1) 102 | print(self.hyperparameters) 103 | 104 | while True: 105 | try: 106 | self.init_model(n_class, feature_num) 107 | val_score = self.train_valid(data, round_num) 108 | if round_num > 1: 109 | self.tuner.receive_trial_result(round_num-1,self.hyperparameters,val_score) 110 | if val_score > self.best_score: 111 | self.best_hp = copy.deepcopy(self.hyperparameters) 112 | break 113 | except RuntimeError as e: 114 | print(self.name,e, 'OOM with Hidden Size', self.hyperparameters['hidden']) 115 | if round_num > 1: 116 | self.tuner.receive_trial_result(round_num-1,self.hyperparameters,0) 117 | return 0 118 | print("Best Hyperparameters of", self.name, self.best_hp) 119 | return val_score 120 | 121 | 122 | def train_valid(self, data, round_num): 123 | y, train_mask, valid_mask, test_mask, label_weights = data.y, data.train_mask, data.valid_mask, data.test_mask, data.label_weights 124 | patience = self.max_patience 125 | best_valid_score = 0 126 | valid_acc_meter = AverageMeter() 127 | for epoch in range(self.max_epochs): 128 | 129 | # train 130 | self.train() 131 | self.optimizer.zero_grad() 132 | preds = self.forward(data) 133 | loss = F.cross_entropy(preds[train_mask], y[train_mask], label_weights) 134 | loss.backward() 135 | self.optimizer.step() 136 | 137 | # valid 138 | self.eval() 139 | with torch.no_grad(): 140 | preds = F.softmax(self.forward(data), dim=-1) 141 | valid_preds, test_preds = preds[valid_mask], preds[test_mask] 142 | valid_score = f1_score(y[valid_mask].cpu(), valid_preds.max(1)[1].flatten().cpu(), average='micro') 143 | valid_acc_meter.update(valid_score) 144 | # patience 145 | if valid_acc_meter.avg > best_valid_score: 146 | best_valid_score = valid_acc_meter.avg 147 | self.current_round_best_preds = test_preds 148 | patience = self.max_patience 149 | else: 150 | patience -= 1 151 | 152 | if patience == 0: 153 | break 154 | 155 | return best_valid_score 156 | 157 | def epoch_train(self, data, run_num, info, time_remain): 158 | y, train_mask = data.y, data.train_mask 159 | self.train() 160 | self.optimizer.zero_grad() 161 | preds = self.forward(data) 162 | loss = F.cross_entropy(preds[train_mask], y[train_mask]) 163 | loss.backward() 164 | self.optimizer.step() 165 | 166 | def epoch_valid(self, data): 167 | y, valid_mask, test_mask = data.y, data.valid_mask, data.test_mask 168 | self.eval() 169 | 170 | with torch.no_grad(): 171 | preds = F.softmax(self.forward(data), dim=-1) 172 | valid_preds, test_preds = preds[valid_mask], preds[test_mask] 173 | self.current_preds = test_preds 174 
| valid_score = f1_score(y[valid_mask].cpu(), valid_preds.max(1)[1].flatten().cpu(), average='micro') 175 | 176 | return valid_score 177 | 178 | def predict(self): 179 | return self.current_round_best_preds.cpu().numpy() 180 | 181 | def __repr__(self): 182 | return self.__class__.__name__ -------------------------------------------------------------------------------- /code_submission/model_lib/gin.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | from torch.nn import Linear 4 | from torch_geometric.nn import GINConv 5 | import copy 6 | from sklearn.metrics import f1_score 7 | from utils.tools import fix_seed, AverageMeter 8 | from nni.hyperopt_tuner.hyperopt_tuner import HyperoptTuner 9 | from torch_geometric.utils import dropout_adj 10 | import random 11 | 12 | fix_seed(1234) 13 | class GIN(torch.nn.Module): 14 | 15 | def __init__(self, info): 16 | super(GIN, self).__init__() 17 | 18 | self.info = info 19 | self.hyperparameters = { 20 | 'num_layers': self.info['num_layers'], 21 | 'lr': self.info['lr'], 22 | 'dropedge_rate':self.info['dropedge_rate'], 23 | 'dropout_rate':self.info['dropout_rate'], 24 | 'hidden': self.info['init_hidden_size'] 25 | } 26 | 27 | self.best_score = 0 28 | self.hist_score = [] 29 | 30 | self.best_preds = None 31 | self.current_round_best_preds = None 32 | self.best_valid_score = 0 33 | self.max_patience = 100 34 | self.max_epochs = 1600 35 | 36 | self.name = 'GIN' 37 | 38 | self.tuner = HyperoptTuner(algorithm_name='tpe', optimize_mode='maximize') 39 | search_space = { 40 | "dropedge_rate": { 41 | "_type": "choice", 42 | "_value": [self.info['dropedge_rate']] 43 | }, 44 | "dropout_rate": { 45 | "_type": "choice", 46 | "_value": [self.info['dropout_rate']] 47 | }, 48 | "num_layers": { 49 | "_type": "quniform", 50 | "_value": [1, 3, 1] 51 | }, 52 | "hidden": { 53 | "_type": "quniform", 54 | "_value": [4, 7, 1] 55 | }, 56 | "lr":{ 57 | "_type": "choice", 58 | "_value": [0.005] 59 | } 60 | } 61 | self.tuner.update_search_space(search_space) 62 | self.best_hp = None 63 | 64 | def init_model(self, n_class, feature_num): 65 | num_layers = int(self.hyperparameters['num_layers']) 66 | hidden_size = int(2 ** self.hyperparameters['hidden']) 67 | lr = self.hyperparameters['lr'] 68 | 69 | self.input_linear = Linear(feature_num, hidden_size) 70 | self.convs = torch.nn.ModuleList() 71 | for i in range(num_layers): 72 | self.convs.append(GINConv(torch.nn.Sequential( 73 | Linear(hidden_size, hidden_size), 74 | torch.nn.ReLU(), 75 | Linear(hidden_size, hidden_size), 76 | torch.nn.ReLU() 77 | ), eps=0, train_eps=False) 78 | ) 79 | self.output_linear = Linear(hidden_size, n_class) 80 | self.optimizer = torch.optim.Adam(self.parameters(), lr=lr, weight_decay=5e-4) 81 | 82 | self = self.to('cuda') 83 | 84 | torch.cuda.empty_cache() 85 | 86 | def forward(self, data): 87 | x, edge_index, edge_weight = data.x, data.edge_index, data.edge_weight 88 | if self.hyperparameters['dropedge_rate'] is not None: 89 | edge_index, edge_weight = dropout_adj(edge_index, edge_weight, p=self.hyperparameters['dropedge_rate'],\ 90 | force_undirected=False, num_nodes=None, training=self.training) 91 | x = F.relu(self.input_linear(x)) 92 | x = F.dropout(x, p=self.hyperparameters['dropout_rate'], training=self.training) 93 | for conv in self.convs: 94 | x = conv(x, edge_index) 95 | x = F.dropout(x, p=self.hyperparameters['dropout_rate'], training=self.training) 96 | x = self.output_linear(x) 97 | 98 | return x 99 | 
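# A minimal usage sketch of the trial() interface below (assumptions: the outer
# driver lives in model_space.py and calls trial() once per search round; the
# names `model`, `data` and `n_rounds` are illustrative, not from this file):
#
#     model = GIN(info)
#     for round_num in range(1, n_rounds + 1):
#         # round 1 trains with the seed hyperparameters taken from `info`;
#         # from round 2 on, trial() asks the TPE tuner for a fresh candidate
#         val_score = model.trial(data, round_num)
#     test_probs = model.predict()  # softmax scores for the test-mask nodes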
100 | def trial(self, data, round_num): 101 | n_class, feature_num = self.info['n_class'], data.x.shape[1] 102 | if round_num >= 2: 103 | self.hyperparameters = self.tuner.generate_parameters(round_num-1) 104 | print(self.hyperparameters) 105 | 106 | while True: 107 | try: 108 | self.init_model(n_class, feature_num) 109 | val_score = self.train_valid(data, round_num) 110 | if round_num > 1: 111 | self.tuner.receive_trial_result(round_num-1,self.hyperparameters,val_score) 112 | if val_score > self.best_score: 113 | self.best_hp = copy.deepcopy(self.hyperparameters) 114 | break 115 | except RuntimeError as e: 116 | print(self.name,e, 'OOM with Hidden Size', self.hyperparameters['hidden']) 117 | if round_num > 1: 118 | self.tuner.receive_trial_result(round_num-1, self.hyperparameters, 0) 119 | return 0 120 | print("Best Hyperparameters of", self.name, self.best_hp) 121 | return val_score 122 | 123 | def train_valid(self, data, round_num): 124 | y, train_mask, valid_mask, test_mask, label_weights = data.y, data.train_mask, data.valid_mask, data.test_mask, data.label_weights 125 | 126 | patience = self.max_patience 127 | best_valid_score = 0 128 | valid_acc_meter = AverageMeter() 129 | for epoch in range(self.max_epochs): 130 | 131 | # train 132 | self.train() 133 | self.optimizer.zero_grad() 134 | preds = self.forward(data) 135 | loss = F.cross_entropy(preds[train_mask], y[train_mask], label_weights) 136 | loss.backward() 137 | self.optimizer.step() 138 | 139 | # valid 140 | self.eval() 141 | with torch.no_grad(): 142 | preds = F.softmax(self.forward(data), dim=-1) 143 | valid_preds, test_preds = preds[valid_mask], preds[test_mask] 144 | valid_score = f1_score(y[valid_mask].cpu(), valid_preds.max(1)[1].flatten().cpu(), average='micro') 145 | valid_acc_meter.update(valid_score) 146 | 147 | # patience 148 | if valid_acc_meter.avg > best_valid_score: 149 | best_valid_score = valid_acc_meter.avg 150 | self.current_round_best_preds = test_preds 151 | patience = self.max_patience 152 | else: 153 | patience -= 1 154 | 155 | if patience == 0: 156 | break 157 | 158 | return best_valid_score 159 | 160 | def predict(self): 161 | return self.current_round_best_preds.cpu().numpy() 162 | 163 | def __repr__(self): 164 | return self.__class__.__name__ -------------------------------------------------------------------------------- /code_submission/model_lib/graphconvnet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | from torch.nn import Linear 4 | from torch_geometric.nn import GraphConv 5 | import copy 6 | from sklearn.metrics import f1_score 7 | from utils.tools import fix_seed, AverageMeter 8 | from nni.hyperopt_tuner.hyperopt_tuner import HyperoptTuner 9 | from torch_geometric.utils import dropout_adj 10 | import random 11 | 12 | fix_seed(1234) 13 | class GraphConvNet(torch.nn.Module): 14 | 15 | def __init__(self, info): 16 | super(GraphConvNet, self).__init__() 17 | 18 | self.info = info 19 | self.hyperparameters = { 20 | 'num_layers': self.info['num_layers'], 21 | 'lr': self.info['lr'], 22 | 'dropedge_rate': self.info['dropedge_rate'], 23 | 'dropout_rate': self.info['dropout_rate'], 24 | 'hidden': self.info['init_hidden_size'], 25 | 'use_linear':self.info['use_linear'] 26 | } 27 | 28 | self.best_score = 0 29 | self.hist_score = [] 30 | 31 | self.best_preds = None 32 | self.current_round_best_preds = None 33 | self.best_valid_score = 0 34 | self.max_patience = 100 35 | self.max_epochs = 1600 36 | 37 | self.name 
= 'GraphConvNet' 38 | 39 | self.best_hp = None 40 | self.tuner = HyperoptTuner(algorithm_name='tpe', optimize_mode='maximize') 41 | search_space = { 42 | "dropedge_rate": { 43 | "_type": "choice", 44 | "_value": [self.info['dropedge_rate']] 45 | }, 46 | "dropout_rate": { 47 | "_type": "choice", 48 | "_value": [self.info['dropout_rate']] 49 | }, 50 | "num_layers": { 51 | "_type": "quniform", 52 | "_value": [1, 3, 1] 53 | }, 54 | "hidden": { 55 | "_type": "quniform", 56 | "_value": [4, 7, 1] 57 | }, 58 | "lr":{ 59 | "_type": "choice", 60 | "_value": [0.005] 61 | }, 62 | 'use_linear': { 63 | "_type":"choice", 64 | "_value":[True, False] 65 | } 66 | 67 | } 68 | self.tuner.update_search_space(search_space) 69 | 70 | 71 | def init_model(self, n_class, feature_num): 72 | num_layers = int(self.hyperparameters['num_layers']) 73 | hidden_size = int(2 ** self.hyperparameters['hidden']) 74 | lr = self.hyperparameters['lr'] 75 | if self.hyperparameters['use_linear']: 76 | self.input_lin = Linear(feature_num, hidden_size) 77 | self.convs = torch.nn.ModuleList() 78 | for i in range(num_layers): 79 | self.convs.append(GraphConv(hidden_size, hidden_size)) 80 | self.output_lin = Linear(hidden_size, n_class) 81 | 82 | else: 83 | if num_layers == 1: 84 | self.conv1 = GraphConv(in_channels=feature_num, out_channels=n_class) 85 | else: 86 | self.conv1 = GraphConv(in_channels=feature_num, out_channels=hidden_size) 87 | self.convs = torch.nn.ModuleList() 88 | for i in range(num_layers - 2): 89 | self.convs.append(GraphConv(in_channels=hidden_size, out_channels=hidden_size)) 90 | self.conv2 = GraphConv(hidden_size, n_class) 91 | self.optimizer = torch.optim.Adam(self.parameters(), lr=lr, weight_decay=5e-4) 92 | 93 | self = self.to('cuda') 94 | 95 | torch.cuda.empty_cache() 96 | 97 | def forward(self, data): 98 | x, edge_index, edge_weight = data.x, data.edge_index, data.edge_weight 99 | if self.hyperparameters['dropedge_rate'] is not None: 100 | edge_index, edge_weight = dropout_adj(edge_index, edge_weight, p=self.hyperparameters['dropedge_rate'],\ 101 | force_undirected=False, num_nodes=None, training=self.training) 102 | 103 | if self.hyperparameters['use_linear']: 104 | x = F.relu(self.input_lin(x)) 105 | else: 106 | x = F.relu(self.conv1(x, edge_index,edge_weight)) 107 | if self.hyperparameters['num_layers'] == 1: 108 | return x 109 | x = F.dropout(x, p=self.hyperparameters['dropout_rate'], training=self.training) 110 | for conv in self.convs: 111 | x = F.relu(conv(x, edge_index, edge_weight=edge_weight)) 112 | x = F.dropout(x, p=self.hyperparameters['dropout_rate'], training=self.training) 113 | if self.hyperparameters['use_linear']: 114 | x = self.output_lin(x) 115 | else: 116 | x = self.conv2(x, edge_index,edge_weight) 117 | return x 118 | 119 | def trial(self, data, round_num): 120 | n_class, feature_num = self.info['n_class'], data.x.shape[1] 121 | if round_num >= 2: 122 | self.hyperparameters = self.tuner.generate_parameters(round_num-1) 123 | print(self.hyperparameters) 124 | 125 | while True: 126 | try: 127 | self.init_model(n_class, feature_num) 128 | val_score = self.train_valid(data, round_num) 129 | if round_num > 1: 130 | self.tuner.receive_trial_result(round_num-1,self.hyperparameters,val_score) 131 | if val_score > self.best_score: 132 | self.best_hp = copy.deepcopy(self.hyperparameters) 133 | break 134 | except RuntimeError as e: 135 | print(self.name,e, 'OOM with Hidden Size', self.hyperparameters['hidden']) 136 | if round_num > 1: 137 | 
self.tuner.receive_trial_result(round_num-1,self.hyperparameters,0) 138 | return 0 139 | print("Best Hyperparameters of ", self.name, self.best_hp) 140 | return val_score 141 | 142 | def train_valid(self, data, round_num): 143 | y, train_mask, valid_mask, test_mask, label_weights = data.y, data.train_mask, data.valid_mask, data.test_mask, data.label_weights 144 | 145 | patience = self.max_patience 146 | best_valid_score = 0 147 | valid_acc_meter = AverageMeter() 148 | for epoch in range(self.max_epochs): 149 | 150 | # train 151 | self.train() 152 | self.optimizer.zero_grad() 153 | preds = self.forward(data) 154 | loss = F.cross_entropy(preds[train_mask], y[train_mask], label_weights) 155 | loss.backward() 156 | self.optimizer.step() 157 | 158 | # valid 159 | self.eval() 160 | with torch.no_grad(): 161 | preds = F.softmax(self.forward(data), dim=-1) 162 | valid_preds, test_preds = preds[valid_mask], preds[test_mask] 163 | valid_score = f1_score(y[valid_mask].cpu(), valid_preds.max(1)[1].flatten().cpu(), average='micro') 164 | valid_acc_meter.update(valid_score) 165 | 166 | # patience 167 | if valid_acc_meter.avg > best_valid_score: 168 | best_valid_score = valid_acc_meter.avg 169 | self.current_round_best_preds = test_preds 170 | patience = self.max_patience 171 | else: 172 | patience -= 1 173 | 174 | if patience == 0: 175 | break 176 | 177 | return best_valid_score 178 | 179 | def epoch_train(self, data, run_num, info, time_remain): 180 | y, train_mask = data.y, data.train_mask 181 | self.train() 182 | self.optimizer.zero_grad() 183 | preds = self.forward(data) 184 | loss = F.cross_entropy(preds[train_mask], y[train_mask]) 185 | loss.backward() 186 | self.optimizer.step() 187 | 188 | def epoch_valid(self, data): 189 | y, valid_mask, test_mask = data.y, data.valid_mask, data.test_mask 190 | self.eval() 191 | 192 | with torch.no_grad(): 193 | preds = F.softmax(self.forward(data), dim=-1) 194 | valid_preds, test_preds = preds[valid_mask], preds[test_mask] 195 | self.current_preds = test_preds 196 | valid_score = f1_score(y[valid_mask].flatten().cpu(), valid_preds.max(1)[1].flatten().cpu(), average='micro') 197 | 198 | return valid_score 199 | 200 | def predict(self): 201 | return self.current_round_best_preds.cpu().numpy() 202 | 203 | def __repr__(self): 204 | return self.__class__.__name__ -------------------------------------------------------------------------------- /code_submission/model_lib/graphsage.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | from torch.nn import Linear 4 | from torch_geometric.nn import SAGEConv 5 | import copy 6 | from sklearn.metrics import f1_score 7 | from utils.tools import fix_seed, AverageMeter 8 | from nni.hyperopt_tuner.hyperopt_tuner import HyperoptTuner 9 | from torch_geometric.utils import dropout_adj 10 | import random 11 | 12 | fix_seed(1234) 13 | class GraphSAGE(torch.nn.Module): 14 | 15 | def __init__(self, info): 16 | super(GraphSAGE, self).__init__() 17 | 18 | self.info = info 19 | self.hyperparameters = { 20 | 'num_layers': self.info['num_layers'], 21 | 'lr':self.info['lr'], 22 | 'dropedge_rate':self.info['dropedge_rate'], 23 | 'dropout_rate':self.info['dropout_rate'], 24 | 'hidden': self.info['init_hidden_size'], 25 | 'use_linear':self.info['use_linear'] 26 | } 27 | 28 | self.best_score = 0 29 | self.hist_score = [] 30 | 31 | self.best_preds = None 32 | self.current_round_best_preds = None 33 | self.best_valid_score = 0 34 | self.max_patience = 100 35 
| self.max_epochs = 1600 36 | 37 | self.name = 'GraphSAGE' 38 | 39 | self.best_hp = None 40 | self.tuner = HyperoptTuner(algorithm_name='tpe', optimize_mode='maximize') 41 | search_space = { 42 | "dropedge_rate": { 43 | "_type": "choice", 44 | "_value": [self.info['dropedge_rate']] 45 | }, 46 | "dropout_rate": { 47 | "_type": "choice", 48 | "_value": [self.info['dropout_rate']] 49 | }, 50 | "num_layers": { 51 | "_type": "quniform", 52 | "_value": [1, 3, 1] 53 | }, 54 | "hidden": { 55 | "_type": "quniform", 56 | "_value": [4, 7, 1] 57 | }, 58 | "lr":{ 59 | "_type": "choice", 60 | "_value": [0.005] 61 | }, 62 | "use_linear":{ 63 | "_type": "choice", 64 | "_value": [True, False] 65 | } 66 | 67 | } 68 | self.tuner.update_search_space(search_space) 69 | 70 | def init_model(self, n_class, feature_num): 71 | num_layers = int(self.hyperparameters['num_layers']) 72 | hidden_size = int(2 ** self.hyperparameters['hidden']) 73 | lr = self.hyperparameters['lr'] 74 | if self.hyperparameters['use_linear']: 75 | self.input_lin = Linear(feature_num, hidden_size) 76 | self.convs = torch.nn.ModuleList() 77 | for i in range(num_layers): 78 | self.convs.append(SAGEConv(hidden_size, hidden_size,normalize=True)) 79 | self.output_lin = Linear(hidden_size, n_class) 80 | else: 81 | if num_layers == 1: 82 | self.conv1 = SAGEConv(in_channels=feature_num, out_channels=n_class,normalize=True) 83 | else: 84 | self.conv1 = SAGEConv(in_channels=feature_num, out_channels=hidden_size,normalize=True) 85 | self.convs = torch.nn.ModuleList() 86 | for i in range(num_layers - 2): 87 | self.convs.append(SAGEConv(in_channels=hidden_size, out_channels=hidden_size)) 88 | self.conv2 = SAGEConv(in_channels=hidden_size, out_channels=n_class,normalize=True) 89 | 90 | self.optimizer = torch.optim.Adam(self.parameters(), lr=lr, weight_decay=5e-4) 91 | 92 | self = self.to('cuda') 93 | 94 | torch.cuda.empty_cache() 95 | 96 | def forward(self, data): 97 | x, edge_index, edge_weight = data.x, data.edge_index, data.edge_weight 98 | if self.hyperparameters['dropedge_rate'] is not None: 99 | edge_index, edge_weight = dropout_adj(edge_index, edge_weight, p=self.hyperparameters['dropedge_rate'],\ 100 | force_undirected=False, num_nodes=None, training=self.training) 101 | 102 | if self.hyperparameters['use_linear']: 103 | x = F.relu(self.input_lin(x)) 104 | else: 105 | x = F.relu(self.conv1(x, edge_index,edge_weight)) 106 | if self.hyperparameters['num_layers'] == 1: 107 | return x 108 | x = F.dropout(x, p=self.hyperparameters['dropout_rate'], training=self.training) 109 | for conv in self.convs: 110 | x = F.relu(conv(x, edge_index, edge_weight=edge_weight)) 111 | x = F.dropout(x, p=self.hyperparameters['dropout_rate'], training=self.training) 112 | if self.hyperparameters['use_linear']: 113 | x = self.output_lin(x) 114 | else: 115 | x = self.conv2(x, edge_index,edge_weight) 116 | return x 117 | 118 | def trial(self, data, round_num): 119 | n_class, feature_num = self.info['n_class'], data.x.shape[1] 120 | if round_num >= 2: 121 | self.hyperparameters = self.tuner.generate_parameters(round_num-1) 122 | print(self.hyperparameters) 123 | 124 | while True: 125 | try: 126 | self.init_model(n_class, feature_num) 127 | val_score = self.train_valid(data, round_num) 128 | if round_num > 1: 129 | self.tuner.receive_trial_result(round_num-1,self.hyperparameters,val_score) 130 | if val_score > self.best_score: 131 | self.best_hp = copy.deepcopy(self.hyperparameters) 132 | break 133 | except RuntimeError as e: 134 | print(self.name,e, 'OOM with Hidden Size', 
self.hyperparameters['hidden']) 135 | if round_num > 1: 136 | self.tuner.receive_trial_result(round_num-1,self.hyperparameters,0) 137 | return 0 138 | print("Best Hyperparameters of", self.name, self.best_hp) 139 | return val_score 140 | 141 | def train_valid(self, data, round_num): 142 | y, train_mask, valid_mask, test_mask, label_weights = data.y, data.train_mask, data.valid_mask, data.test_mask, data.label_weights 143 | 144 | patience = self.max_patience 145 | best_valid_score = 0 146 | valid_acc_meter = AverageMeter() 147 | for epoch in range(self.max_epochs): 148 | 149 | # train 150 | self.train() 151 | self.optimizer.zero_grad() 152 | preds = self.forward(data) 153 | loss = F.cross_entropy(preds[train_mask], y[train_mask], label_weights) 154 | loss.backward() 155 | self.optimizer.step() 156 | 157 | # valid 158 | self.eval() 159 | with torch.no_grad(): 160 | preds = F.softmax(self.forward(data), dim=-1) 161 | valid_preds, test_preds = preds[valid_mask], preds[test_mask] 162 | valid_score = f1_score(y[valid_mask].cpu(), valid_preds.max(1)[1].flatten().cpu(), average='micro') 163 | valid_acc_meter.update(valid_score) 164 | # patience 165 | if valid_acc_meter.avg > best_valid_score: 166 | best_valid_score = valid_acc_meter.avg 167 | self.current_round_best_preds = test_preds 168 | patience = self.max_patience 169 | else: 170 | patience -= 1 171 | 172 | if patience == 0: 173 | break 174 | 175 | return best_valid_score 176 | 177 | def predict(self): 178 | return self.current_round_best_preds.cpu().numpy() 179 | 180 | def __repr__(self): 181 | return self.__class__.__name__ -------------------------------------------------------------------------------- /code_submission/model_lib/incepgcn.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | from torch.nn import Linear 4 | import torch.nn as nn 5 | from torch_geometric.nn import GCNConv 6 | from sklearn.metrics import f1_score 7 | from utils.tools import fix_seed, AverageMeter 8 | from nni.hyperopt_tuner.hyperopt_tuner import HyperoptTuner 9 | from torch_geometric.utils import dropout_adj 10 | import copy 11 | fix_seed(1234) 12 | 13 | class GraphBaseBlock(torch.nn.Module): 14 | """ 15 | The base block for Multi-layer GCN / ResGCN / Dense GCN 16 | """ 17 | 18 | def __init__(self, in_features, out_features, nbaselayer, 19 | withbn=True, withloop=True, activation=F.relu, dropout=0.5, 20 | aggrmethod="concat", dense=False): 21 | """ 22 | The base block for constructing DeepGCN model. 23 | :param in_features: the input feature dimension. 24 | :param out_features: the hidden feature dimension. 25 | :param nbaselayer: the number of layers in the base block. 26 | :param withbn: using batch normalization in graph convolution. 27 | :param withloop: using self feature modeling in graph convolution. 28 | :param activation: the activation function, default is ReLu. 29 | :param dropout: the dropout ratio. 30 | :param aggrmethod: the aggregation function for baseblock, can be "concat" and "add". For "resgcn", the default 31 | is "add", for others the default is "concat". 
32 | :param dense: enable dense connection 33 | """ 34 | super(GraphBaseBlock, self).__init__() 35 | self.in_features = in_features 36 | self.hiddendim = out_features 37 | self.nhiddenlayer = nbaselayer 38 | self.activation = activation 39 | self.aggrmethod = aggrmethod 40 | self.dense = dense 41 | self.dropout = dropout 42 | 43 | self.hiddenlayers = nn.ModuleList() 44 | self.__makehidden() 45 | 46 | if self.aggrmethod == "concat" and dense == False: 47 | self.out_features = in_features + out_features 48 | elif self.aggrmethod == "concat" and dense == True: 49 | self.out_features = in_features + out_features * nbaselayer 50 | elif self.aggrmethod == "add": 51 | if in_features != self.hiddendim: 52 | raise RuntimeError("The dimension of in_features and hiddendim should be matched in add model.") 53 | self.out_features = out_features 54 | elif self.aggrmethod == "nores": 55 | self.out_features = out_features 56 | else: 57 | raise NotImplementedError("The aggregation method only support 'concat','add' and 'nores'.") 58 | 59 | def __makehidden(self): 60 | for i in range(self.nhiddenlayer): 61 | if i == 0: 62 | layer = GCNConv(self.in_features, self.hiddendim) 63 | else: 64 | layer = GCNConv(self.hiddendim, self.hiddendim) 65 | self.hiddenlayers.append(layer) 66 | 67 | def _doconcat(self, x, subx): 68 | if x is None: 69 | return subx 70 | if self.aggrmethod == "concat": 71 | return torch.cat((x, subx), 1) 72 | elif self.aggrmethod == "add": 73 | return x + subx 74 | elif self.aggrmethod == "nores": 75 | return x 76 | 77 | def forward(self, input, edge_index, edge_weight): 78 | x = input 79 | denseout = None 80 | # Here out is the result in all levels. 81 | for gc in self.hiddenlayers: 82 | denseout = self._doconcat(denseout, x) 83 | x = self.activation(gc(x, edge_index, edge_weight)) 84 | x = F.dropout(x, self.dropout, training=self.training) 85 | 86 | if not self.dense: 87 | return self._doconcat(x, input) 88 | return self._doconcat(x, denseout) 89 | 90 | def get_outdim(self): 91 | return self.out_features 92 | 93 | def __repr__(self): 94 | return "%s %s (%d - [%d:%d] > %d)" % (self.__class__.__name__, 95 | self.aggrmethod, 96 | self.in_features, 97 | self.hiddendim, 98 | self.nhiddenlayer, 99 | self.out_features) 100 | 101 | class InceptionGCNBlock(torch.nn.Module): 102 | """ 103 | The multiple layer GCN with inception connection block. 104 | """ 105 | 106 | def __init__(self, in_features, out_features, nbaselayer, 107 | dropout=0.5, aggrmethod="concat", dense=False): 108 | """ 109 | The multiple layer GCN with inception connection block. 110 | :param in_features: the input feature dimension. 111 | :param out_features: the hidden feature dimension. 112 | :param nbaselayer: the number of branches in the block; branch j stacks j 113 | GCNConv layers (j = 1, ..., nbaselayer), so the block mixes receptive 114 | fields of several sizes, inception-style, and merges each branch's 115 | output with the input according to aggrmethod. 116 | :param dropout: the dropout ratio. 117 | :param aggrmethod: the aggregation function for baseblock, can be "concat" and "add". For "resgcn", the default 118 | is "add", for others the default is "concat". 119 | :param dense: not applied. The default is False, cannot be changed.
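Example (illustrative numbers): with in_features=64, out_features=64,
nbaselayer=3 and the default aggrmethod="concat", get_outdim() returns
64 + 64 * 3 = 256.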
120 | """ 121 | super(InceptionGCNBlock, self).__init__() 122 | self.in_features = in_features 123 | self.out_features = out_features 124 | self.hiddendim = out_features 125 | self.nbaselayer = nbaselayer 126 | self.aggrmethod = aggrmethod 127 | self.dropout = dropout 128 | self.midlayers = torch.nn.ModuleList() 129 | self.__makehidden() 130 | 131 | if self.aggrmethod == "concat": 132 | self.out_features = in_features + out_features * nbaselayer 133 | elif self.aggrmethod == "add": 134 | if in_features != self.hiddendim: 135 | raise RuntimeError("The dimension of in_features and hiddendim should be matched in 'add' model.") 136 | self.out_features = out_features 137 | else: 138 | raise NotImplementedError("The aggregation method only support 'concat', 'add'.") 139 | 140 | def __makehidden(self): 141 | for j in range(self.nbaselayer): 142 | reslayer = torch.nn.ModuleList() 143 | for i in range(j + 1): 144 | if i == 0: 145 | layer = GCNConv(self.in_features, self.hiddendim) 146 | else: 147 | layer = GCNConv(self.hiddendim, self.hiddendim) 148 | reslayer.append(layer) 149 | self.midlayers.append(reslayer) 150 | 151 | def forward(self, input, edge_index, edge_weight): 152 | x = input 153 | for reslayer in self.midlayers: 154 | subx = input 155 | for gc in reslayer: 156 | subx = gc(subx, edge_index, edge_weight) 157 | subx = F.dropout(subx, p=self.dropout, training=self.training) 158 | x = self._doconcat(x, subx) 159 | return x 160 | 161 | def get_outdim(self): 162 | return self.out_features 163 | 164 | def _doconcat(self, x, subx): 165 | if self.aggrmethod == "concat": 166 | return torch.cat((x, subx), 1) 167 | elif self.aggrmethod == "add": 168 | return x + subx 169 | 170 | def __repr__(self): 171 | return "%s %s (%d - [%d:%d] > %d)" % (self.__class__.__name__, 172 | self.aggrmethod, 173 | self.in_features, 174 | self.hiddendim, 175 | self.nbaselayer, 176 | self.out_features) 177 | 178 | class IncepGCN(torch.nn.Module): 179 | 180 | def __init__(self, info): 181 | super(IncepGCN, self).__init__() 182 | self.info = info 183 | self.best_score = 0 184 | self.hist_score = [] 185 | 186 | self.best_preds = None 187 | self.current_round_best_preds = None 188 | self.best_valid_score = 0 189 | self.max_patience = 100 190 | self.max_epochs = 1600 191 | 192 | self.name = 'IncepGCN' 193 | 194 | self.tuner = HyperoptTuner(algorithm_name='tpe', optimize_mode='maximize') 195 | search_space = { 196 | "dropedge_rate": { 197 | "_type": "choice", 198 | "_value": [self.info['dropedge_rate']] 199 | }, 200 | "dropout_rate": { 201 | "_type": "choice", 202 | "_value": [self.info['dropout_rate']] 203 | }, 204 | "num_layers": { 205 | "_type": "quniform", 206 | "_value": [2, 4, 1] 207 | }, 208 | "hidden": { 209 | "_type": "quniform", 210 | "_value": [4, 7, 1] 211 | }, 212 | "lr":{ 213 | "_type": "choice", 214 | "_value": [0.005] 215 | } 216 | } 217 | self.tuner.update_search_space(search_space) 218 | self.hyperparameters = { 219 | 'num_layers': self.info['num_layers'], 220 | 'lr': self.info['lr'], 221 | 'dropedge_rate':self.info['dropedge_rate'], 222 | 'dropout_rate':self.info['dropout_rate'], 223 | 'hidden': self.info['init_hidden_size'] 224 | } 225 | self.best_hp = None 226 | 227 | def init_model(self, n_class, features_num): 228 | hidden = int(2 ** self.hyperparameters['hidden']) 229 | num_layers = int(self.hyperparameters['num_layers']) 230 | self.in_lin = nn.Linear(features_num, hidden) 231 | self.incep_conv = InceptionGCNBlock(hidden, hidden, nbaselayer=num_layers, dropout=self.hyperparameters['dropout_rate']) 232 
| self.out_lin = nn.Linear(self.incep_conv.get_outdim(), n_class) 233 | 234 | self.optimizer = torch.optim.Adam(self.parameters(), lr=self.hyperparameters['lr'], weight_decay=5e-4) 235 | 236 | self = self.to('cuda') 237 | 238 | torch.cuda.empty_cache() 239 | 240 | def forward(self, data): 241 | x, edge_index, edge_weight = data.x, data.edge_index, data.edge_weight 242 | if self.hyperparameters['dropedge_rate'] is not None: 243 | edge_index, edge_weight = dropout_adj(edge_index, edge_weight, p=self.hyperparameters['dropedge_rate'],\ 244 | force_undirected=False, num_nodes=None, training=self.training) 245 | x = self.in_lin(x) 246 | x = F.dropout(x, p=self.hyperparameters['dropout_rate'], training=self.training) 247 | x = self.incep_conv(x, edge_index, edge_weight) 248 | x = self.out_lin(x) 249 | return x 250 | 251 | def trial(self, data, round_num): 252 | n_class, feature_num = self.info['n_class'], data.x.shape[1] 253 | if round_num >= 2: 254 | self.hyperparameters = self.tuner.generate_parameters(round_num-1) 255 | print(self.hyperparameters) 256 | 257 | while True: 258 | try: 259 | self.init_model(n_class, feature_num) 260 | val_score = self.train_valid(data, round_num) 261 | if round_num > 1: 262 | self.tuner.receive_trial_result(round_num-1,self.hyperparameters,val_score) 263 | if val_score > self.best_score: 264 | self.best_hp = copy.deepcopy(self.hyperparameters) 265 | break 266 | except RuntimeError as e: 267 | print(self.name,e, 'OOM with Hidden Size', self.hyperparameters['hidden']) 268 | if round_num > 1: 269 | self.tuner.receive_trial_result(round_num-1,self.hyperparameters,0) 270 | return 0 271 | print("Best Hyperparameters of", self.name, self.best_hp) 272 | return val_score 273 | 274 | 275 | def train_valid(self, data, round_num): 276 | y, train_mask, valid_mask, test_mask, label_weights = data.y, data.train_mask, data.valid_mask, data.test_mask, data.label_weights 277 | 278 | score_meter = AverageMeter() 279 | patience = self.max_patience 280 | best_valid_score = 0 281 | for epoch in range(self.max_epochs): 282 | 283 | # train 284 | self.train() 285 | self.optimizer.zero_grad() 286 | preds = self.forward(data) 287 | loss = F.cross_entropy(preds[train_mask], y[train_mask], label_weights) 288 | loss.backward() 289 | self.optimizer.step() 290 | 291 | # valid 292 | self.eval() 293 | with torch.no_grad(): 294 | preds = F.softmax(self.forward(data), dim=-1) 295 | valid_preds, test_preds = preds[valid_mask], preds[test_mask] 296 | valid_score = f1_score(y[valid_mask].cpu(), valid_preds.max(1)[1].flatten().cpu(), average='micro') 297 | 298 | score_meter.update(valid_score) 299 | 300 | # patience 301 | if score_meter.avg > best_valid_score: 302 | best_valid_score = score_meter.avg 303 | self.current_round_best_preds = test_preds 304 | patience = self.max_patience 305 | else: 306 | patience -= 1 307 | 308 | if patience == 0: 309 | break 310 | 311 | return best_valid_score 312 | 313 | def predict(self): 314 | if self.current_round_best_preds is not None: 315 | return self.current_round_best_preds.cpu().numpy() 316 | else: 317 | return None 318 | 319 | def __repr__(self): 320 | return self.__class__.__name__ -------------------------------------------------------------------------------- /code_submission/model_lib/jkgcn.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | from torch.nn import Linear 4 | import torch.nn as nn 5 | from torch_geometric.nn import GCNConv 6 | from sklearn.metrics import 
f1_score 7 | from utils.tools import fix_seed, AverageMeter 8 | from nni.hyperopt_tuner.hyperopt_tuner import HyperoptTuner 9 | from torch_geometric.utils import dropout_adj 10 | import copy 11 | fix_seed(1234) 12 | 13 | class GraphBaseBlock(torch.nn.Module): 14 | """ 15 | The base block for Multi-layer GCN / ResGCN / Dense GCN 16 | """ 17 | 18 | def __init__(self, in_features, out_features, nbaselayer, 19 | withbn=True, withloop=True, activation=F.relu, dropout=0.5, 20 | aggrmethod="concat", dense=False): 21 | """ 22 | The base block for constructing DeepGCN model. 23 | :param in_features: the input feature dimension. 24 | :param out_features: the hidden feature dimension. 25 | :param nbaselayer: the number of layers in the base block. 26 | :param withbn: using batch normalization in graph convolution. 27 | :param withloop: using self feature modeling in graph convolution. 28 | :param activation: the activation function, default is ReLu. 29 | :param dropout: the dropout ratio. 30 | :param aggrmethod: the aggregation function for baseblock, can be "concat" and "add". For "resgcn", the default 31 | is "add", for others the default is "concat". 32 | :param dense: enable dense connection 33 | """ 34 | super(GraphBaseBlock, self).__init__() 35 | self.in_features = in_features 36 | self.hiddendim = out_features 37 | self.nhiddenlayer = nbaselayer 38 | self.activation = activation 39 | self.aggrmethod = aggrmethod 40 | self.dense = dense 41 | self.dropout = dropout 42 | self.hiddenlayers = nn.ModuleList() 43 | self.__makehidden() 44 | 45 | if self.aggrmethod == "concat" and dense == False: 46 | self.out_features = in_features + out_features 47 | elif self.aggrmethod == "concat" and dense == True: 48 | self.out_features = in_features + out_features * nbaselayer 49 | elif self.aggrmethod == "add": 50 | if in_features != self.hiddendim: 51 | raise RuntimeError("The dimension of in_features and hiddendim should be matched in add model.") 52 | self.out_features = out_features 53 | elif self.aggrmethod == "nores": 54 | self.out_features = out_features 55 | else: 56 | raise NotImplementedError("The aggregation method only support 'concat','add' and 'nores'.") 57 | 58 | def __makehidden(self): 59 | for i in range(self.nhiddenlayer): 60 | if i == 0: 61 | layer = GCNConv(self.in_features, self.hiddendim) 62 | else: 63 | layer = GCNConv(self.hiddendim, self.hiddendim) 64 | self.hiddenlayers.append(layer) 65 | 66 | def _doconcat(self, x, subx): 67 | if x is None: 68 | return subx 69 | if self.aggrmethod == "concat": 70 | return torch.cat((x, subx), 1) 71 | elif self.aggrmethod == "add": 72 | return x + subx 73 | elif self.aggrmethod == "nores": 74 | return x 75 | 76 | def forward(self, input, edge_index, edge_weight): 77 | x = input 78 | denseout = None 79 | # Here out is the result in all levels. 80 | for gc in self.hiddenlayers: 81 | denseout = self._doconcat(denseout, x) 82 | x = self.activation(gc(x, edge_index, edge_weight)) 83 | x = F.dropout(x, self.dropout, training=self.training) 84 | 85 | if not self.dense: 86 | return self._doconcat(x, input) 87 | return self._doconcat(x, denseout) 88 | 89 | def get_outdim(self): 90 | return self.out_features 91 | 92 | def __repr__(self): 93 | return "%s %s (%d - [%d:%d] > %d)" % (self.__class__.__name__, 94 | self.aggrmethod, 95 | self.in_features, 96 | self.hiddendim, 97 | self.nhiddenlayer, 98 | self.out_features) 99 | 100 | class DenseGCNBlock(torch.nn.Module): 101 | """ 102 | The multiple layer GCN with dense connection block. 
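The block output concatenates the input with every intermediate layer's
output (out_features = in_features + out_features * nbaselayer under the
default "concat" aggregation), giving the jumping-knowledge-style features
that JKGCN feeds to its output layer.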
103 | """ 104 | 105 | def __init__(self, in_features, out_features, nbaselayer, 106 | withbn=True, withloop=True, activation=F.relu, dropout=True, 107 | aggrmethod="concat", dense=True): 108 | """ 109 | The multiple layer GCN with dense connection block. 110 | :param in_features: the input feature dimension. 111 | :param out_features: the hidden feature dimension. 112 | :param nbaselayer: the number of layers in the base block. 113 | :param withbn: using batch normalization in graph convolution. 114 | :param withloop: using self feature modeling in graph convolution. 115 | :param activation: the activation function, default is ReLu. 116 | :param dropout: the dropout ratio. 117 | :param aggrmethod: the aggregation function for the output. For denseblock, default is "concat". 118 | :param dense: default is True, cannot be changed. 119 | """ 120 | super(DenseGCNBlock, self).__init__() 121 | self.model = GraphBaseBlock(in_features=in_features, 122 | out_features=out_features, 123 | nbaselayer=nbaselayer, 124 | withbn=withbn, 125 | withloop=withloop, 126 | activation=activation, 127 | dropout=dropout, 128 | dense=True, 129 | aggrmethod=aggrmethod) 130 | 131 | def forward(self, input, edge_index, edge_weight): 132 | return self.model.forward(input, edge_index, edge_weight) 133 | 134 | def get_outdim(self): 135 | return self.model.get_outdim() 136 | 137 | def __repr__(self): 138 | return "%s %s (%d - [%d:%d] > %d)" % (self.__class__.__name__, 139 | self.model.aggrmethod, 140 | self.model.in_features, 141 | self.model.hiddendim, 142 | self.model.nhiddenlayer, 143 | self.model.out_features) 144 | 145 | class JKGCN(torch.nn.Module): 146 | 147 | def __init__(self, info): 148 | super(JKGCN, self).__init__() 149 | self.info = info 150 | self.best_score = 0 151 | self.hist_score = [] 152 | 153 | self.best_preds = None 154 | self.current_round_best_preds = None 155 | self.best_valid_score = 0 156 | self.max_patience = 100 157 | self.max_epochs = 1600 158 | 159 | self.name = 'JKGCN' 160 | 161 | self.tuner = HyperoptTuner(algorithm_name='tpe', optimize_mode='maximize') 162 | search_space = { 163 | "dropedge_rate": { 164 | "_type": "choice", 165 | "_value": [self.info['dropedge_rate']] 166 | }, 167 | "dropout_rate": { 168 | "_type": "choice", 169 | "_value": [self.info['dropout_rate']] 170 | }, 171 | "num_layers": { 172 | "_type": "quniform", 173 | "_value": [2, 4, 1] 174 | }, 175 | "hidden": { 176 | "_type": "quniform", 177 | "_value": [4, 7, 1] 178 | }, 179 | "lr":{ 180 | "_type": "choice", 181 | "_value": [0.005] 182 | } 183 | } 184 | self.tuner.update_search_space(search_space) 185 | self.hyperparameters = { 186 | 'num_layers': self.info['num_layers'], 187 | 'lr': 0.005, 188 | 'dropedge_rate':self.info['dropedge_rate'], 189 | 'dropout_rate':0.5, 190 | 'hidden': self.info['init_hidden_size'] 191 | } 192 | self.best_hp = None 193 | 194 | def init_model(self, n_class, features_num): 195 | hidden = int(2 ** self.hyperparameters['hidden']) 196 | num_layers = int(self.hyperparameters['num_layers']) 197 | self.in_lin = nn.Linear(features_num, hidden) 198 | self.jk_conv = DenseGCNBlock(hidden, hidden, nbaselayer=num_layers, dropout=self.hyperparameters['dropout_rate']) 199 | self.out_lin = nn.Linear(self.jk_conv.get_outdim(), n_class) 200 | 201 | self.optimizer = torch.optim.Adam(self.parameters(), lr=self.hyperparameters['lr'], weight_decay=5e-4) 202 | 203 | self = self.to('cuda') 204 | 205 | torch.cuda.empty_cache() 206 | 207 | def forward(self, data): 208 | x, edge_index, edge_weight = data.x, data.edge_index,
data.edge_weight 209 | if self.hyperparameters['dropedge_rate'] is not None: 210 | edge_index, edge_weight = dropout_adj(edge_index, edge_weight, p=self.hyperparameters['dropedge_rate'],\ 211 | force_undirected=False, num_nodes=None, training=self.training) 212 | x = self.in_lin(x) 213 | x = F.dropout(x, p=self.hyperparameters['dropout_rate'], training=self.training) 214 | x = self.jk_conv(x, edge_index, edge_weight) 215 | x = self.out_lin(x) 216 | return x 217 | 218 | def trial(self, data, round_num): 219 | n_class, feature_num = self.info['n_class'], data.x.shape[1] 220 | if round_num >= 2: 221 | self.hyperparameters = self.tuner.generate_parameters(round_num-1) 222 | print(self.hyperparameters) 223 | 224 | while True: 225 | try: 226 | self.init_model(n_class, feature_num) 227 | val_score = self.train_valid(data, round_num) 228 | if round_num > 1: 229 | self.tuner.receive_trial_result(round_num-1,self.hyperparameters,val_score) 230 | if val_score > self.best_score: 231 | self.best_hp = copy.deepcopy(self.hyperparameters) 232 | break 233 | except RuntimeError as e: 234 | print(self.name,e, 'OOM with Hidden Size', self.hyperparameters['hidden']) 235 | if round_num > 1: 236 | self.tuner.receive_trial_result(round_num-1,self.hyperparameters,0) 237 | return 0 238 | print("Best Hyperparameters of", self.name, self.best_hp) 239 | return val_score 240 | 241 | def train_valid(self, data, round_num): 242 | y, train_mask, valid_mask, test_mask, label_weights = data.y, data.train_mask, data.valid_mask, data.test_mask, data.label_weights 243 | 244 | score_meter = AverageMeter() 245 | patience = self.max_patience 246 | best_valid_score = 0 247 | for epoch in range(self.max_epochs): 248 | 249 | # train 250 | self.train() 251 | self.optimizer.zero_grad() 252 | preds = self.forward(data) 253 | loss = F.cross_entropy(preds[train_mask], y[train_mask], label_weights) 254 | loss.backward() 255 | self.optimizer.step() 256 | 257 | # valid 258 | self.eval() 259 | with torch.no_grad(): 260 | preds = F.softmax(self.forward(data), dim=-1) 261 | valid_preds, test_preds = preds[valid_mask], preds[test_mask] 262 | valid_score = f1_score(y[valid_mask].cpu(), valid_preds.max(1)[1].flatten().cpu(), average='micro') 263 | 264 | score_meter.update(valid_score) 265 | 266 | # patience 267 | if score_meter.avg > best_valid_score: 268 | best_valid_score = score_meter.avg 269 | self.current_round_best_preds = test_preds 270 | patience = self.max_patience 271 | else: 272 | patience -= 1 273 | 274 | if patience == 0: 275 | break 276 | 277 | return best_valid_score 278 | 279 | def predict(self): 280 | if self.current_round_best_preds is not None: 281 | return self.current_round_best_preds.cpu().numpy() 282 | else: 283 | return None 284 | 285 | def __repr__(self): 286 | return self.__class__.__name__ 287 | -------------------------------------------------------------------------------- /code_submission/model_lib/resgcn.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | from torch.nn import Linear 4 | import torch.nn as nn 5 | from torch_geometric.nn import GCNConv 6 | from sklearn.metrics import f1_score 7 | from utils.tools import fix_seed, AverageMeter 8 | from nni.hyperopt_tuner.hyperopt_tuner import HyperoptTuner 9 | from torch_geometric.utils import dropout_adj 10 | import copy 11 | fix_seed(1234) 12 | 13 | class GraphBaseBlock(torch.nn.Module): 14 | """ 15 | The base block for Multi-layer GCN / ResGCN / Dense GCN 16 | """ 17 | 18 | def 
__init__(self, in_features, out_features, nbaselayer, 19 | withbn=True, withloop=True, activation=F.relu, dropout=0.5, 20 | aggrmethod="concat", dense=False): 21 | """ 22 | The base block for constructing DeepGCN model. 23 | :param in_features: the input feature dimension. 24 | :param out_features: the hidden feature dimension. 25 | :param nbaselayer: the number of layers in the base block. 26 | :param withbn: using batch normalization in graph convolution. 27 | :param withloop: using self feature modeling in graph convolution. 28 | :param activation: the activation function, default is ReLu. 29 | :param dropout: the dropout ratio. 30 | :param aggrmethod: the aggregation function for baseblock, can be "concat" and "add". For "resgcn", the default 31 | is "add", for others the default is "concat". 32 | :param dense: enable dense connection 33 | """ 34 | super(GraphBaseBlock, self).__init__() 35 | self.in_features = in_features 36 | self.hiddendim = out_features 37 | self.nhiddenlayer = nbaselayer 38 | self.activation = activation 39 | self.aggrmethod = aggrmethod 40 | self.dense = dense 41 | self.dropout = dropout 42 | self.hiddenlayers = nn.ModuleList() 43 | self.__makehidden() 44 | 45 | if self.aggrmethod == "concat" and dense == False: 46 | self.out_features = in_features + out_features 47 | elif self.aggrmethod == "concat" and dense == True: 48 | self.out_features = in_features + out_features * nbaselayer 49 | elif self.aggrmethod == "add": 50 | if in_features != self.hiddendim: 51 | raise RuntimeError("The dimension of in_features and hiddendim should be matched in add model.") 52 | self.out_features = out_features 53 | elif self.aggrmethod == "nores": 54 | self.out_features = out_features 55 | else: 56 | raise NotImplementedError("The aggregation method only support 'concat','add' and 'nores'.") 57 | 58 | def __makehidden(self): 59 | for i in range(self.nhiddenlayer): 60 | if i == 0: 61 | layer = GCNConv(self.in_features, self.hiddendim) 62 | else: 63 | layer = GCNConv(self.hiddendim, self.hiddendim) 64 | self.hiddenlayers.append(layer) 65 | 66 | def _doconcat(self, x, subx): 67 | if x is None: 68 | return subx 69 | if self.aggrmethod == "concat": 70 | return torch.cat((x, subx), 1) 71 | elif self.aggrmethod == "add": 72 | return x + subx 73 | elif self.aggrmethod == "nores": 74 | return x 75 | 76 | def forward(self, input, edge_index, edge_weight): 77 | x = input 78 | denseout = None 79 | # Here out is the result in all levels. 80 | for gc in self.hiddenlayers: 81 | denseout = self._doconcat(denseout, x) 82 | x = self.activation(gc(x, edge_index, edge_weight)) 83 | x = F.dropout(x, self.dropout, training=self.training) 84 | 85 | if not self.dense: 86 | return self._doconcat(x, input) 87 | return self._doconcat(x, denseout) 88 | 89 | def get_outdim(self): 90 | return self.out_features 91 | 92 | def __repr__(self): 93 | return "%s %s (%d - [%d:%d] > %d)" % (self.__class__.__name__, 94 | self.aggrmethod, 95 | self.in_features, 96 | self.hiddendim, 97 | self.nhiddenlayer, 98 | self.out_features) 99 | 100 | class ResGCNBlock(torch.nn.Module): 101 | """ 102 | The multiple layer GCN with residual connection block. 103 | """ 104 | 105 | def __init__(self, in_features, out_features, nbaselayer, 106 | withbn=True, withloop=True, activation=F.relu, dropout=True, 107 | aggrmethod=None, dense=None): 108 | """ 109 | The multiple layer GCN with residual connection block. 110 | :param in_features: the input feature dimension. 111 | :param out_features: the hidden feature dimension. 
112 | :param nbaselayer: the number of layers in the base block. 113 | :param withbn: using batch normalization in graph convolution. 114 | :param withloop: using self feature modeling in graph convolution. 115 | :param activation: the activation function, default is ReLu. 116 | :param dropout: the dropout ratio. 117 | :param aggrmethod: not applied. 118 | :param dense: not applied. 119 | """ 120 | super(ResGCNBlock, self).__init__() 121 | self.model = GraphBaseBlock(in_features=in_features, 122 | out_features=out_features, 123 | nbaselayer=nbaselayer, 124 | withbn=withbn, 125 | withloop=withloop, 126 | activation=activation, 127 | dropout=dropout, 128 | dense=False, 129 | aggrmethod="add") 130 | 131 | def forward(self, input, edge_index, edge_weight): 132 | return self.model.forward(input, edge_index, edge_weight) 133 | 134 | def get_outdim(self): 135 | return self.model.get_outdim() 136 | 137 | def __repr__(self): 138 | return "%s %s (%d - [%d:%d] > %d)" % (self.__class__.__name__, 139 | self.model.aggrmethod, 140 | self.model.in_features, 141 | self.model.hiddendim, 142 | self.model.nhiddenlayer, 143 | self.model.out_features) 144 | 145 | class ResGCN(torch.nn.Module): 146 | 147 | def __init__(self, info): 148 | super(ResGCN, self).__init__() 149 | self.info = info 150 | self.best_score = 0 151 | self.hist_score = [] 152 | 153 | self.best_preds = None 154 | self.current_round_best_preds = None 155 | self.best_valid_score = 0 156 | self.max_patience = 100 157 | self.max_epochs = 1600 158 | 159 | self.name = 'ResGCN' 160 | 161 | self.tuner = HyperoptTuner(algorithm_name='tpe', optimize_mode='maximize') 162 | search_space = { 163 | "dropedge_rate": { 164 | "_type": "choice", 165 | "_value": [self.info['dropedge_rate']] 166 | }, 167 | "dropout_rate": { 168 | "_type": "choice", 169 | "_value": [self.info['dropout_rate']] 170 | }, 171 | "num_layers": { 172 | "_type": "quniform", 173 | "_value": [2, 4, 1] 174 | }, 175 | "hidden": { 176 | "_type": "quniform", 177 | "_value": [4, 7, 1] 178 | }, 179 | "lr":{ 180 | "_type": "choice", 181 | "_value": [0.005] 182 | } 183 | } 184 | self.tuner.update_search_space(search_space) 185 | self.hyperparameters = { 186 | 'num_layers': self.info['num_layers'], 187 | 'lr': 0.005, 188 | 'dropedge_rate':self.info['dropedge_rate'], 189 | 'dropout_rate':0.5, 190 | 'hidden': self.info['init_hidden_size'] 191 | } 192 | self.best_hp = { 193 | 'num_layers': self.info['num_layers'], 194 | 'lr': 0.005, 195 | 'dropedge_rate':self.info['dropedge_rate'], 196 | 'dropout_rate':0.5, 197 | 'hidden': self.info['init_hidden_size'] 198 | } 199 | 200 | def init_model(self, n_class, features_num): 201 | hidden = int(2 ** self.hyperparameters['hidden']) 202 | num_layers = int(self.hyperparameters['num_layers']) 203 | self.in_lin = nn.Linear(features_num, hidden) 204 | self.res_conv = ResGCNBlock(hidden, hidden, nbaselayer=num_layers, dropout=self.hyperparameters['dropout_rate']) 205 | self.out_lin = nn.Linear(self.res_conv.get_outdim(), n_class) 206 | 207 | self.optimizer = torch.optim.Adam(self.parameters(), lr=self.hyperparameters['lr'], weight_decay=5e-4) 208 | 209 | self = self.to('cuda') 210 | 211 | torch.cuda.empty_cache() 212 | 213 | def forward(self, data): 214 | x, edge_index, edge_weight = data.x, data.edge_index, data.edge_weight 215 | if self.hyperparameters['dropedge_rate'] is not None: 216 | edge_index, edge_weight = dropout_adj(edge_index, edge_weight, p=self.hyperparameters['dropedge_rate'],\ 217 | force_undirected=False, num_nodes=None, training=self.training) 218 | x =
self.in_lin(x) 219 | x = F.dropout(x, p=self.hyperparameters['dropout_rate'], training=self.training) 220 | x = self.res_conv(x, edge_index, edge_weight) 221 | x = self.out_lin(x) 222 | return x 223 | 224 | def trial(self, data, round_num): 225 | n_class, feature_num = self.info['n_class'], data.x.shape[1] 226 | if round_num >= 2: 227 | self.hyperparameters = self.tuner.generate_parameters(round_num-1) 228 | print(self.hyperparameters) 229 | 230 | while True: 231 | try: 232 | self.init_model(n_class, feature_num) 233 | val_score = self.train_valid(data, round_num) 234 | if round_num > 1: 235 | self.tuner.receive_trial_result(round_num-1,self.hyperparameters,val_score) 236 | if val_score > self.best_score: 237 | self.best_hp = copy.deepcopy(self.hyperparameters) 238 | break 239 | except RuntimeError as e: 240 | print(self.name,e, 'OOM with Hidden Size', self.hyperparameters['hidden']) 241 | if round_num > 1: 242 | self.tuner.receive_trial_result(round_num-1,self.hyperparameters,0) 243 | return 0 244 | print("Best Hyperparameters of", self.name, self.best_hp) 245 | return val_score 246 | 247 | def train_valid(self, data, round_num): 248 | y, train_mask, valid_mask, test_mask, label_weights = data.y, data.train_mask, data.valid_mask, data.test_mask, data.label_weights 249 | 250 | score_meter = AverageMeter() 251 | patience = self.max_patience 252 | best_valid_score = 0 253 | for epoch in range(self.max_epochs): 254 | 255 | # train 256 | self.train() 257 | self.optimizer.zero_grad() 258 | preds = self.forward(data) 259 | loss = F.cross_entropy(preds[train_mask], y[train_mask], label_weights) 260 | loss.backward() 261 | self.optimizer.step() 262 | 263 | # valid 264 | self.eval() 265 | with torch.no_grad(): 266 | preds = F.softmax(self.forward(data), dim=-1) 267 | valid_preds, test_preds = preds[valid_mask], preds[test_mask] 268 | valid_score = f1_score(y[valid_mask].cpu(), valid_preds.max(1)[1].flatten().cpu(), average='micro') 269 | 270 | score_meter.update(valid_score) 271 | 272 | # patience 273 | if score_meter.avg > best_valid_score: 274 | best_valid_score = score_meter.avg 275 | self.current_round_best_preds = test_preds 276 | patience = self.max_patience 277 | else: 278 | patience -= 1 279 | 280 | if patience == 0: 281 | break 282 | 283 | return best_valid_score 284 | 285 | def predict(self): 286 | if self.current_round_best_preds is not None: 287 | return self.current_round_best_preds.cpu().numpy() 288 | else: 289 | return None 290 | 291 | def __repr__(self): 292 | return self.__class__.__name__ 293 | -------------------------------------------------------------------------------- /code_submission/model_lib/sg.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | from torch.nn import Linear 4 | from torch_geometric.nn import SGConv 5 | import copy 6 | from sklearn.metrics import f1_score 7 | from utils.tools import fix_seed,AverageMeter 8 | from nni.hyperopt_tuner.hyperopt_tuner import HyperoptTuner 9 | from torch_geometric.utils import dropout_adj 10 | import random 11 | 12 | fix_seed(1234) 13 | class SG(torch.nn.Module): 14 | 15 | def __init__(self, info): 16 | super(SG, self).__init__() 17 | 18 | self.info = info 19 | self.best_score = 0 20 | self.hist_score = [] 21 | self.best_preds = None 22 | self.current_round_best_preds = None 23 | self.max_patience = 100 24 | self.max_epochs = 1600 25 | self.name = 'SG' 26 | 27 | self.hyperparameters = { 28 | 'num_layers': self.info['num_layers'], 29 | 'lr': 
0.005, 30 | 'K': 3, 31 | 'dropedge_rate':self.info['dropedge_rate'], 32 | 'dropout_rate':0.5, 33 | 'hidden': self.info['init_hidden_size'], 34 | 'use_linear':self.info['use_linear'] 35 | } 36 | 37 | self.best_hp = None 38 | self.tuner = HyperoptTuner(algorithm_name='tpe', optimize_mode='maximize') 39 | search_space = { 40 | "dropedge_rate": { 41 | "_type": "choice", 42 | "_value": [self.info['dropedge_rate']] 43 | }, 44 | "dropout_rate": { 45 | "_type": "choice", 46 | "_value": [self.info['dropout_rate']] 47 | }, 48 | "num_layers": { 49 | "_type": "quniform", 50 | "_value": [1, 3, 1] 51 | }, 52 | "hidden": { 53 | "_type": "quniform", 54 | "_value": [4, 7, 1] 55 | }, 56 | "lr":{ 57 | "_type": "choice", 58 | "_value": [0.005] 59 | }, 60 | 'K' :{ 61 | "_type": "quniform", 62 | "_value": [1, 6, 1] 63 | }, 64 | "use_linear":{ 65 | "_type": "choice", 66 | "_value": [True,False] 67 | } 68 | } 69 | self.tuner.update_search_space(search_space) 70 | 71 | def init_model(self, n_class, feature_num): 72 | num_layers = int(self.hyperparameters['num_layers']) 73 | hidden_size = int(2 ** self.hyperparameters['hidden']) 74 | K = int(self.hyperparameters['K']) 75 | lr = self.hyperparameters['lr'] 76 | if self.hyperparameters['use_linear']: 77 | self.input_lin = Linear(feature_num, hidden_size) 78 | self.convs = torch.nn.ModuleList() 79 | for i in range(num_layers): 80 | self.convs.append(SGConv(in_channels=hidden_size, out_channels=hidden_size, K=K, cached=False)) 81 | self.output_lin = Linear(hidden_size, n_class) 82 | else: 83 | if num_layers == 1: 84 | self.conv1 = SGConv(in_channels=feature_num, out_channels=n_class, K=K, cached=False) 85 | else: 86 | self.conv1 = SGConv(in_channels=feature_num, out_channels=hidden_size, K=K, cached=False) 87 | self.convs = torch.nn.ModuleList() 88 | for i in range(num_layers - 2): 89 | self.convs.append(SGConv(in_channels=hidden_size, out_channels=hidden_size, K=K, cached=False)) 90 | self.conv2 = SGConv(in_channels=hidden_size, out_channels=n_class, K=K, cached=False) 91 | 92 | self.optimizer = torch.optim.Adam(self.parameters(), lr=lr, weight_decay=5e-4) 93 | 94 | self = self.to('cuda') 95 | 96 | torch.cuda.empty_cache() 97 | 98 | def forward(self, data): 99 | x, edge_index, edge_weight = data.x, data.edge_index, data.edge_weight 100 | if self.hyperparameters['dropedge_rate'] is not None: 101 | edge_index, edge_weight = dropout_adj(edge_index, edge_weight, p=self.hyperparameters['dropedge_rate'],\ 102 | force_undirected=False, num_nodes=None, training=self.training) 103 | 104 | if self.hyperparameters['use_linear']: 105 | x = F.relu(self.input_lin(x)) 106 | else: 107 | x = F.relu(self.conv1(x, edge_index,edge_weight)) 108 | if self.hyperparameters['num_layers'] == 1: 109 | return x 110 | x = F.dropout(x, p=self.hyperparameters['dropout_rate'], training=self.training) 111 | for conv in self.convs: 112 | x = F.relu(conv(x, edge_index, edge_weight=edge_weight)) 113 | x = F.dropout(x, p=self.hyperparameters['dropout_rate'], training=self.training) 114 | if self.hyperparameters['use_linear']: 115 | x = self.output_lin(x) 116 | else: 117 | x = self.conv2(x, edge_index,edge_weight) 118 | return x 119 | 120 | def trial(self, data, round_num): 121 | n_class, feature_num = self.info['n_class'], data.x.shape[1] 122 | if round_num >= 2: 123 | self.hyperparameters = self.tuner.generate_parameters(round_num-1) 124 | print(self.hyperparameters) 125 | 126 | while True: 127 | try: 128 | self.init_model(n_class, feature_num) 129 | val_score = self.train_valid(data, round_num) 130 | if 
round_num > 1: 131 | self.tuner.receive_trial_result(round_num-1,self.hyperparameters,val_score) 132 | if val_score > self.best_score: 133 | self.best_hp = copy.deepcopy(self.hyperparameters) 134 | break 135 | except RuntimeError as e: 136 | print(self.name,e, 'OOM with Hidden Size', self.hyperparameters['hidden']) 137 | if round_num > 1: 138 | self.tuner.receive_trial_result(round_num-1,self.hyperparameters,0) 139 | return 0 140 | print("Best Hyperparameters of", self.name, self.best_hp) 141 | return val_score 142 | 143 | def train_valid(self, data, round_num): 144 | y, train_mask, valid_mask, test_mask, label_weights = data.y, data.train_mask, data.valid_mask, data.test_mask, data.label_weights 145 | patience = self.max_patience 146 | best_valid_score = 0 147 | valid_acc_meter = AverageMeter() 148 | for epoch in range(self.max_epochs): 149 | 150 | # train 151 | self.train() 152 | self.optimizer.zero_grad() 153 | preds = self.forward(data) 154 | loss = F.cross_entropy(preds[train_mask], y[train_mask], label_weights) 155 | loss.backward() 156 | self.optimizer.step() 157 | 158 | # valid 159 | self.eval() 160 | with torch.no_grad(): 161 | preds = F.softmax(self.forward(data), dim=-1) 162 | valid_preds, test_preds = preds[valid_mask], preds[test_mask] 163 | valid_score = f1_score(y[valid_mask].cpu(), valid_preds.max(1)[1].flatten().cpu(), average='micro') 164 | valid_acc_meter.update(valid_score) 165 | # patience 166 | if valid_acc_meter.avg > best_valid_score: 167 | best_valid_score = valid_acc_meter.avg 168 | self.current_round_best_preds = test_preds 169 | patience = self.max_patience 170 | else: 171 | patience -= 1 172 | 173 | if patience == 0: 174 | break 175 | 176 | return best_valid_score 177 | 178 | def predict(self): 179 | return self.current_round_best_preds.cpu().numpy() 180 | 181 | def __repr__(self): 182 | return self.__class__.__name__ -------------------------------------------------------------------------------- /code_submission/model_lib/tag.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | from torch.nn import Linear 4 | from torch_geometric.nn import TAGConv 5 | import copy 6 | from sklearn.metrics import f1_score 7 | from utils.tools import fix_seed, AverageMeter 8 | from nni.hyperopt_tuner.hyperopt_tuner import HyperoptTuner 9 | from torch_geometric.utils import dropout_adj 10 | import random 11 | 12 | fix_seed(1234) 13 | class TAG(torch.nn.Module): 14 | 15 | def __init__(self, info): 16 | super(TAG, self).__init__() 17 | 18 | self.info = info 19 | self.hyperparameters ={ 20 | 'num_layers': self.info['num_layers'], 21 | 'lr': self.info['lr'], 22 | 'dropedge_rate': self.info['dropedge_rate'], 23 | 'dropout_rate': self.info['dropout_rate'], 24 | 'K': 3, 25 | 'hidden': self.info['init_hidden_size'], 26 | 'use_linear': self.info['use_linear'] 27 | } 28 | 29 | self.best_score = 0 30 | self.hist_score = [] 31 | self.best_preds = None 32 | self.current_round_best_preds = None 33 | self.best_valid_score = 0 34 | self.max_patience = 100 35 | self.max_epochs = 1600 36 | 37 | self.name = 'TAG' 38 | self.best_hp = None 39 | self.tuner = HyperoptTuner(algorithm_name='tpe', optimize_mode='maximize') 40 | search_space = { 41 | "dropedge_rate": { 42 | "_type": "choice", 43 | "_value": [self.info['dropedge_rate']] 44 | }, 45 | "dropout_rate": { 46 | "_type": "choice", 47 | "_value": [self.info['dropout_rate']] 48 | }, 49 | "num_layers": { 50 | "_type": "quniform", 51 | "_value": [1, 3, 1] 52 | }, 
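},
# Note (annotation): "hidden" below is an exponent rather than a raw width;
# init_model uses int(2 ** hidden) as the layer size, so quniform over
# [4, 7] searches hidden sizes 16 through 128.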
53 | "hidden": { 54 | "_type": "quniform", 55 | "_value": [4, 7, 1] 56 | }, 57 | "lr":{ 58 | "_type": "choice", 59 | "_value": [0.005] 60 | }, 61 | 'K' :{ 62 | "_type": "quniform", 63 | "_value": [1, 6, 1] 64 | }, 65 | "use_linear":{ 66 | "_type": "choice", 67 | "_value": [True, False] 68 | } 69 | } 70 | self.tuner.update_search_space(search_space) 71 | 72 | def init_model(self, n_class, feature_num): 73 | num_layers = int(self.hyperparameters['num_layers']) 74 | hidden_size = int(2 ** self.hyperparameters['hidden']) 75 | lr = self.hyperparameters['lr'] 76 | K = int(self.hyperparameters['K']) 77 | 78 | if self.hyperparameters['use_linear']: 79 | self.input_lin = Linear(feature_num, hidden_size) 80 | self.convs = torch.nn.ModuleList() 81 | for i in range(num_layers): 82 | self.convs.append(TAGConv(in_channels=hidden_size, out_channels=hidden_size, K=K)) 83 | self.output_lin = Linear(hidden_size, n_class) 84 | else: 85 | if num_layers == 1: 86 | self.conv1 = TAGConv(in_channels=feature_num, out_channels=n_class, K=K) 87 | else: 88 | self.conv1 = TAGConv(in_channels=feature_num, out_channels=hidden_size, K=K) 89 | self.convs = torch.nn.ModuleList() 90 | for i in range(num_layers - 2): 91 | self.convs.append(TAGConv(in_channels=hidden_size, out_channels=hidden_size, K=K)) 92 | self.conv2 = TAGConv(in_channels=hidden_size, out_channels=n_class, K=K) 93 | 94 | self.optimizer = torch.optim.Adam(self.parameters(), lr=lr, weight_decay=5e-4) 95 | 96 | self = self.to('cuda') 97 | 98 | torch.cuda.empty_cache() 99 | 100 | def forward(self, data): 101 | x, edge_index, edge_weight = data.x, data.edge_index, data.edge_weight 102 | if self.hyperparameters['dropedge_rate'] is not None: 103 | edge_index, edge_weight = dropout_adj(edge_index, edge_weight, p=self.hyperparameters['dropedge_rate'],\ 104 | force_undirected=False, num_nodes=None, training=self.training) 105 | 106 | if self.hyperparameters['use_linear']: 107 | x = F.relu(self.input_lin(x)) 108 | else: 109 | x = F.relu(self.conv1(x, edge_index,edge_weight)) 110 | if self.hyperparameters['num_layers'] == 1: 111 | return x 112 | x = F.dropout(x, p=self.hyperparameters['dropout_rate'], training=self.training) 113 | for conv in self.convs: 114 | x = F.relu(conv(x, edge_index, edge_weight=edge_weight)) 115 | x = F.dropout(x, p=self.hyperparameters['dropout_rate'], training=self.training) 116 | if self.hyperparameters['use_linear']: 117 | x = self.output_lin(x) 118 | else: 119 | x = self.conv2(x, edge_index,edge_weight) 120 | return x 121 | 122 | def trial(self, data, round_num): 123 | n_class, feature_num = self.info['n_class'], data.x.shape[1] 124 | if round_num >= 2: 125 | self.hyperparameters = self.tuner.generate_parameters(round_num-1) 126 | print(self.hyperparameters) 127 | 128 | while True: 129 | try: 130 | self.init_model(n_class, feature_num) 131 | val_score = self.train_valid(data, round_num) 132 | if round_num > 1: 133 | self.tuner.receive_trial_result(round_num-1,self.hyperparameters,val_score) 134 | if val_score > self.best_score: 135 | self.best_hp = copy.deepcopy(self.hyperparameters) 136 | break 137 | except RuntimeError as e: 138 | print(self.name,e, 'OOM with Hidden Size', self.hyperparameters['hidden']) 139 | if round_num > 1: 140 | self.tuner.receive_trial_result(round_num-1,self.hyperparameters,0) 141 | return 0 142 | print("Best Hyperparameters of", self.name, self.best_hp) 143 | return val_score 144 | 145 | def train_valid(self, data, round_num): 146 | y, train_mask, valid_mask, test_mask, label_weights = data.y, data.train_mask, 
data.valid_mask, data.test_mask, data.label_weights
147 |         patience = self.max_patience
148 |         best_valid_score = 0
149 |         valid_acc_meter = AverageMeter()
150 |         for epoch in range(self.max_epochs):
151 | 
152 |             # train
153 |             self.train()
154 |             self.optimizer.zero_grad()
155 |             preds = self.forward(data)
156 |             loss = F.cross_entropy(preds[train_mask], y[train_mask], label_weights)
157 |             loss.backward()
158 |             self.optimizer.step()
159 | 
160 |             # valid
161 |             self.eval()
162 |             with torch.no_grad():
163 |                 preds = F.softmax(self.forward(data), dim=-1)
164 |                 valid_preds, test_preds = preds[valid_mask], preds[test_mask]
165 |                 valid_score = f1_score(y[valid_mask].cpu(), valid_preds.max(1)[1].flatten().cpu(), average='micro')
166 |                 valid_acc_meter.update(valid_score)
167 |                 # patience
168 |                 if valid_acc_meter.avg > best_valid_score:
169 |                     best_valid_score = valid_acc_meter.avg
170 |                     self.current_round_best_preds = test_preds
171 |                     patience = self.max_patience
172 |                 else:
173 |                     patience -= 1
174 | 
175 |                 if patience == 0:
176 |                     break
177 | 
178 |         return best_valid_score
179 | 
180 |     def predict(self):
181 |         return self.current_round_best_preds.cpu().numpy()
182 | 
183 |     def __repr__(self):
184 |         return self.__class__.__name__
--------------------------------------------------------------------------------
/code_submission/model_space.py:
--------------------------------------------------------------------------------
1 | from model_lib import *
2 | 
3 | class ModelSpace:
4 |     """
5 |     Model space which contains all the base models in model_lib
6 |     Parameters:
7 |     ----------
8 |     info: dict
9 |         The EDA information generated by AutoEDA
10 |     ----------
11 |     """
12 |     def __init__(self, info):
13 |         self.info = info
14 | 
15 |         self.model_prior = self.info['chosen_models']
16 | 
17 |         self.model_lib = {
18 |             'GraphConvNet': GraphConvNet,
19 |             'GraphSAGE': GraphSAGE,
20 |             'GAT': GAT,
21 |             'GCN': GCN,
22 |             'APPNP': APPNPNet,
23 |             'ARMA': ARMA,
24 |             'GatedGraph': GatedGraphNet,
25 |             'GIN': GIN,
26 |             'SG': SG,
27 |             'TAG': TAG,
28 |             'IncepGCN': IncepGCN,
29 |             'ResGCN': ResGCN,
30 |             'JKGCN': JKGCN
31 |         }
32 | 
33 |     def get_models(self):
34 |         ret = {}
35 |         for model_name in self.model_prior:
36 |             ret[model_name] = self.model_lib[model_name](self.info)
37 |         return ret
38 | 
--------------------------------------------------------------------------------
/code_submission/timer.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | import time
4 | import threading
5 | import signal
6 | 
7 | time_budget, pid = int(sys.argv[1]), int(sys.argv[2])
8 | 
9 | def raise_timeout_exception(pid_to_kill):
10 |     """
11 |     Helper function to inform the main process
12 |     that time has run out.
13 |     Parameters:
14 |     ----------
15 |     pid_to_kill: int
16 |         the PID of the main process
17 |     ----------
18 |     """
19 |     os.kill(pid_to_kill, signal.SIGTSTP)
20 | 
21 | # Start a one-shot timer that fires once the time budget is exhausted.
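# Illustrative sketch of the receiving side (assumption: the main process
# registers a SIGTSTP handler; the handler name below is hypothetical and
# not defined in this file):
#
#     import signal
#
#     def handle_timeout(signum, frame):
#         raise TimeoutError('time budget exhausted')
#
#     signal.signal(signal.SIGTSTP, handle_timeout)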
22 | timer = threading.Timer(time_budget, raise_timeout_exception, [pid])
23 | timer.start()
24 | 
--------------------------------------------------------------------------------
/code_submission/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JunweiSUN/AutoGL/e1743e4571d88889ce87c14cb3bce63a0a2a505d/code_submission/utils/__init__.py
--------------------------------------------------------------------------------
/code_submission/utils/eda.py:
--------------------------------------------------------------------------------
1 | import os
2 | import numpy as np
3 | import sys
4 | import gc
5 | import torch
6 | from .tools import fix_seed
7 | from torch_geometric.utils import is_undirected
8 | fix_seed(1234)
9 | class AutoEDA(object):
10 |     """
11 |     A toolbox for Exploratory Data Analysis (EDA)
12 |     Parameters:
13 |     ----------
14 |     n_class: int
15 |         number of classes
16 |     ----------
17 |     """
18 |     def __init__(self, n_class):
19 |         self.info = {'n_class': n_class}
20 | 
21 |     def get_info(self, data):
22 |         self.get_feature_info(data['fea_table'])
23 |         self.get_edge_info(data['edge_file'])
24 |         self.set_priori_knowledges()
25 |         self.get_label_weights(data, reweighting=True)
26 |         return self.info
27 | 
28 |     def get_feature_info(self, df):
29 |         """
30 |         Get information about the original node features: number of nodes, number of features, etc.
31 |         Remove features that have only one unique value.
32 |         """
33 |         unique_counts = df.nunique()
34 |         unique_counts = unique_counts[unique_counts == 1]
35 |         df.drop(unique_counts.index, axis=1, inplace=True)
36 | 
37 |         self.info['num_nodes'] = df.shape[0]
38 |         self.info['num_features'] = df.shape[1] - 1
39 | 
40 |         print('Number of Nodes:', self.info['num_nodes'])
41 |         print('Number of Original Features:', self.info['num_features'])
42 | 
43 |     def get_edge_info(self, df):
44 |         """
45 |         Get information about the edges: number of edges, whether weighted, whether directed, max / min weight, etc.
46 |         """
47 |         self.info['num_edges'] = df.shape[0]
48 |         min_weight, max_weight = df['edge_weight'].min(), df['edge_weight'].max()
49 |         if min_weight != max_weight:
50 |             self.info['weighted'] = True
51 |         else:
52 |             self.info['weighted'] = False
53 | 
54 |         edge_index = df[['src_idx', 'dst_idx']].to_numpy()
55 |         edge_index = sorted(edge_index, key=lambda d: d[0])
56 |         edge_index = torch.tensor(edge_index, dtype=torch.long).transpose(0, 1)
57 | 
58 |         self.info['directed'] = not is_undirected(edge_index, num_nodes=self.info['num_nodes'])
59 | 
60 |         print('Number of Edges:', self.info['num_edges'])
61 |         print('Is Directed Graph:', self.info['directed'])
62 |         print('Is Weighted Graph:', self.info['weighted'])
63 |         print('Max Weight:', max_weight, 'Min Weight:', min_weight)
64 | 
65 |     def set_priori_knowledges(self):
66 |         """
67 |         Set some hyperparameters to their initial values according to prior knowledge.
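        For example (an illustrative reading of the branches below): a graph
        with no node features and directed edges gets dropedge_rate = 0.5 and
        the model pool ['ResGCN', 'GraphConvNet', 'GraphSAGE'].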
68 |         """
69 |         if self.info['num_features'] == 0:
70 |             if self.info['directed']:
71 |                 self.info['dropedge_rate'] = 0.5
72 |                 self.info['chosen_models'] = ['ResGCN', 'GraphConvNet', 'GraphSAGE']
73 |                 self.info['ensemble_threshold'] = 0.01
74 |             else:
75 |                 self.info['dropedge_rate'] = 0
76 |                 self.info['chosen_models'] = ['GraphConvNet', 'GIN', 'GraphSAGE']
77 |                 self.info['ensemble_threshold'] = 0.01
78 | 
79 |         else:
80 |             if self.info['directed']:
81 |                 self.info['dropedge_rate'] = 0.5
82 |                 self.info['chosen_models'] = ['GraphConvNet', 'GraphSAGE', 'ResGCN']
83 |                 self.info['ensemble_threshold'] = 0.02
84 |             else:
85 |                 if self.info['num_edges'] / self.info['num_nodes'] >= 10:
86 |                     self.info['dropedge_rate'] = 0.5
87 |                     self.info['chosen_models'] = ['ARMA', 'GraphSAGE', 'IncepGCN']
88 |                     self.info['ensemble_threshold'] = 0.02
89 |                 else:
90 |                     self.info['dropedge_rate'] = 0.5
91 |                     self.info['chosen_models'] = ['ARMA', 'IncepGCN', 'GraphConvNet', 'SG']
92 |                     self.info['ensemble_threshold'] = 0.03
93 | 
94 |         if self.info['num_edges'] / self.info['num_nodes'] >= 200:
95 |             self.info['num_layers'] = 1
96 |             self.info['init_hidden_size'] = 5
97 |         elif self.info['num_edges'] / self.info['num_nodes'] >= 100:
98 |             self.info['num_layers'] = 2
99 |             self.info['init_hidden_size'] = 5
100 |         else:
101 |             self.info['num_layers'] = 2
102 |             self.info['init_hidden_size'] = 7
103 | 
104 |         if self.info['num_edges'] / self.info['num_nodes'] >= 10:
105 |             self.info['use_linear'] = True
106 |             self.info['dropout_rate'] = 0.2
107 |         else:
108 |             self.info['use_linear'] = False
109 |             self.info['dropout_rate'] = 0.5
110 | 
111 |         self.info['lr'] = 0.005
112 | 
113 |         if self.info['num_features'] == 0:
114 |             self.info['feature_type'] = ['svd']  # one_hot / svd / degree / node2vec / adj
115 |         else:
116 |             self.info['feature_type'] = ['original', 'svd']
117 | 
118 |         self.info['normalize_features'] = 'None'
119 | 
120 |     def get_label_weights(self, data, reweighting=True):
121 |         """
122 |         Compute per-label weights to be used when computing the loss.
123 |         """
124 |         if not reweighting:
125 |             self.info['label_weights'] = None
126 |             return
127 | 
128 |         groupby_data_original = data['train_label'].groupby('label').count()
129 |         label_weights = groupby_data_original.iloc[:, 0]
130 | 
131 |         if len(label_weights) < 10 or max(label_weights) < min(label_weights) * 10:
132 |             self.info['label_weights'] = None
133 |             return
134 | 
135 |         label_weights = 1 / np.sqrt(label_weights)
136 |         self.info['label_weights'] = torch.tensor(label_weights.values, dtype=torch.float32)
137 |         print('Label Weights:', self.info['label_weights'])
138 | 
139 | 
140 | 
141 | 
142 | 
143 | 
--------------------------------------------------------------------------------
/code_submission/utils/tools.py:
--------------------------------------------------------------------------------
1 | import random
2 | import numpy as np
3 | import torch
4 | 
5 | def fix_seed(seed):
6 |     """
7 |     Fix all the random seeds.
8 |     """
9 |     random.seed(seed)
10 |     np.random.seed(seed)
11 |     torch.manual_seed(seed)
12 |     torch.cuda.manual_seed_all(seed)
13 |     torch.backends.cudnn.deterministic = True
14 |     torch.backends.cudnn.enabled = False
15 | 
16 | class AverageMeter(object):
17 |     """
18 |     Compute and store the current value and the average value in a momentum-like way.
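    Example (illustrative, with the default factor of 0.1):
        meter = AverageMeter()
        meter.update(0.8)   # avg = 0.8 * 0.1 + 0.0 * 0.9 = 0.08
        meter.update(0.9)   # avg = 0.9 * 0.1 + 0.08 * 0.9 = 0.162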
19 | """ 20 | def __init__(self): 21 | self.val, self.avg, self.sum, self.count = 0, 0, 0, 0 22 | 23 | def update(self, val, factor=0.1, n=1): # factor like momentum 24 | self.val = val 25 | self.avg = self.val*factor + self.avg*(1-factor) 26 | self.sum += val * n 27 | self.count += n 28 | if self.count == 0: 29 | self.avg = self.val 30 | -------------------------------------------------------------------------------- /data/demo/test_label.tsv: -------------------------------------------------------------------------------- 1 | node_index label 2 | 4 3 3 | 10 0 4 | 15 3 5 | 17 3 6 | 20 5 7 | 38 4 8 | 48 3 9 | 61 0 10 | 69 6 11 | 75 4 12 | 84 4 13 | 90 2 14 | 91 4 15 | 92 6 16 | 102 1 17 | 106 6 18 | 108 6 19 | 114 5 20 | 119 5 21 | 121 1 22 | 125 5 23 | 130 5 24 | 133 1 25 | 138 1 26 | 144 6 27 | 145 6 28 | 148 4 29 | 156 2 30 | 161 0 31 | 167 3 32 | 175 6 33 | 191 5 34 | 194 4 35 | 203 4 36 | 210 4 37 | 211 0 38 | 217 4 39 | 223 2 40 | 224 4 41 | 226 3 42 | 230 3 43 | 232 3 44 | 240 3 45 | 241 5 46 | 248 4 47 | 255 4 48 | 256 3 49 | 257 0 50 | 262 6 51 | 264 5 52 | 269 3 53 | 278 3 54 | 282 5 55 | 292 0 56 | 301 2 57 | 310 3 58 | 324 0 59 | 326 2 60 | 330 4 61 | 337 2 62 | 338 3 63 | 341 2 64 | 343 3 65 | 344 5 66 | 346 2 67 | 348 4 68 | 350 1 69 | 352 3 70 | 358 2 71 | 365 0 72 | 369 4 73 | 374 3 74 | 386 3 75 | 409 1 76 | 430 4 77 | 442 4 78 | 446 0 79 | 447 6 80 | 451 5 81 | 458 5 82 | 488 4 83 | 490 5 84 | 500 3 85 | 506 6 86 | 507 3 87 | 516 5 88 | 517 5 89 | 519 1 90 | 526 2 91 | 528 5 92 | 546 3 93 | 553 3 94 | 555 3 95 | 562 2 96 | 563 1 97 | 564 2 98 | 566 3 99 | 568 3 100 | 569 2 101 | 574 3 102 | 577 4 103 | 579 3 104 | 587 5 105 | 588 4 106 | 591 0 107 | 595 3 108 | 600 6 109 | 601 3 110 | 604 6 111 | 606 0 112 | 610 5 113 | 611 6 114 | 614 0 115 | 615 2 116 | 616 2 117 | 620 3 118 | 626 2 119 | 630 3 120 | 632 2 121 | 638 4 122 | 639 3 123 | 640 3 124 | 649 3 125 | 652 4 126 | 654 4 127 | 656 1 128 | 660 1 129 | 671 5 130 | 675 4 131 | 682 6 132 | 683 1 133 | 684 2 134 | 686 2 135 | 690 3 136 | 700 3 137 | 701 5 138 | 707 0 139 | 711 0 140 | 713 3 141 | 717 0 142 | 720 5 143 | 724 6 144 | 729 3 145 | 742 2 146 | 744 2 147 | 756 3 148 | 767 0 149 | 768 3 150 | 769 0 151 | 770 3 152 | 773 1 153 | 777 0 154 | 779 6 155 | 780 4 156 | 785 3 157 | 788 3 158 | 789 4 159 | 810 4 160 | 812 3 161 | 815 0 162 | 817 2 163 | 820 2 164 | 825 2 165 | 829 1 166 | 832 3 167 | 838 0 168 | 841 3 169 | 844 5 170 | 850 6 171 | 854 4 172 | 859 2 173 | 863 0 174 | 864 1 175 | 870 3 176 | 871 2 177 | 872 4 178 | 881 2 179 | 885 5 180 | 887 1 181 | 888 0 182 | 890 5 183 | 892 3 184 | 903 5 185 | 922 4 186 | 927 3 187 | 930 3 188 | 932 3 189 | 943 4 190 | 947 4 191 | 965 0 192 | 969 2 193 | 970 2 194 | 974 3 195 | 976 2 196 | 979 3 197 | 982 3 198 | 987 3 199 | 991 6 200 | 994 3 201 | 1009 1 202 | 1011 1 203 | 1017 3 204 | 1018 4 205 | 1023 3 206 | 1036 0 207 | 1038 2 208 | 1041 2 209 | 1043 5 210 | 1045 1 211 | 1049 3 212 | 1053 3 213 | 1057 3 214 | 1062 3 215 | 1064 4 216 | 1068 4 217 | 1071 4 218 | 1074 6 219 | 1077 5 220 | 1078 0 221 | 1081 4 222 | 1084 3 223 | 1087 4 224 | 1091 3 225 | 1095 3 226 | 1105 5 227 | 1108 3 228 | 1113 5 229 | 1114 2 230 | 1118 0 231 | 1129 4 232 | 1130 3 233 | 1132 3 234 | 1137 0 235 | 1148 2 236 | 1151 3 237 | 1152 0 238 | 1153 4 239 | 1156 0 240 | 1157 4 241 | 1166 3 242 | 1189 2 243 | 1190 5 244 | 1193 1 245 | 1202 4 246 | 1207 4 247 | 1210 2 248 | 1214 2 249 | 1216 2 250 | 1217 3 251 | 1220 0 252 | 1222 5 253 | 1225 3 254 | 1226 5 255 | 1236 6 256 | 1239 3 257 | 
1241 3 258 | 1247 3 259 | 1249 2 260 | 1254 2 261 | 1260 4 262 | 1264 4 263 | 1266 4 264 | 1267 5 265 | 1282 2 266 | 1283 1 267 | 1285 0 268 | 1287 2 269 | 1290 3 270 | 1293 0 271 | 1298 3 272 | 1305 2 273 | 1321 3 274 | 1331 3 275 | 1342 4 276 | 1348 3 277 | 1351 6 278 | 1360 5 279 | 1365 3 280 | 1367 1 281 | 1383 2 282 | 1384 2 283 | 1400 3 284 | 1401 4 285 | 1402 6 286 | 1403 3 287 | 1405 3 288 | 1406 4 289 | 1410 0 290 | 1413 3 291 | 1417 4 292 | 1420 6 293 | 1423 3 294 | 1424 3 295 | 1425 4 296 | 1426 2 297 | 1428 4 298 | 1434 3 299 | 1437 4 300 | 1438 3 301 | 1440 3 302 | 1442 3 303 | 1450 3 304 | 1453 3 305 | 1455 5 306 | 1456 3 307 | 1458 3 308 | 1465 3 309 | 1470 5 310 | 1478 5 311 | 1489 2 312 | 1500 5 313 | 1506 2 314 | 1513 2 315 | 1520 2 316 | 1523 0 317 | 1524 0 318 | 1527 0 319 | 1528 2 320 | 1536 0 321 | 1551 1 322 | 1554 3 323 | 1565 2 324 | 1571 1 325 | 1581 3 326 | 1584 1 327 | 1588 0 328 | 1594 5 329 | 1595 3 330 | 1596 2 331 | 1597 3 332 | 1598 3 333 | 1607 3 334 | 1608 3 335 | 1614 3 336 | 1616 6 337 | 1622 3 338 | 1626 4 339 | 1636 3 340 | 1648 2 341 | 1655 0 342 | 1658 0 343 | 1660 6 344 | 1663 3 345 | 1664 3 346 | 1666 3 347 | 1669 0 348 | 1677 5 349 | 1678 3 350 | 1680 0 351 | 1683 6 352 | 1684 3 353 | 1686 4 354 | 1703 4 355 | 1709 2 356 | 1710 2 357 | 1711 2 358 | 1721 2 359 | 1726 2 360 | 1729 2 361 | 1737 2 362 | 1747 2 363 | 1748 2 364 | 1749 2 365 | 1752 2 366 | 1753 2 367 | 1761 2 368 | 1764 5 369 | 1766 2 370 | 1776 1 371 | 1779 1 372 | 1787 1 373 | 1789 1 374 | 1790 1 375 | 1791 1 376 | 1793 4 377 | 1799 3 378 | 1810 3 379 | 1812 3 380 | 1816 3 381 | 1817 3 382 | 1818 3 383 | 1827 5 384 | 1828 5 385 | 1833 2 386 | 1835 6 387 | 1838 0 388 | 1842 5 389 | 1844 3 390 | 1848 0 391 | 1854 1 392 | 1855 3 393 | 1856 1 394 | 1859 3 395 | 1879 5 396 | 1885 5 397 | 1886 2 398 | 1888 2 399 | 1890 4 400 | 1896 5 401 | 1901 5 402 | 1905 0 403 | 1906 4 404 | 1907 4 405 | 1912 0 406 | 1918 6 407 | 1922 0 408 | 1924 3 409 | 1928 3 410 | 1932 3 411 | 1937 3 412 | 1943 3 413 | 1958 5 414 | 1964 4 415 | 1971 4 416 | 1975 6 417 | 1976 6 418 | 1980 5 419 | 1981 0 420 | 1991 1 421 | 1997 3 422 | 2003 3 423 | 2012 3 424 | 2017 5 425 | 2019 0 426 | 2024 5 427 | 2028 3 428 | 2030 6 429 | 2036 3 430 | 2037 3 431 | 2047 0 432 | 2051 0 433 | 2053 0 434 | 2060 5 435 | 2063 3 436 | 2068 0 437 | 2081 1 438 | 2087 1 439 | 2095 1 440 | 2097 0 441 | 2102 5 442 | 2103 5 443 | 2111 5 444 | 2118 3 445 | 2123 4 446 | 2131 0 447 | 2142 3 448 | 2144 1 449 | 2146 3 450 | 2152 3 451 | 2153 4 452 | 2163 3 453 | 2170 1 454 | 2171 0 455 | 2181 0 456 | 2193 0 457 | 2195 0 458 | 2202 0 459 | 2217 6 460 | 2221 2 461 | 2224 4 462 | 2228 6 463 | 2232 0 464 | 2259 4 465 | 2263 4 466 | 2264 4 467 | 2266 0 468 | 2272 5 469 | 2273 5 470 | 2276 3 471 | 2283 3 472 | 2287 5 473 | 2289 4 474 | 2292 4 475 | 2295 0 476 | 2300 1 477 | 2306 4 478 | 2319 1 479 | 2329 1 480 | 2333 0 481 | 2335 4 482 | 2347 0 483 | 2356 0 484 | 2368 3 485 | 2371 3 486 | 2376 4 487 | 2391 2 488 | 2394 6 489 | 2402 1 490 | 2404 0 491 | 2410 3 492 | 2425 3 493 | 2435 4 494 | 2437 3 495 | 2439 2 496 | 2445 2 497 | 2447 2 498 | 2463 4 499 | 2466 2 500 | 2470 0 501 | 2473 4 502 | 2479 6 503 | 2482 3 504 | 2489 3 505 | 2490 4 506 | 2492 2 507 | 2494 3 508 | 2495 3 509 | 2496 4 510 | 2500 3 511 | 2502 0 512 | 2507 2 513 | 2511 0 514 | 2522 2 515 | 2523 2 516 | 2530 2 517 | 2532 1 518 | 2538 4 519 | 2542 3 520 | 2548 4 521 | 2564 4 522 | 2574 3 523 | 2582 3 524 | 2586 5 525 | 2588 3 526 | 2598 1 527 | 2606 3 528 | 2623 4 529 | 2624 3 530 | 2629 
3 531 | 2632 3 532 | 2638 6 533 | 2640 3 534 | 2642 2 535 | 2643 2 536 | 2653 0 537 | 2670 4 538 | 2673 4 539 | 2676 3 540 | 2679 4 541 | 2686 3 542 | 2693 3 543 | 2694 4 544 | 2696 3 545 | 2697 3 546 | 2703 3 547 | -------------------------------------------------------------------------------- /data/demo/train.data/test_node_id.txt: -------------------------------------------------------------------------------- 1 | 4 2 | 10 3 | 15 4 | 17 5 | 20 6 | 38 7 | 48 8 | 61 9 | 69 10 | 75 11 | 84 12 | 90 13 | 91 14 | 92 15 | 102 16 | 106 17 | 108 18 | 114 19 | 119 20 | 121 21 | 125 22 | 130 23 | 133 24 | 138 25 | 144 26 | 145 27 | 148 28 | 156 29 | 161 30 | 167 31 | 175 32 | 191 33 | 194 34 | 203 35 | 210 36 | 211 37 | 217 38 | 223 39 | 224 40 | 226 41 | 230 42 | 232 43 | 240 44 | 241 45 | 248 46 | 255 47 | 256 48 | 257 49 | 262 50 | 264 51 | 269 52 | 278 53 | 282 54 | 292 55 | 301 56 | 310 57 | 324 58 | 326 59 | 330 60 | 337 61 | 338 62 | 341 63 | 343 64 | 344 65 | 346 66 | 348 67 | 350 68 | 352 69 | 358 70 | 365 71 | 369 72 | 374 73 | 386 74 | 409 75 | 430 76 | 442 77 | 446 78 | 447 79 | 451 80 | 458 81 | 488 82 | 490 83 | 500 84 | 506 85 | 507 86 | 516 87 | 517 88 | 519 89 | 526 90 | 528 91 | 546 92 | 553 93 | 555 94 | 562 95 | 563 96 | 564 97 | 566 98 | 568 99 | 569 100 | 574 101 | 577 102 | 579 103 | 587 104 | 588 105 | 591 106 | 595 107 | 600 108 | 601 109 | 604 110 | 606 111 | 610 112 | 611 113 | 614 114 | 615 115 | 616 116 | 620 117 | 626 118 | 630 119 | 632 120 | 638 121 | 639 122 | 640 123 | 649 124 | 652 125 | 654 126 | 656 127 | 660 128 | 671 129 | 675 130 | 682 131 | 683 132 | 684 133 | 686 134 | 690 135 | 700 136 | 701 137 | 707 138 | 711 139 | 713 140 | 717 141 | 720 142 | 724 143 | 729 144 | 742 145 | 744 146 | 756 147 | 767 148 | 768 149 | 769 150 | 770 151 | 773 152 | 777 153 | 779 154 | 780 155 | 785 156 | 788 157 | 789 158 | 810 159 | 812 160 | 815 161 | 817 162 | 820 163 | 825 164 | 829 165 | 832 166 | 838 167 | 841 168 | 844 169 | 850 170 | 854 171 | 859 172 | 863 173 | 864 174 | 870 175 | 871 176 | 872 177 | 881 178 | 885 179 | 887 180 | 888 181 | 890 182 | 892 183 | 903 184 | 922 185 | 927 186 | 930 187 | 932 188 | 943 189 | 947 190 | 965 191 | 969 192 | 970 193 | 974 194 | 976 195 | 979 196 | 982 197 | 987 198 | 991 199 | 994 200 | 1009 201 | 1011 202 | 1017 203 | 1018 204 | 1023 205 | 1036 206 | 1038 207 | 1041 208 | 1043 209 | 1045 210 | 1049 211 | 1053 212 | 1057 213 | 1062 214 | 1064 215 | 1068 216 | 1071 217 | 1074 218 | 1077 219 | 1078 220 | 1081 221 | 1084 222 | 1087 223 | 1091 224 | 1095 225 | 1105 226 | 1108 227 | 1113 228 | 1114 229 | 1118 230 | 1129 231 | 1130 232 | 1132 233 | 1137 234 | 1148 235 | 1151 236 | 1152 237 | 1153 238 | 1156 239 | 1157 240 | 1166 241 | 1189 242 | 1190 243 | 1193 244 | 1202 245 | 1207 246 | 1210 247 | 1214 248 | 1216 249 | 1217 250 | 1220 251 | 1222 252 | 1225 253 | 1226 254 | 1236 255 | 1239 256 | 1241 257 | 1247 258 | 1249 259 | 1254 260 | 1260 261 | 1264 262 | 1266 263 | 1267 264 | 1282 265 | 1283 266 | 1285 267 | 1287 268 | 1290 269 | 1293 270 | 1298 271 | 1305 272 | 1321 273 | 1331 274 | 1342 275 | 1348 276 | 1351 277 | 1360 278 | 1365 279 | 1367 280 | 1383 281 | 1384 282 | 1400 283 | 1401 284 | 1402 285 | 1403 286 | 1405 287 | 1406 288 | 1410 289 | 1413 290 | 1417 291 | 1420 292 | 1423 293 | 1424 294 | 1425 295 | 1426 296 | 1428 297 | 1434 298 | 1437 299 | 1438 300 | 1440 301 | 1442 302 | 1450 303 | 1453 304 | 1455 305 | 1456 306 | 1458 307 | 1465 308 | 1470 309 | 1478 310 | 1489 311 | 1500 312 | 1506 313 | 1513 314 | 1520 315 
| 1523 316 | 1524 317 | 1527 318 | 1528 319 | 1536 320 | 1551 321 | 1554 322 | 1565 323 | 1571 324 | 1581 325 | 1584 326 | 1588 327 | 1594 328 | 1595 329 | 1596 330 | 1597 331 | 1598 332 | 1607 333 | 1608 334 | 1614 335 | 1616 336 | 1622 337 | 1626 338 | 1636 339 | 1648 340 | 1655 341 | 1658 342 | 1660 343 | 1663 344 | 1664 345 | 1666 346 | 1669 347 | 1677 348 | 1678 349 | 1680 350 | 1683 351 | 1684 352 | 1686 353 | 1703 354 | 1709 355 | 1710 356 | 1711 357 | 1721 358 | 1726 359 | 1729 360 | 1737 361 | 1747 362 | 1748 363 | 1749 364 | 1752 365 | 1753 366 | 1761 367 | 1764 368 | 1766 369 | 1776 370 | 1779 371 | 1787 372 | 1789 373 | 1790 374 | 1791 375 | 1793 376 | 1799 377 | 1810 378 | 1812 379 | 1816 380 | 1817 381 | 1818 382 | 1827 383 | 1828 384 | 1833 385 | 1835 386 | 1838 387 | 1842 388 | 1844 389 | 1848 390 | 1854 391 | 1855 392 | 1856 393 | 1859 394 | 1879 395 | 1885 396 | 1886 397 | 1888 398 | 1890 399 | 1896 400 | 1901 401 | 1905 402 | 1906 403 | 1907 404 | 1912 405 | 1918 406 | 1922 407 | 1924 408 | 1928 409 | 1932 410 | 1937 411 | 1943 412 | 1958 413 | 1964 414 | 1971 415 | 1975 416 | 1976 417 | 1980 418 | 1981 419 | 1991 420 | 1997 421 | 2003 422 | 2012 423 | 2017 424 | 2019 425 | 2024 426 | 2028 427 | 2030 428 | 2036 429 | 2037 430 | 2047 431 | 2051 432 | 2053 433 | 2060 434 | 2063 435 | 2068 436 | 2081 437 | 2087 438 | 2095 439 | 2097 440 | 2102 441 | 2103 442 | 2111 443 | 2118 444 | 2123 445 | 2131 446 | 2142 447 | 2144 448 | 2146 449 | 2152 450 | 2153 451 | 2163 452 | 2170 453 | 2171 454 | 2181 455 | 2193 456 | 2195 457 | 2202 458 | 2217 459 | 2221 460 | 2224 461 | 2228 462 | 2232 463 | 2259 464 | 2263 465 | 2264 466 | 2266 467 | 2272 468 | 2273 469 | 2276 470 | 2283 471 | 2287 472 | 2289 473 | 2292 474 | 2295 475 | 2300 476 | 2306 477 | 2319 478 | 2329 479 | 2333 480 | 2335 481 | 2347 482 | 2356 483 | 2368 484 | 2371 485 | 2376 486 | 2391 487 | 2394 488 | 2402 489 | 2404 490 | 2410 491 | 2425 492 | 2435 493 | 2437 494 | 2439 495 | 2445 496 | 2447 497 | 2463 498 | 2466 499 | 2470 500 | 2473 501 | 2479 502 | 2482 503 | 2489 504 | 2490 505 | 2492 506 | 2494 507 | 2495 508 | 2496 509 | 2500 510 | 2502 511 | 2507 512 | 2511 513 | 2522 514 | 2523 515 | 2530 516 | 2532 517 | 2538 518 | 2542 519 | 2548 520 | 2564 521 | 2574 522 | 2582 523 | 2586 524 | 2588 525 | 2598 526 | 2606 527 | 2623 528 | 2624 529 | 2629 530 | 2632 531 | 2638 532 | 2640 533 | 2642 534 | 2643 535 | 2653 536 | 2670 537 | 2673 538 | 2676 539 | 2679 540 | 2686 541 | 2693 542 | 2694 543 | 2696 544 | 2697 545 | 2703 546 | -------------------------------------------------------------------------------- /data/demo/train.data/train_node_id.txt: -------------------------------------------------------------------------------- 1 | 2 2 | 5 3 | 9 4 | 14 5 | 22 6 | 25 7 | 30 8 | 33 9 | 40 10 | 46 11 | 49 12 | 51 13 | 52 14 | 53 15 | 63 16 | 70 17 | 73 18 | 80 19 | 83 20 | 85 21 | 86 22 | 96 23 | 98 24 | 99 25 | 100 26 | 110 27 | 120 28 | 122 29 | 128 30 | 134 31 | 136 32 | 142 33 | 147 34 | 150 35 | 158 36 | 160 37 | 166 38 | 168 39 | 171 40 | 174 41 | 177 42 | 181 43 | 182 44 | 189 45 | 192 46 | 195 47 | 196 48 | 197 49 | 206 50 | 207 51 | 222 52 | 228 53 | 236 54 | 237 55 | 238 56 | 242 57 | 244 58 | 245 59 | 260 60 | 263 61 | 265 62 | 266 63 | 277 64 | 280 65 | 284 66 | 289 67 | 290 68 | 294 69 | 297 70 | 308 71 | 309 72 | 313 73 | 318 74 | 319 75 | 322 76 | 328 77 | 333 78 | 345 79 | 354 80 | 355 81 | 356 82 | 359 83 | 370 84 | 371 85 | 384 86 | 387 87 | 391 88 | 395 89 | 398 90 | 403 91 | 404 92 | 405 93 | 408 94 | 
411 95 | 412 96 | 416 97 | 418 98 | 419 99 | 421 100 | 422 101 | 423 102 | 424 103 | 433 104 | 437 105 | 439 106 | 441 107 | 448 108 | 449 109 | 453 110 | 456 111 | 457 112 | 459 113 | 460 114 | 468 115 | 471 116 | 478 117 | 481 118 | 482 119 | 483 120 | 484 121 | 505 122 | 508 123 | 510 124 | 513 125 | 514 126 | 520 127 | 527 128 | 531 129 | 534 130 | 537 131 | 540 132 | 544 133 | 548 134 | 557 135 | 559 136 | 560 137 | 580 138 | 582 139 | 593 140 | 596 141 | 597 142 | 599 143 | 613 144 | 633 145 | 635 146 | 647 147 | 648 148 | 664 149 | 665 150 | 667 151 | 674 152 | 678 153 | 680 154 | 687 155 | 689 156 | 691 157 | 692 158 | 693 159 | 695 160 | 699 161 | 703 162 | 722 163 | 726 164 | 730 165 | 731 166 | 737 167 | 749 168 | 750 169 | 751 170 | 752 171 | 755 172 | 759 173 | 786 174 | 790 175 | 795 176 | 799 177 | 802 178 | 803 179 | 805 180 | 818 181 | 819 182 | 827 183 | 830 184 | 836 185 | 842 186 | 846 187 | 851 188 | 853 189 | 857 190 | 874 191 | 876 192 | 880 193 | 883 194 | 886 195 | 899 196 | 901 197 | 910 198 | 917 199 | 937 200 | 942 201 | 945 202 | 949 203 | 951 204 | 953 205 | 955 206 | 961 207 | 964 208 | 975 209 | 977 210 | 980 211 | 981 212 | 984 213 | 986 214 | 992 215 | 993 216 | 996 217 | 1002 218 | 1003 219 | 1008 220 | 1012 221 | 1013 222 | 1015 223 | 1016 224 | 1019 225 | 1027 226 | 1028 227 | 1029 228 | 1040 229 | 1042 230 | 1048 231 | 1066 232 | 1067 233 | 1103 234 | 1111 235 | 1115 236 | 1121 237 | 1125 238 | 1128 239 | 1133 240 | 1139 241 | 1164 242 | 1168 243 | 1170 244 | 1172 245 | 1173 246 | 1178 247 | 1188 248 | 1199 249 | 1200 250 | 1201 251 | 1205 252 | 1208 253 | 1212 254 | 1219 255 | 1221 256 | 1223 257 | 1232 258 | 1233 259 | 1234 260 | 1235 261 | 1238 262 | 1240 263 | 1242 264 | 1243 265 | 1245 266 | 1253 267 | 1257 268 | 1271 269 | 1272 270 | 1274 271 | 1279 272 | 1280 273 | 1281 274 | 1286 275 | 1288 276 | 1291 277 | 1301 278 | 1308 279 | 1316 280 | 1318 281 | 1327 282 | 1333 283 | 1336 284 | 1337 285 | 1350 286 | 1354 287 | 1371 288 | 1373 289 | 1374 290 | 1380 291 | 1381 292 | 1387 293 | 1388 294 | 1393 295 | 1407 296 | 1409 297 | 1414 298 | 1418 299 | 1419 300 | 1421 301 | 1422 302 | 1429 303 | 1431 304 | 1443 305 | 1444 306 | 1448 307 | 1451 308 | 1457 309 | 1469 310 | 1475 311 | 1479 312 | 1490 313 | 1495 314 | 1497 315 | 1498 316 | 1499 317 | 1514 318 | 1517 319 | 1530 320 | 1532 321 | 1534 322 | 1537 323 | 1542 324 | 1546 325 | 1549 326 | 1577 327 | 1578 328 | 1579 329 | 1585 330 | 1591 331 | 1605 332 | 1609 333 | 1612 334 | 1613 335 | 1619 336 | 1625 337 | 1628 338 | 1630 339 | 1631 340 | 1633 341 | 1639 342 | 1642 343 | 1644 344 | 1647 345 | 1661 346 | 1674 347 | 1676 348 | 1690 349 | 1693 350 | 1697 351 | 1700 352 | 1717 353 | 1728 354 | 1731 355 | 1741 356 | 1750 357 | 1754 358 | 1757 359 | 1762 360 | 1767 361 | 1771 362 | 1772 363 | 1774 364 | 1777 365 | 1788 366 | 1795 367 | 1796 368 | 1802 369 | 1804 370 | 1806 371 | 1813 372 | 1826 373 | 1829 374 | 1832 375 | 1836 376 | 1847 377 | 1857 378 | 1860 379 | 1865 380 | 1866 381 | 1872 382 | 1877 383 | 1884 384 | 1891 385 | 1894 386 | 1897 387 | 1899 388 | 1902 389 | 1910 390 | 1911 391 | 1913 392 | 1914 393 | 1931 394 | 1934 395 | 1936 396 | 1940 397 | 1945 398 | 1957 399 | 1960 400 | 1962 401 | 1972 402 | 1985 403 | 1993 404 | 1994 405 | 1995 406 | 1996 407 | 2002 408 | 2007 409 | 2008 410 | 2020 411 | 2022 412 | 2023 413 | 2027 414 | 2039 415 | 2041 416 | 2043 417 | 2046 418 | 2048 419 | 2065 420 | 2073 421 | 2076 422 | 2084 423 | 2088 424 | 2091 425 | 2107 426 | 2128 427 | 2143 428 | 2147 429 
| 2148 430 | 2150 431 | 2157 432 | 2161 433 | 2165 434 | 2169 435 | 2172 436 | 2176 437 | 2177 438 | 2180 439 | 2184 440 | 2187 441 | 2192 442 | 2197 443 | 2201 444 | 2211 445 | 2214 446 | 2218 447 | 2222 448 | 2227 449 | 2230 450 | 2233 451 | 2238 452 | 2241 453 | 2245 454 | 2246 455 | 2250 456 | 2252 457 | 2254 458 | 2257 459 | 2258 460 | 2260 461 | 2262 462 | 2265 463 | 2268 464 | 2269 465 | 2277 466 | 2279 467 | 2282 468 | 2284 469 | 2286 470 | 2288 471 | 2308 472 | 2316 473 | 2323 474 | 2328 475 | 2336 476 | 2346 477 | 2348 478 | 2352 479 | 2354 480 | 2355 481 | 2358 482 | 2360 483 | 2361 484 | 2363 485 | 2364 486 | 2367 487 | 2380 488 | 2383 489 | 2388 490 | 2389 491 | 2397 492 | 2406 493 | 2412 494 | 2416 495 | 2433 496 | 2448 497 | 2450 498 | 2451 499 | 2452 500 | 2453 501 | 2454 502 | 2462 503 | 2475 504 | 2487 505 | 2499 506 | 2501 507 | 2504 508 | 2508 509 | 2513 510 | 2515 511 | 2529 512 | 2531 513 | 2540 514 | 2544 515 | 2546 516 | 2555 517 | 2567 518 | 2568 519 | 2597 520 | 2601 521 | 2607 522 | 2613 523 | 2614 524 | 2615 525 | 2620 526 | 2626 527 | 2631 528 | 2634 529 | 2637 530 | 2639 531 | 2641 532 | 2645 533 | 2646 534 | 2649 535 | 2650 536 | 2655 537 | 2663 538 | 2666 539 | 2672 540 | 2678 541 | 2690 542 | 2699 543 | 2702 544 | 0 545 | 1 546 | 3 547 | 8 548 | 12 549 | 16 550 | 18 551 | 19 552 | 26 553 | 27 554 | 32 555 | 36 556 | 39 557 | 41 558 | 42 559 | 43 560 | 44 561 | 47 562 | 50 563 | 54 564 | 56 565 | 58 566 | 65 567 | 67 568 | 76 569 | 77 570 | 79 571 | 89 572 | 105 573 | 111 574 | 112 575 | 118 576 | 124 577 | 126 578 | 127 579 | 131 580 | 137 581 | 143 582 | 149 583 | 153 584 | 159 585 | 165 586 | 169 587 | 172 588 | 173 589 | 179 590 | 193 591 | 204 592 | 209 593 | 219 594 | 220 595 | 225 596 | 229 597 | 231 598 | 258 599 | 268 600 | 271 601 | 272 602 | 274 603 | 286 604 | 288 605 | 291 606 | 299 607 | 312 608 | 314 609 | 321 610 | 323 611 | 334 612 | 349 613 | 357 614 | 363 615 | 364 616 | 366 617 | 367 618 | 368 619 | 376 620 | 377 621 | 378 622 | 380 623 | 381 624 | 390 625 | 392 626 | 393 627 | 394 628 | 397 629 | 401 630 | 406 631 | 413 632 | 431 633 | 435 634 | 440 635 | 445 636 | 464 637 | 466 638 | 467 639 | 469 640 | 474 641 | 479 642 | 487 643 | 491 644 | 496 645 | 498 646 | 501 647 | 504 648 | 511 649 | 521 650 | 524 651 | 532 652 | 533 653 | 538 654 | 542 655 | 543 656 | 545 657 | 552 658 | 556 659 | 583 660 | 585 661 | 605 662 | 623 663 | 629 664 | 650 665 | 653 666 | 659 667 | 661 668 | 663 669 | 668 670 | 672 671 | 673 672 | 679 673 | 705 674 | 709 675 | 710 676 | 712 677 | 716 678 | 718 679 | 725 680 | 727 681 | 738 682 | 745 683 | 747 684 | 764 685 | 766 686 | 772 687 | 774 688 | 776 689 | 783 690 | 784 691 | 791 692 | 792 693 | 794 694 | 797 695 | 801 696 | 804 697 | 809 698 | 813 699 | 814 700 | 822 701 | 824 702 | 828 703 | 840 704 | 843 705 | 845 706 | 849 707 | 869 708 | 873 709 | 893 710 | 895 711 | 900 712 | 904 713 | 909 714 | 913 715 | 916 716 | 919 717 | 924 718 | 928 719 | 934 720 | 939 721 | 941 722 | 956 723 | 962 724 | 967 725 | 971 726 | 973 727 | 988 728 | 989 729 | 995 730 | 1014 731 | 1024 732 | 1025 733 | 1030 734 | 1032 735 | 1047 736 | 1050 737 | 1051 738 | 1052 739 | 1073 740 | 1083 741 | 1085 742 | 1094 743 | 1096 744 | 1099 745 | 1100 746 | 1101 747 | 1104 748 | 1106 749 | 1109 750 | 1110 751 | 1119 752 | 1120 753 | 1122 754 | 1124 755 | 1127 756 | 1134 757 | 1138 758 | 1143 759 | 1144 760 | 1147 761 | 1149 762 | 1155 763 | 1158 764 | 1159 765 | 1161 766 | 1162 767 | 1169 768 | 1175 769 | 1176 770 | 1177 771 | 1180 772 
| 1181 773 | 1182 774 | 1184 775 | 1196 776 | 1206 777 | 1211 778 | 1215 779 | 1218 780 | 1227 781 | 1237 782 | 1250 783 | 1251 784 | 1255 785 | 1256 786 | 1263 787 | 1270 788 | 1275 789 | 1276 790 | 1277 791 | 1289 792 | 1292 793 | 1294 794 | 1295 795 | 1299 796 | 1306 797 | 1317 798 | 1323 799 | 1325 800 | 1329 801 | 1330 802 | 1332 803 | 1334 804 | 1338 805 | 1346 806 | 1352 807 | 1357 808 | 1358 809 | 1363 810 | 1364 811 | 1366 812 | 1368 813 | 1370 814 | 1372 815 | 1382 816 | 1385 817 | 1386 818 | 1392 819 | 1394 820 | 1398 821 | 1399 822 | 1404 823 | 1412 824 | 1427 825 | 1430 826 | 1439 827 | 1441 828 | 1445 829 | 1449 830 | 1454 831 | 1459 832 | 1467 833 | 1471 834 | 1473 835 | 1474 836 | 1484 837 | 1485 838 | 1492 839 | 1493 840 | 1496 841 | 1503 842 | 1504 843 | 1507 844 | 1512 845 | 1516 846 | 1529 847 | 1535 848 | 1539 849 | 1541 850 | 1547 851 | 1550 852 | 1559 853 | 1561 854 | 1570 855 | 1576 856 | 1580 857 | 1582 858 | 1589 859 | 1604 860 | 1606 861 | 1610 862 | 1611 863 | 1615 864 | 1618 865 | 1621 866 | 1623 867 | 1632 868 | 1637 869 | 1643 870 | 1650 871 | 1651 872 | 1654 873 | 1659 874 | 1667 875 | 1672 876 | 1675 877 | 1681 878 | 1682 879 | 1685 880 | 1687 881 | 1692 882 | 1699 883 | 1701 884 | 1705 885 | 1707 886 | 1708 887 | 1713 888 | 1716 889 | 1722 890 | 1724 891 | 1730 892 | 1735 893 | 1736 894 | 1738 895 | 1739 896 | 1740 897 | 1742 898 | 1745 899 | 1746 900 | 1755 901 | 1756 902 | 1759 903 | 1760 904 | 1763 905 | 1765 906 | 1769 907 | 1780 908 | 1797 909 | 1800 910 | 1801 911 | 1805 912 | 1814 913 | 1821 914 | 1823 915 | 1825 916 | 1830 917 | 1834 918 | 1841 919 | 1852 920 | 1853 921 | 1858 922 | 1862 923 | 1863 924 | 1868 925 | 1870 926 | 1871 927 | 1878 928 | 1881 929 | 1908 930 | 1909 931 | 1916 932 | 1917 933 | 1920 934 | 1921 935 | 1925 936 | 1926 937 | 1929 938 | 1947 939 | 1948 940 | 1949 941 | 1950 942 | 1953 943 | 1963 944 | 1967 945 | 1969 946 | 1970 947 | 1982 948 | 1983 949 | 1988 950 | 1998 951 | 2013 952 | 2032 953 | 2042 954 | 2045 955 | 2049 956 | 2064 957 | 2066 958 | 2067 959 | 2069 960 | 2072 961 | 2079 962 | 2083 963 | 2085 964 | 2086 965 | 2098 966 | 2104 967 | 2108 968 | 2109 969 | 2110 970 | 2114 971 | 2120 972 | 2130 973 | 2133 974 | 2151 975 | 2155 976 | 2159 977 | 2160 978 | 2168 979 | 2174 980 | 2175 981 | 2178 982 | 2186 983 | 2196 984 | 2204 985 | 2210 986 | 2216 987 | 2219 988 | 2226 989 | 2235 990 | 2237 991 | 2240 992 | 2244 993 | 2247 994 | 2251 995 | 2270 996 | 2271 997 | 2275 998 | 2280 999 | 2290 1000 | 2293 1001 | 2305 1002 | 2309 1003 | 2312 1004 | 2317 1005 | 2320 1006 | 2324 1007 | 2340 1008 | 2342 1009 | 2357 1010 | 2377 1011 | 2384 1012 | 2390 1013 | 2395 1014 | 2400 1015 | 2401 1016 | 2403 1017 | 2405 1018 | 2407 1019 | 2409 1020 | 2413 1021 | 2418 1022 | 2419 1023 | 2420 1024 | 2428 1025 | 2434 1026 | 2438 1027 | 2444 1028 | 2446 1029 | 2457 1030 | 2471 1031 | 2476 1032 | 2483 1033 | 2485 1034 | 2491 1035 | 2512 1036 | 2521 1037 | 2524 1038 | 2527 1039 | 2535 1040 | 2541 1041 | 2549 1042 | 2550 1043 | 2551 1044 | 2558 1045 | 2561 1046 | 2565 1047 | 2571 1048 | 2575 1049 | 2576 1050 | 2579 1051 | 2580 1052 | 2583 1053 | 2584 1054 | 2590 1055 | 2594 1056 | 2599 1057 | 2604 1058 | 2608 1059 | 2611 1060 | 2612 1061 | 2617 1062 | 2621 1063 | 2622 1064 | 2636 1065 | 2648 1066 | 2654 1067 | 2656 1068 | 2658 1069 | 2661 1070 | 2664 1071 | 2665 1072 | 2667 1073 | 2668 1074 | 2669 1075 | 2671 1076 | 2675 1077 | 2677 1078 | 2680 1079 | 2687 1080 | 2688 1081 | 2691 1082 | 2695 1083 | 2700 1084 | 2704 1085 | 2706 1086 | 7 1087 | 21 
1088 | 24 1089 | 28 1090 | 34 1091 | 35 1092 | 37 1093 | 59 1094 | 60 1095 | 62 1096 | 71 1097 | 78 1098 | 81 1099 | 93 1100 | 101 1101 | 103 1102 | 109 1103 | 113 1104 | 117 1105 | 129 1106 | 135 1107 | 140 1108 | 146 1109 | 152 1110 | 155 1111 | 162 1112 | 170 1113 | 180 1114 | 183 1115 | 184 1116 | 190 1117 | 198 1118 | 200 1119 | 205 1120 | 213 1121 | 214 1122 | 215 1123 | 216 1124 | 218 1125 | 221 1126 | 227 1127 | 235 1128 | 246 1129 | 249 1130 | 251 1131 | 252 1132 | 259 1133 | 267 1134 | 270 1135 | 273 1136 | 279 1137 | 281 1138 | 283 1139 | 287 1140 | 296 1141 | 302 1142 | 303 1143 | 305 1144 | 311 1145 | 315 1146 | 316 1147 | 331 1148 | 332 1149 | 335 1150 | 336 1151 | 339 1152 | 340 1153 | 353 1154 | 361 1155 | 382 1156 | 388 1157 | 402 1158 | 415 1159 | 425 1160 | 426 1161 | 436 1162 | 438 1163 | 444 1164 | 450 1165 | 455 1166 | 461 1167 | 470 1168 | 473 1169 | 475 1170 | 477 1171 | 480 1172 | 485 1173 | 486 1174 | 492 1175 | 493 1176 | 494 1177 | 497 1178 | 512 1179 | 522 1180 | 523 1181 | 525 1182 | 529 1183 | 547 1184 | 549 1185 | 551 1186 | 558 1187 | 565 1188 | 570 1189 | 572 1190 | 573 1191 | 575 1192 | 576 1193 | 586 1194 | 594 1195 | 618 1196 | 619 1197 | 624 1198 | 627 1199 | 634 1200 | 636 1201 | 637 1202 | 651 1203 | 655 1204 | 657 1205 | 658 1206 | 662 1207 | 666 1208 | 676 1209 | 677 1210 | 688 1211 | 698 1212 | 702 1213 | 704 1214 | 714 1215 | 719 1216 | 728 1217 | 741 1218 | 743 1219 | 746 1220 | 748 1221 | 753 1222 | 757 1223 | 760 1224 | 762 1225 | 771 1226 | 775 1227 | 782 1228 | 787 1229 | 806 1230 | 807 1231 | 816 1232 | 821 1233 | 823 1234 | 831 1235 | 833 1236 | 835 1237 | 839 1238 | 847 1239 | 848 1240 | 852 1241 | 860 1242 | 861 1243 | 868 1244 | 877 1245 | 879 1246 | 889 1247 | 894 1248 | 896 1249 | 897 1250 | 898 1251 | 905 1252 | 906 1253 | 908 1254 | 915 1255 | 918 1256 | 920 1257 | 921 1258 | 925 1259 | 929 1260 | 931 1261 | 935 1262 | 936 1263 | 944 1264 | 946 1265 | 950 1266 | 952 1267 | 957 1268 | 966 1269 | 972 1270 | 983 1271 | 985 1272 | 990 1273 | 997 1274 | 998 1275 | 999 1276 | 1000 1277 | 1004 1278 | 1005 1279 | 1010 1280 | 1021 1281 | 1022 1282 | 1026 1283 | 1031 1284 | 1033 1285 | 1037 1286 | 1039 1287 | 1044 1288 | 1046 1289 | 1058 1290 | 1059 1291 | 1061 1292 | 1063 1293 | 1069 1294 | 1075 1295 | 1080 1296 | 1086 1297 | 1088 1298 | 1090 1299 | 1092 1300 | 1097 1301 | 1112 1302 | 1117 1303 | 1123 1304 | 1131 1305 | 1140 1306 | 1142 1307 | 1146 1308 | 1154 1309 | 1160 1310 | 1167 1311 | 1171 1312 | 1174 1313 | 1179 1314 | 1185 1315 | 1186 1316 | 1187 1317 | 1192 1318 | 1194 1319 | 1197 1320 | 1209 1321 | 1213 1322 | 1224 1323 | 1228 1324 | 1229 1325 | 1230 1326 | 1246 1327 | 1248 1328 | 1252 1329 | 1258 1330 | 1259 1331 | 1278 1332 | 1296 1333 | 1297 1334 | 1300 1335 | 1303 1336 | 1307 1337 | 1309 1338 | 1311 1339 | 1312 1340 | 1313 1341 | 1314 1342 | 1326 1343 | 1339 1344 | 1355 1345 | 1375 1346 | 1376 1347 | 1377 1348 | 1379 1349 | 1390 1350 | 1397 1351 | 1411 1352 | 1433 1353 | 1435 1354 | 1436 1355 | 1461 1356 | 1462 1357 | 1463 1358 | 1464 1359 | 1468 1360 | 1472 1361 | 1476 1362 | 1480 1363 | 1482 1364 | 1487 1365 | 1491 1366 | 1494 1367 | 1501 1368 | 1502 1369 | 1505 1370 | 1509 1371 | 1510 1372 | 1515 1373 | 1518 1374 | 1519 1375 | 1521 1376 | 1525 1377 | 1526 1378 | 1533 1379 | 1540 1380 | 1544 1381 | 1545 1382 | 1553 1383 | 1555 1384 | 1558 1385 | 1560 1386 | 1562 1387 | 1564 1388 | 1566 1389 | 1569 1390 | 1572 1391 | 1573 1392 | 1575 1393 | 1583 1394 | 1587 1395 | 1592 1396 | 1593 1397 | 1603 1398 | 1617 1399 | 1624 1400 | 
1627 1401 | 1629 1402 | 1635 1403 | 1638 1404 | 1645 1405 | 1646 1406 | 1652 1407 | 1656 1408 | 1665 1409 | 1668 1410 | 1670 1411 | 1689 1412 | 1691 1413 | 1715 1414 | 1719 1415 | 1720 1416 | 1723 1417 | 1725 1418 | 1733 1419 | 1743 1420 | 1744 1421 | 1758 1422 | 1768 1423 | 1778 1424 | 1782 1425 | 1783 1426 | 1784 1427 | 1785 1428 | 1792 1429 | 1794 1430 | 1807 1431 | 1809 1432 | 1819 1433 | 1822 1434 | 1831 1435 | 1840 1436 | 1845 1437 | 1846 1438 | 1850 1439 | 1851 1440 | 1861 1441 | 1864 1442 | 1869 1443 | 1875 1444 | 1876 1445 | 1882 1446 | 1883 1447 | 1892 1448 | 1893 1449 | 1898 1450 | 1903 1451 | 1915 1452 | 1939 1453 | 1944 1454 | 1952 1455 | 1954 1456 | 1955 1457 | 1956 1458 | 1965 1459 | 1966 1460 | 1968 1461 | 1973 1462 | 1974 1463 | 1977 1464 | 1986 1465 | 1987 1466 | 1999 1467 | 2000 1468 | 2005 1469 | 2009 1470 | 2011 1471 | 2015 1472 | 2021 1473 | 2033 1474 | 2035 1475 | 2038 1476 | 2050 1477 | 2059 1478 | 2061 1479 | 2071 1480 | 2077 1481 | 2080 1482 | 2082 1483 | 2089 1484 | 2090 1485 | 2093 1486 | 2100 1487 | 2101 1488 | 2106 1489 | 2112 1490 | 2113 1491 | 2115 1492 | 2117 1493 | 2119 1494 | 2121 1495 | 2125 1496 | 2127 1497 | 2129 1498 | 2134 1499 | 2135 1500 | 2136 1501 | 2137 1502 | 2139 1503 | 2162 1504 | 2164 1505 | 2167 1506 | 2173 1507 | 2182 1508 | 2183 1509 | 2188 1510 | 2205 1511 | 2208 1512 | 2212 1513 | 2223 1514 | 2231 1515 | 2234 1516 | 2236 1517 | 2239 1518 | 2243 1519 | 2248 1520 | 2253 1521 | 2255 1522 | 2256 1523 | 2261 1524 | 2274 1525 | 2291 1526 | 2297 1527 | 2298 1528 | 2299 1529 | 2303 1530 | 2307 1531 | 2315 1532 | 2321 1533 | 2330 1534 | 2331 1535 | 2332 1536 | 2337 1537 | 2339 1538 | 2341 1539 | 2344 1540 | 2345 1541 | 2349 1542 | 2350 1543 | 2353 1544 | 2359 1545 | 2365 1546 | 2370 1547 | 2374 1548 | 2375 1549 | 2379 1550 | 2382 1551 | 2386 1552 | 2387 1553 | 2393 1554 | 2408 1555 | 2411 1556 | 2414 1557 | 2417 1558 | 2422 1559 | 2423 1560 | 2429 1561 | 2430 1562 | 2436 1563 | 2443 1564 | 2455 1565 | 2456 1566 | 2459 1567 | 2461 1568 | 2465 1569 | 2469 1570 | 2472 1571 | 2474 1572 | 2478 1573 | 2480 1574 | 2481 1575 | 2484 1576 | 2498 1577 | 2503 1578 | 2505 1579 | 2509 1580 | 2510 1581 | 2516 1582 | 2517 1583 | 2519 1584 | 2520 1585 | 2528 1586 | 2534 1587 | 2545 1588 | 2547 1589 | 2553 1590 | 2556 1591 | 2559 1592 | 2562 1593 | 2563 1594 | 2572 1595 | 2573 1596 | 2577 1597 | 2578 1598 | 2585 1599 | 2591 1600 | 2593 1601 | 2595 1602 | 2596 1603 | 2602 1604 | 2603 1605 | 2605 1606 | 2609 1607 | 2610 1608 | 2616 1609 | 2618 1610 | 2619 1611 | 2625 1612 | 2628 1613 | 2630 1614 | 2633 1615 | 2647 1616 | 2660 1617 | 2662 1618 | 2681 1619 | 2683 1620 | 2685 1621 | 2689 1622 | 2692 1623 | 2698 1624 | 2705 1625 | 6 1626 | 11 1627 | 13 1628 | 23 1629 | 29 1630 | 31 1631 | 45 1632 | 55 1633 | 57 1634 | 64 1635 | 66 1636 | 68 1637 | 72 1638 | 74 1639 | 82 1640 | 87 1641 | 88 1642 | 94 1643 | 95 1644 | 97 1645 | 104 1646 | 107 1647 | 115 1648 | 116 1649 | 123 1650 | 132 1651 | 139 1652 | 141 1653 | 151 1654 | 154 1655 | 157 1656 | 163 1657 | 164 1658 | 176 1659 | 178 1660 | 185 1661 | 186 1662 | 187 1663 | 188 1664 | 199 1665 | 201 1666 | 202 1667 | 208 1668 | 212 1669 | 233 1670 | 234 1671 | 239 1672 | 243 1673 | 247 1674 | 250 1675 | 253 1676 | 254 1677 | 261 1678 | 275 1679 | 276 1680 | 285 1681 | 293 1682 | 295 1683 | 298 1684 | 300 1685 | 304 1686 | 306 1687 | 307 1688 | 317 1689 | 320 1690 | 325 1691 | 327 1692 | 329 1693 | 342 1694 | 347 1695 | 351 1696 | 360 1697 | 362 1698 | 372 1699 | 373 1700 | 375 1701 | 379 1702 | 383 1703 | 385 1704 | 389 
1705 | 396 1706 | 399 1707 | 400 1708 | 407 1709 | 410 1710 | 414 1711 | 417 1712 | 420 1713 | 427 1714 | 428 1715 | 429 1716 | 432 1717 | 434 1718 | 443 1719 | 452 1720 | 454 1721 | 462 1722 | 463 1723 | 465 1724 | 472 1725 | 476 1726 | 489 1727 | 495 1728 | 499 1729 | 502 1730 | 503 1731 | 509 1732 | 515 1733 | 518 1734 | 530 1735 | 535 1736 | 536 1737 | 539 1738 | 541 1739 | 550 1740 | 554 1741 | 561 1742 | 567 1743 | 571 1744 | 578 1745 | 581 1746 | 584 1747 | 589 1748 | 590 1749 | 592 1750 | 598 1751 | 602 1752 | 603 1753 | 607 1754 | 608 1755 | 609 1756 | 612 1757 | 617 1758 | 621 1759 | 622 1760 | 625 1761 | 628 1762 | 631 1763 | 641 1764 | 642 1765 | 643 1766 | 644 1767 | 645 1768 | 646 1769 | 669 1770 | 670 1771 | 681 1772 | 685 1773 | 694 1774 | 696 1775 | 697 1776 | 706 1777 | 708 1778 | 715 1779 | 721 1780 | 723 1781 | 732 1782 | 733 1783 | 734 1784 | 735 1785 | 736 1786 | 739 1787 | 740 1788 | 754 1789 | 758 1790 | 761 1791 | 763 1792 | 765 1793 | 778 1794 | 781 1795 | 793 1796 | 796 1797 | 798 1798 | 800 1799 | 808 1800 | 811 1801 | 826 1802 | 834 1803 | 837 1804 | 855 1805 | 856 1806 | 858 1807 | 862 1808 | 865 1809 | 866 1810 | 867 1811 | 875 1812 | 878 1813 | 882 1814 | 884 1815 | 891 1816 | 902 1817 | 907 1818 | 911 1819 | 912 1820 | 914 1821 | 923 1822 | 926 1823 | 933 1824 | 938 1825 | 940 1826 | 948 1827 | 954 1828 | 958 1829 | 959 1830 | 960 1831 | 963 1832 | 968 1833 | 978 1834 | 1001 1835 | 1006 1836 | 1007 1837 | 1020 1838 | 1034 1839 | 1035 1840 | 1054 1841 | 1055 1842 | 1056 1843 | 1060 1844 | 1065 1845 | 1070 1846 | 1072 1847 | 1076 1848 | 1079 1849 | 1082 1850 | 1089 1851 | 1093 1852 | 1098 1853 | 1102 1854 | 1107 1855 | 1116 1856 | 1126 1857 | 1135 1858 | 1136 1859 | 1141 1860 | 1145 1861 | 1150 1862 | 1163 1863 | 1165 1864 | 1183 1865 | 1191 1866 | 1195 1867 | 1198 1868 | 1203 1869 | 1204 1870 | 1231 1871 | 1244 1872 | 1261 1873 | 1262 1874 | 1265 1875 | 1268 1876 | 1269 1877 | 1273 1878 | 1284 1879 | 1302 1880 | 1304 1881 | 1310 1882 | 1315 1883 | 1319 1884 | 1320 1885 | 1322 1886 | 1324 1887 | 1328 1888 | 1335 1889 | 1340 1890 | 1341 1891 | 1343 1892 | 1344 1893 | 1345 1894 | 1347 1895 | 1349 1896 | 1353 1897 | 1356 1898 | 1359 1899 | 1361 1900 | 1362 1901 | 1369 1902 | 1378 1903 | 1389 1904 | 1391 1905 | 1395 1906 | 1396 1907 | 1408 1908 | 1415 1909 | 1416 1910 | 1432 1911 | 1446 1912 | 1447 1913 | 1452 1914 | 1460 1915 | 1466 1916 | 1477 1917 | 1481 1918 | 1483 1919 | 1486 1920 | 1488 1921 | 1508 1922 | 1511 1923 | 1522 1924 | 1531 1925 | 1538 1926 | 1543 1927 | 1548 1928 | 1552 1929 | 1556 1930 | 1557 1931 | 1563 1932 | 1567 1933 | 1568 1934 | 1574 1935 | 1586 1936 | 1590 1937 | 1599 1938 | 1600 1939 | 1601 1940 | 1602 1941 | 1620 1942 | 1634 1943 | 1640 1944 | 1641 1945 | 1649 1946 | 1653 1947 | 1657 1948 | 1662 1949 | 1671 1950 | 1673 1951 | 1679 1952 | 1688 1953 | 1694 1954 | 1695 1955 | 1696 1956 | 1698 1957 | 1702 1958 | 1704 1959 | 1706 1960 | 1712 1961 | 1714 1962 | 1718 1963 | 1727 1964 | 1732 1965 | 1734 1966 | 1751 1967 | 1770 1968 | 1773 1969 | 1775 1970 | 1781 1971 | 1786 1972 | 1798 1973 | 1803 1974 | 1808 1975 | 1811 1976 | 1815 1977 | 1820 1978 | 1824 1979 | 1837 1980 | 1839 1981 | 1843 1982 | 1849 1983 | 1867 1984 | 1873 1985 | 1874 1986 | 1880 1987 | 1887 1988 | 1889 1989 | 1895 1990 | 1900 1991 | 1904 1992 | 1919 1993 | 1923 1994 | 1927 1995 | 1930 1996 | 1933 1997 | 1935 1998 | 1938 1999 | 1941 2000 | 1942 2001 | 1946 2002 | 1951 2003 | 1959 2004 | 1961 2005 | 1978 2006 | 1979 2007 | 1984 2008 | 1989 2009 | 1990 2010 | 1992 2011 | 2001 
2012 | 2004 2013 | 2006 2014 | 2010 2015 | 2014 2016 | 2016 2017 | 2018 2018 | 2025 2019 | 2026 2020 | 2029 2021 | 2031 2022 | 2034 2023 | 2040 2024 | 2044 2025 | 2052 2026 | 2054 2027 | 2055 2028 | 2056 2029 | 2057 2030 | 2058 2031 | 2062 2032 | 2070 2033 | 2074 2034 | 2075 2035 | 2078 2036 | 2092 2037 | 2094 2038 | 2096 2039 | 2099 2040 | 2105 2041 | 2116 2042 | 2122 2043 | 2124 2044 | 2126 2045 | 2132 2046 | 2138 2047 | 2140 2048 | 2141 2049 | 2145 2050 | 2149 2051 | 2154 2052 | 2156 2053 | 2158 2054 | 2166 2055 | 2179 2056 | 2185 2057 | 2189 2058 | 2190 2059 | 2191 2060 | 2194 2061 | 2198 2062 | 2199 2063 | 2200 2064 | 2203 2065 | 2206 2066 | 2207 2067 | 2209 2068 | 2213 2069 | 2215 2070 | 2220 2071 | 2225 2072 | 2229 2073 | 2242 2074 | 2249 2075 | 2267 2076 | 2278 2077 | 2281 2078 | 2285 2079 | 2294 2080 | 2296 2081 | 2301 2082 | 2302 2083 | 2304 2084 | 2310 2085 | 2311 2086 | 2313 2087 | 2314 2088 | 2318 2089 | 2322 2090 | 2325 2091 | 2326 2092 | 2327 2093 | 2334 2094 | 2338 2095 | 2343 2096 | 2351 2097 | 2362 2098 | 2366 2099 | 2369 2100 | 2372 2101 | 2373 2102 | 2378 2103 | 2381 2104 | 2385 2105 | 2392 2106 | 2396 2107 | 2398 2108 | 2399 2109 | 2415 2110 | 2421 2111 | 2424 2112 | 2426 2113 | 2427 2114 | 2431 2115 | 2432 2116 | 2440 2117 | 2441 2118 | 2442 2119 | 2449 2120 | 2458 2121 | 2460 2122 | 2464 2123 | 2467 2124 | 2468 2125 | 2477 2126 | 2486 2127 | 2488 2128 | 2493 2129 | 2497 2130 | 2506 2131 | 2514 2132 | 2518 2133 | 2525 2134 | 2526 2135 | 2533 2136 | 2536 2137 | 2537 2138 | 2539 2139 | 2543 2140 | 2552 2141 | 2554 2142 | 2557 2143 | 2560 2144 | 2566 2145 | 2569 2146 | 2570 2147 | 2581 2148 | 2587 2149 | 2589 2150 | 2592 2151 | 2600 2152 | 2627 2153 | 2635 2154 | 2644 2155 | 2651 2156 | 2652 2157 | 2657 2158 | 2659 2159 | 2674 2160 | 2682 2161 | 2684 2162 | 2701 2163 | 2707 2164 | -------------------------------------------------------------------------------- /ingestion/__pycache__/common.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JunweiSUN/AutoGL/e1743e4571d88889ce87c14cb3bce63a0a2a505d/ingestion/__pycache__/common.cpython-36.pyc -------------------------------------------------------------------------------- /ingestion/__pycache__/dataset.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JunweiSUN/AutoGL/e1743e4571d88889ce87c14cb3bce63a0a2a505d/ingestion/__pycache__/dataset.cpython-36.pyc -------------------------------------------------------------------------------- /ingestion/__pycache__/timing.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JunweiSUN/AutoGL/e1743e4571d88889ce87c14cb3bce63a0a2a505d/ingestion/__pycache__/timing.cpython-36.pyc -------------------------------------------------------------------------------- /ingestion/common.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=logging-fstring-interpolation, broad-except 2 | """common""" 3 | import logging 4 | import importlib 5 | import sys 6 | 7 | 8 | class ModelApiError(Exception): 9 | """Model api error""" 10 | 11 | 12 | def get_logger(verbosity_level, name, use_error_log=False): 13 | """Set logging format to something like: 14 | 2019-04-25 12:52:51,924 INFO score.py: 15 | """ 16 | logger = logging.getLogger(name) 17 | logging_level = getattr(logging, verbosity_level) 18 | 
logger.setLevel(logging_level)
19 |     formatter = logging.Formatter(
20 |         fmt='%(asctime)s %(levelname)s %(filename)s: %(message)s')
21 |     stdout_handler = logging.StreamHandler(sys.stdout)
22 |     stdout_handler.setLevel(logging_level)
23 |     stdout_handler.setFormatter(formatter)
24 |     logger.addHandler(stdout_handler)
25 |     if use_error_log:
26 |         stderr_handler = logging.StreamHandler(sys.stderr)
27 |         stderr_handler.setLevel(logging.WARNING)
28 |         stderr_handler.setFormatter(formatter)
29 |         logger.addHandler(stderr_handler)
30 |     logger.propagate = False
31 |     return logger
32 | 
33 | 
34 | VERBOSITY_LEVEL = 'INFO'
35 | LOGGER = get_logger(VERBOSITY_LEVEL, __file__)
36 | METHOD_LIST = ['train_predict']
37 | 
38 | 
39 | def _check_umodel_method(umodel):
40 |     # Check if the model has all methods in METHOD_LIST
41 |     for attr in METHOD_LIST:
42 |         if not hasattr(umodel, attr):
43 |             raise ModelApiError(
44 |                 f"Your model object doesn't have the method {attr}")
45 | 
46 | 
47 | def import_umodel():
48 |     """import user model"""
49 |     model_cls = importlib.import_module('model').Model
50 |     _check_umodel_method(model_cls)
51 | 
52 |     return model_cls
53 | 
54 | 
55 | def init_usermodel():
56 |     """initialize user model"""
57 |     return import_umodel()()
58 | 
--------------------------------------------------------------------------------
/ingestion/dataset.py:
--------------------------------------------------------------------------------
1 | """
2 | AutoWSL datasets.
3 | """
4 | import copy
5 | from os.path import join
6 | from datetime import datetime
7 | import numpy as np
8 | import pandas as pd
9 | import yaml
10 | from common import get_logger
11 | 
12 | TYPE_MAP = {
13 |     'cat': str,
14 |     'multi-cat': str,
15 |     'str': str,
16 |     'num': np.float64,
17 |     'timestamp': 'str'
18 | }
19 | 
20 | VERBOSITY_LEVEL = 'WARNING'
21 | LOGGER = get_logger(VERBOSITY_LEVEL, __file__)
22 | TIMESTAMP_TYPE_NAME = 'timestamp'
23 | TRAIN_FILE = 'train_node_id.txt'
24 | TRAIN_LABEL = 'train_label.tsv'
25 | TEST_FILE = 'test_node_id.txt'
26 | INFO_FILE = 'config.yml'
27 | FEA_TABLE = 'feature.tsv'
28 | EDGE_FILE = 'edge.tsv'
29 | 
30 | SEP = '\t'
31 | 
32 | 
33 | def _date_parser(millisecs):
34 |     if np.isnan(float(millisecs)):
35 |         return millisecs
36 | 
37 |     return datetime.fromtimestamp(float(millisecs))
38 | 
39 | 
40 | class Dataset:
41 |     """Dataset"""
42 |     def __init__(self, dataset_dir):
43 |         """
44 |         train_indices, test_indices: list of ints
45 |         train_label: pd.DataFrame
46 |         """
47 |         self.dataset_dir_ = dataset_dir
48 |         self.metadata_ = self._read_metadata(join(dataset_dir, INFO_FILE))
49 |         self.edge_data = None
50 |         self.train_indices = None
51 |         self.train_label = None
52 |         self.test_indices = None
53 |         self.fea_table = None
54 |         self.get_data()
55 | 
56 |     def get_data(self):
57 |         """get all training data"""
58 |         data = {
59 |             'fea_table': self.get_fea_table(),
60 |             'edge_file': self.get_edge(),
61 |             'train_indices': self.get_train_indices(),
62 |             'test_indices': self.get_test_indices(),
63 |             'train_label': self.get_train_label(),
64 |         }
65 |         return data
66 | 
67 |     def get_fea_table(self):
68 |         """get the node feature table"""
69 |         if self.fea_table is None:
70 |             self.fea_table = self._read_dataset(
71 |                 join(self.dataset_dir_, FEA_TABLE))
72 |         return self.fea_table
73 | 
74 |     def get_edge(self):
75 |         """get edge file"""
76 |         dtype = {
77 |             'src_idx': int,
78 |             'dst_idx': int,
79 |             'edge_weight': float
80 |         }
81 |         if self.edge_data is None:
82 |             self.edge_data = pd.read_csv(
83 |                 join(self.dataset_dir_, EDGE_FILE), dtype=dtype, sep=SEP)
84 |         return self.edge_data
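    # Note (annotation): edge.tsv is expected to provide the columns src_idx,
    # dst_idx and edge_weight; the dtype map above and the EDA code in
    # code_submission/utils/eda.py both rely on these column names.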
--------------------------------------------------------------------------------
/ingestion/dataset.py:
--------------------------------------------------------------------------------
1 | """
2 | AutoWSL datasets.
3 | """
4 | import copy
5 | from os.path import join
6 | from datetime import datetime
7 | import numpy as np
8 | import pandas as pd
9 | import yaml
10 | from common import get_logger
11 | 
12 | TYPE_MAP = {
13 |     'cat': str,
14 |     'multi-cat': str,
15 |     'str': str,
16 |     'num': np.float64,
17 |     'timestamp': 'str'
18 | }
19 | 
20 | VERBOSITY_LEVEL = 'WARNING'
21 | LOGGER = get_logger(VERBOSITY_LEVEL, __file__)
22 | TIMESTAMP_TYPE_NAME = 'timestamp'
23 | TRAIN_FILE = 'train_node_id.txt'
24 | TRAIN_LABEL = 'train_label.tsv'
25 | TEST_FILE = 'test_node_id.txt'
26 | INFO_FILE = 'config.yml'
27 | FEA_TABLE = 'feature.tsv'
28 | EDGE_FILE = 'edge.tsv'
29 | 
30 | SEP = '\t'
31 | 
32 | 
33 | def _date_parser(millisecs):
34 |     if np.isnan(float(millisecs)):
35 |         return millisecs
36 | 
37 |     return datetime.fromtimestamp(float(millisecs))
38 | 
39 | 
40 | class Dataset:
41 |     """Dataset"""
42 |     def __init__(self, dataset_dir):
43 |         """
44 |         train_indices, test_indices: lists of node indices
45 |         train_label: pd.DataFrame with columns [node_index, label]
46 |         """
47 |         self.dataset_dir_ = dataset_dir
48 |         self.metadata_ = self._read_metadata(join(dataset_dir, INFO_FILE))
49 |         self.edge_data = None
50 |         self.train_indices = None
51 |         self.train_label = None
52 |         self.test_indices = None
53 |         self.fea_table = None
54 |         self.get_data()
55 | 
56 |     def get_data(self):
57 |         """get all training data"""
58 |         data = {
59 |             'fea_table': self.get_fea_table(),
60 |             'edge_file': self.get_edge(),
61 |             'train_indices': self.get_train_indices(),
62 |             'test_indices': self.get_test_indices(),
63 |             'train_label': self.get_train_label(),
64 |         }
65 |         return data
66 | 
67 |     def get_fea_table(self):
68 |         """get the node feature table"""
69 |         if self.fea_table is None:
70 |             self.fea_table = self._read_dataset(
71 |                 join(self.dataset_dir_, FEA_TABLE))
72 |         return self.fea_table
73 | 
74 |     def get_edge(self):
75 |         """get edge file"""
76 |         dtype = {
77 |             'src_id': int,
78 |             'dst_idx': int,
79 |             'edge_weight': float
80 |         }
81 |         if self.edge_data is None:
82 |             self.edge_data = pd.read_csv(
83 |                 join(self.dataset_dir_, EDGE_FILE), dtype=dtype, sep=SEP)
84 |         return self.edge_data
85 | 
86 |     def get_train_label(self):
87 |         """get train label"""
88 |         dtype = {
89 |             'node_index': int,
90 |             'label': int,
91 |         }
92 |         if self.train_label is None:
93 |             self.train_label = pd.read_csv(
94 |                 join(self.dataset_dir_, TRAIN_LABEL), dtype=dtype, sep=SEP)
95 | 
96 |         return self.train_label
97 | 
98 |     def get_test_indices(self):
99 |         """get test index file"""
100 |         if self.test_indices is None:
101 |             with open(join(self.dataset_dir_, TEST_FILE), 'r') as ftmp:
102 |                 self.test_indices = [int(line.strip()) for line in ftmp]
103 | 
104 |         return self.test_indices
105 | 
106 |     def get_train_indices(self):
107 |         """get train index file"""
108 |         if self.train_indices is None:
109 |             with open(join(self.dataset_dir_, TRAIN_FILE), 'r') as ftmp:
110 |                 self.train_indices = [int(line.strip()) for line in ftmp]
111 | 
112 |         return self.train_indices
113 | 
114 |     def get_metadata(self):
115 |         """get metadata"""
116 |         return copy.deepcopy(self.metadata_)
117 | 
118 |     @staticmethod
119 |     def _read_metadata(metadata_path):
120 |         with open(metadata_path, 'r') as ftmp:
121 |             return yaml.safe_load(ftmp)
122 | 
123 |     def _read_dataset(self, dataset_path):
124 |         schema = self.metadata_['schema']
125 |         if isinstance(schema, dict):
126 |             table_dtype = {key: TYPE_MAP[val] for key, val in schema.items()}
127 |             date_list = [key for key, val in schema.items()
128 |                          if val == TIMESTAMP_TYPE_NAME]
129 |             dataset = pd.read_csv(
130 |                 dataset_path, sep=SEP, dtype=table_dtype,
131 |                 parse_dates=date_list, date_parser=_date_parser)
132 |         else:
133 |             dataset = pd.read_csv(dataset_path, sep=SEP)
134 | 
135 |         return dataset
136 | 
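For orientation, the class above can be exercised directly; a quick check might look like this sketch (run from the ingestion directory so that dataset and common are importable; the demo path matches run_local_test.py's defaults and is assumed to exist):

    from dataset import Dataset

    ds = Dataset('../data/demo/train.data')   # reads config.yml and loads all tables
    meta = ds.get_metadata()                  # dict with e.g. time_budget, n_class, schema
    data = ds.get_data()                      # cached after __init__
    print(meta.get('n_class'),
          len(data['train_indices']),
          len(data['test_indices']))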
--------------------------------------------------------------------------------
/ingestion/ingestion.py:
--------------------------------------------------------------------------------
1 | # pylint: disable=logging-fstring-interpolation, broad-except
2 | """ingestion program for autograph"""
3 | import os
4 | from os.path import join
5 | import sys
6 | from sys import path
7 | import argparse
8 | import time
9 | import pandas as pd
10 | import yaml
11 | from filelock import FileLock
12 | 
13 | from common import get_logger, init_usermodel
14 | 
15 | import timing
16 | from timing import Timer
17 | from dataset import Dataset
18 | 
19 | 
20 | # Verbosity level of logging:
21 | # Can be: NOTSET, DEBUG, INFO, WARNING, ERROR, CRITICAL
22 | VERBOSITY_LEVEL = 'INFO'
23 | LOGGER = get_logger(VERBOSITY_LEVEL, __file__)
24 | 
25 | 
26 | def _here(*args):
27 |     """Helper function for getting the current directory of this script."""
28 |     here = os.path.dirname(os.path.realpath(__file__))
29 |     return os.path.abspath(os.path.join(here, *args))
30 | 
31 | 
32 | def write_start_file(output_dir):
33 |     """write start file"""
34 |     start_filepath = os.path.join(output_dir, 'start.txt')
35 |     lockfile = os.path.join(output_dir, 'start.txt.lock')
36 |     ingestion_pid = os.getpid()
37 | 
38 |     with FileLock(lockfile):
39 |         with open(start_filepath, 'w') as ftmp:
40 |             ftmp.write(str(ingestion_pid))
41 | 
42 |     LOGGER.info('===== Finished writing "start.txt" file.')
43 | 
44 | 
45 | class IngestionError(RuntimeError):
46 |     """Ingestion error"""
47 | 
48 | 
49 | def _parse_args():
50 |     root_dir = _here(os.pardir)
51 |     default_dataset_dir = join(root_dir, "sample_data")
52 |     default_output_dir = join(root_dir, "sample_result_submission")
53 |     default_ingestion_program_dir = join(root_dir, "ingestion_program")
54 |     default_code_dir = join(root_dir, "code_submission")
55 |     default_score_dir = join(root_dir, "scoring_output")
56 |     default_temp_dir = join(root_dir, 'temp_output')
57 |     parser = argparse.ArgumentParser()
58 |     parser.add_argument('--dataset_dir', type=str,
59 |                         default=default_dataset_dir,
60 |                         help="Directory storing the dataset (containing "
61 |                              "e.g. adult.data/)")
62 |     parser.add_argument('--output_dir', type=str,
63 |                         default=default_output_dir,
64 |                         help="Directory storing the predictions. It will "
65 |                              "contain e.g. [start.txt, predictions, end.yaml] "
66 |                              "when ingestion terminates.")
67 |     parser.add_argument('--ingestion_program_dir', type=str,
68 |                         default=default_ingestion_program_dir,
69 |                         help="Directory storing the ingestion program "
70 |                              "`ingestion.py` and other necessary packages.")
71 |     parser.add_argument('--code_dir', type=str,
72 |                         default=default_code_dir,
73 |                         help="Directory storing the submission code "
74 |                              "`model.py` and other necessary packages.")
75 |     parser.add_argument('--score_dir', type=str,
76 |                         default=default_score_dir,
77 |                         help="Directory storing the scoring output "
78 |                              "e.g. `scores.txt` and `detailed_results.html`.")
79 |     parser.add_argument('--temp_dir', type=str,
80 |                         default=default_temp_dir,
81 |                         help="Directory storing the temporary output, "
82 |                              "e.g. the participants' model saved after "
83 |                              "training.")
84 | 
85 |     args = parser.parse_args()
86 |     LOGGER.debug(f'Parsed args are: {args}')
87 |     LOGGER.debug("-" * 50)
88 |     if (args.dataset_dir.endswith('run/input') and
89 |             args.code_dir.endswith('run/program')):
90 |         LOGGER.debug("Since dataset_dir ends with 'run/input' and code_dir "
91 |                      "ends with 'run/program', suppose running on "
92 |                      "CodaLab platform. Modify dataset_dir to 'run/input_data'"
93 |                      " and code_dir to 'run/submission'. "
94 |                      "Directory parsing should be more flexible in the code of"
95 |                      " compute worker: we need explicit directories for "
96 |                      "dataset_dir and code_dir.")
97 | 
98 |         args.dataset_dir = args.dataset_dir.replace(
99 |             'run/input', 'run/input_data')
100 |         args.code_dir = args.code_dir.replace(
101 |             'run/program', 'run/submission')
102 | 
103 |     # Show directories for debugging
104 |     LOGGER.debug(f"sys.argv = {sys.argv}")
105 |     LOGGER.debug(f"Using dataset_dir: {args.dataset_dir}")
106 |     LOGGER.debug(f"Using output_dir: {args.output_dir}")
107 |     LOGGER.debug(
108 |         f"Using ingestion_program_dir: {args.ingestion_program_dir}")
109 |     LOGGER.debug(f"Using code_dir: {args.code_dir}")
110 |     return args
111 | 
112 | 
113 | def _init_python_path(args):
114 |     path.append(args.ingestion_program_dir)
115 |     path.append(args.code_dir)
116 |     os.makedirs(args.output_dir, exist_ok=True)
117 |     os.makedirs(args.temp_dir, exist_ok=True)
118 | 
119 | 
120 | def _train_predict(umodel, dataset, timer, n_class, schema):
121 |     # Train the model and predict labels for the test nodes
122 |     data = dataset.get_data()
123 | 
124 |     with timer.time_limit('train_predict'):
125 |         predictions = umodel.train_predict(
126 |             data, timer.get_all_remain()['train_predict'], n_class, schema)
127 | 
128 |     return predictions
129 | 
130 | 
131 | def _finalize(args, timer):
132 |     # Finishing ingestion program
133 |     end_time = time.time()
134 | 
135 |     time_stats = timer.get_all_stats()
136 |     for pname, stats in time_stats.items():
137 |         for stat_name, val in stats.items():
138 |             LOGGER.info(f'the {stat_name} of duration in {pname}: {val} sec')
139 | 
140 |     overall_time_spent = timer.get_overall_duration()
141 | 
142 |     # Write overall_time_spent to an end.yaml file
143 |     end_filename = 'end.yaml'
144 |     content = {
145 |         'ingestion_duration': overall_time_spent,
146 |         'time_stats': time_stats,
147 |         'end_time': end_time}
148 | 
149 |     with open(join(args.output_dir, end_filename), 'w') as ftmp:
150 |         yaml.dump(content, ftmp)
151 |     LOGGER.info(
152 |         f'Wrote the file {end_filename} marking the end of ingestion.')
153 | 
154 |     LOGGER.info("[+] Done. Ingestion program successfully terminated.")
155 |     LOGGER.info(f"[+] Overall time spent {overall_time_spent:5.2f} sec")
156 | 
157 |     # Copy all files in output_dir to score_dir
158 |     os.system(
159 |         f"cp -R {os.path.join(args.output_dir, '*')} {args.score_dir}")
160 |     LOGGER.debug(
161 |         "Copied all ingestion output to scoring output directory.")
162 | 
163 |     LOGGER.info("[Ingestion terminated]")
164 | 
165 | 
166 | def _write_predict(output_dir, prediction):
167 |     """prediction should be a list"""
168 |     os.makedirs(output_dir, exist_ok=True)
169 |     prediction = pd.Series(prediction, name='label')
170 |     LOGGER.debug(f'prediction shape: {prediction.shape}')
171 |     prediction.to_csv(
172 |         join(output_dir, 'predictions'), index=False, header=True)
173 | 
174 | 
175 | def _init_timer(time_budgets):
176 |     timer = Timer()
177 |     timer.add_process('train_predict', time_budgets, timing.RESET)
178 |     LOGGER.debug(
179 |         f"init time budget of train_predict: {time_budgets} "
180 |         f"mode: {timing.RESET}")
181 |     return timer
182 | 
183 | 
184 | def main():
185 |     """main entry"""
186 |     LOGGER.info('===== Start ingestion program.')
187 |     # Parse directories from input arguments
188 |     LOGGER.info('===== Initialize args.')
189 |     args = _parse_args()
190 | 
191 |     _init_python_path(args)
192 | 
193 |     write_start_file(args.output_dir)
194 | 
195 |     LOGGER.info('===== Load data.')
196 |     dataset = Dataset(args.dataset_dir)
197 |     time_budget = dataset.get_metadata().get("time_budget")
198 |     n_class = dataset.get_metadata().get("n_class")
199 |     schema = dataset.get_metadata().get("schema")
200 | 
201 |     LOGGER.info(f"Time budget: {time_budget}")
202 | 
203 |     LOGGER.info("===== import user model")
204 |     umodel = init_usermodel()
205 | 
206 |     LOGGER.info("===== Begin training user model")
207 |     timer = _init_timer(time_budget)
208 |     predictions = _train_predict(umodel, dataset, timer, n_class, schema)
209 |     _write_predict(args.output_dir, predictions)
210 | 
211 |     _finalize(args, timer)
212 | 
213 | 
214 | if __name__ == "__main__":
215 |     main()
216 | 
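When debugging ingestion on its own (run_local_test.py below normally launches it for you), an invocation mirroring the argument parser above and run_local_test.py's own command would be, from the repository root (paths illustrative, using the demo dataset):

    python ingestion/ingestion.py --dataset_dir=data/demo/train.data --code_dir=code_submission

The remaining directories fall back to the defaults derived from the repository root, e.g. sample_result_submission for the predictions.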
--------------------------------------------------------------------------------
/ingestion/metadata:
--------------------------------------------------------------------------------
1 | command: python $ingestion_program/ingestion.py --dataset_dir=$input --output_dir=$predictions --ingestion_program_dir=$ingestion_program --code_dir=$submission_program --score_dir=$output --temp_dir=$tmp
2 | 
--------------------------------------------------------------------------------
/ingestion/timing.py:
--------------------------------------------------------------------------------
1 | # pylint: disable=logging-fstring-interpolation, broad-except
2 | """timing"""
3 | import signal
4 | import math
5 | import time
6 | from contextlib import contextmanager
7 | import numpy as np
8 | import yaml
9 | from common import get_logger
10 | 
11 | VERBOSITY_LEVEL = 'INFO'
12 | LOGGER = get_logger(VERBOSITY_LEVEL, __file__)
13 | 
14 | CUM = 0
15 | RESET = 1
16 | MODES = set([CUM, RESET])
17 | 
18 | 
19 | OP_MAP = {
20 |     'mean': np.mean,
21 |     'max': np.max,
22 |     'std': np.std,
23 |     'sum': sum,
24 | }
25 | 
26 | 
27 | class TimeoutException(Exception):
28 |     """timeout exception"""
29 | 
30 | 
31 | class Timer:
32 |     """timer"""
33 |     def __init__(self):
34 |         self.total = {}
35 |         self.history = {}
36 |         self.modes = {}
37 | 
38 |     @classmethod
39 |     def from_file(cls, save_file):
40 |         """construct a timer from a save file"""
41 |         timer = Timer()
42 |         timer.load(save_file)
43 |         return timer
44 | 
45 |     def add_process(self, pname, time_budget, mode=RESET):
46 |         """set time_budget
47 |         mode: CUM/RESET
48 |         """
49 |         if pname in self.total:
50 |             raise ValueError(f"Existing process of timer: {pname}")
51 |         if mode not in MODES:
52 |             raise ValueError(f"wrong process mode: {mode}")
53 | 
54 |         self.total[pname] = time_budget
55 |         self.history[pname] = []
56 |         self.modes[pname] = mode
57 | 
58 |     @contextmanager
59 |     def time_limit(self, pname, verbose=True):
60 |         """limit time"""
61 |         def signal_handler(signum, frame):
62 |             raise TimeoutException(f"{pname}: Timed out!")
63 |         signal.signal(signal.SIGALRM, signal_handler)
64 |         time_budget = int(math.ceil(self.get_remain(pname)))
65 |         signal.alarm(time_budget)
66 |         start_time = time.time()
67 | 
68 |         try:
69 | 
70 |             if verbose:
71 |                 LOGGER.info(f'start {pname} with time budget {time_budget}')
72 |             yield
73 |         finally:
74 |             exec_time = time.time() - start_time
75 |             signal.alarm(0)
76 |             self.history[pname].append(exec_time)
77 | 
78 |             if verbose:
79 |                 LOGGER.info(f'{pname} success, time spent {exec_time} sec')
80 | 
81 |             if self.get_remain(pname) <= 0:
82 |                 raise TimeoutException(f"{pname}: Timed out!")
83 | 
84 |     def get_remain(self, pname):
85 |         """get remaining time of a process"""
86 |         if self.modes[pname] == CUM:
87 |             remain = self.total[pname] - sum(self.history[pname])
88 |         else:
89 |             remain = self.total[pname]
90 | 
91 |         return remain
92 | 
93 |     def get_all_remain(self):
94 |         """get remaining time of all processes"""
95 |         return {key: self.get_remain(key) for key in self.total.keys()}
96 | 
97 |     def get_stats(self, pname):
98 |         """get stats of timing history"""
99 |         result = {}
100 |         for stat in ['sum', 'mean', 'max', 'std']:
101 |             history = self.history[pname]
102 |             if history:
103 |                 result[stat] = float(OP_MAP[stat](self.history[pname]))
104 |             else:
105 |                 result[stat] = 0
106 |         return result
107 | 
108 |     def get_overall_duration(self):
109 |         """get overall duration"""
110 |         duration = 0
111 |         for _, value in self.history.items():
112 |             duration += sum(value)
113 |         return duration
114 | 
115 |     def get_all_stats(self):
116 |         """get all stats of timing history"""
117 |         stats = {pname: self.get_stats(pname) for pname in self.total.keys()}
118 |         return stats
119 | 
120 |     def save(self, save_file):
121 |         """save timer"""
122 |         save_content = {
123 |             'total': self.total,
124 |             'history': self.history,
125 |             'modes': self.modes
126 |         }
127 |         with open(save_file, 'w') as ftmp:
128 |             yaml.dump(save_content, ftmp)
129 | 
130 |     def load(self, save_file):
131 |         """load timer"""
132 |         with open(save_file, 'r') as ftmp:
133 |             save_content = yaml.safe_load(ftmp)
134 |         self.total = save_content['total']
135 |         self.history = save_content['history']
136 |         self.modes = save_content['modes']
137 | 
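As a self-contained illustration of how ingestion.py drives this class (run from the ingestion directory so that common is importable; the 5-second budget is made up for the example, and the SIGALRM-based limit only works on Unix):

    from timing import Timer, RESET, TimeoutException

    timer = Timer()
    timer.add_process('train_predict', 5, RESET)   # illustrative 5 s budget
    try:
        with timer.time_limit('train_predict'):
            pass  # long-running training would go here
    except TimeoutException as err:
        print(err)                                 # raised if the budget is exhausted
    print(timer.get_all_stats())                   # sum/mean/max/std of recorded runs

In RESET mode the full budget is restored for each run, while CUM mode subtracts the accumulated history from the total.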
--------------------------------------------------------------------------------
/run_local_test.py:
--------------------------------------------------------------------------------
1 | """run local test in starting kit"""
2 | # pylint: disable=logging-fstring-interpolation
3 | 
4 | import argparse
5 | import logging
6 | import os
7 | from os.path import join, isdir
8 | import shutil
9 | from multiprocessing import Process
10 | 
11 | VERBOSITY_LEVEL = 'WARNING'
12 | 
13 | logging.basicConfig(
14 |     level=getattr(logging, VERBOSITY_LEVEL),
15 |     format='%(asctime)s %(levelname)s %(filename)s: %(message)s',
16 |     datefmt='%Y-%m-%d %H:%M:%S'
17 | )
18 | 
19 | 
20 | def _here(*args):
21 |     here = os.path.dirname(os.path.realpath(__file__))
22 |     return os.path.join(here, *args)
23 | 
24 | 
25 | def _ingestion_program(starting_kit_dir):
26 |     return join(starting_kit_dir, 'ingestion', 'ingestion.py')
27 | 
28 | 
29 | def _scoring_program(starting_kit_dir):
30 |     return join(starting_kit_dir, 'scoring', 'score.py')
31 | 
32 | 
33 | def remove_dir(output_dir):
34 |     """Remove the directory `output_dir`.
35 |     This aims to clean up the existing output from the last run of the local test.
36 |     """
37 |     if isdir(output_dir):
38 |         logging.info(
39 |             f"Cleaning existing output directory of last run: {output_dir}")
40 |         shutil.rmtree(output_dir)
41 | 
42 | 
43 | def _clean(starting_kit_dir):
44 |     ingestion_output_dir = join(starting_kit_dir, 'sample_result_submission')
45 |     score_dir = os.path.join(starting_kit_dir, 'scoring_output')
46 |     remove_dir(ingestion_output_dir)
47 |     remove_dir(score_dir)
48 | 
49 | 
50 | def run(dataset_dir, code_dir):
51 |     """run"""
52 |     # Current directory containing this script
53 |     starting_kit_dir = _here()
54 |     path_ingestion = _ingestion_program(starting_kit_dir)
55 |     path_scoring = _scoring_program(starting_kit_dir)
56 | 
57 |     # Run ingestion and scoring at the same time
58 |     command_ingestion = (
59 |         'python '
60 |         # f'{path_ingestion} --dataset_dir={dataset_dir}/data '
61 |         f'{path_ingestion} --dataset_dir={dataset_dir}/train.data'
62 |         f' --code_dir={code_dir}')
63 | 
64 |     command_scoring = (
65 |         # f'python {path_scoring} --solution_dir={dataset_dir}/solution')
66 |         f'python {path_scoring} --solution_dir={dataset_dir}')
67 | 
68 |     def run_ingestion():
69 |         os.system(command_ingestion)
70 | 
71 |     def run_scoring():
72 |         os.system(command_scoring)
73 | 
74 |     ingestion_process = Process(name='ingestion', target=run_ingestion)
75 |     scoring_process = Process(name='scoring', target=run_scoring)
76 |     _clean(starting_kit_dir)
77 | 
78 |     ingestion_process.start()
79 |     scoring_process.start()
80 | 
81 | 
82 | def _parse_args():
83 |     default_starting_kit_dir = _here()
84 |     default_dataset_dir = join(default_starting_kit_dir, 'data', 'demo')
85 |     default_code_dir = join(default_starting_kit_dir, 'code_submission')
86 | 
87 |     parser = argparse.ArgumentParser()
88 |     parser.add_argument('--dataset_dir', type=str,
89 |                         default=default_dataset_dir,
90 |                         help="Directory storing the dataset; should contain "
91 |                              "'train.data' and 'test_label.tsv'")
92 | 
93 |     parser.add_argument('--code_dir', type=str,
94 |                         default=default_code_dir,
95 |                         help="Directory storing the submission code "
96 |                              "`model.py` and other necessary packages.")
97 | 
98 |     args = parser.parse_args()
99 |     return args
100 | 
101 | 
102 | def main():
103 |     """main entry"""
104 |     args = _parse_args()
105 |     dataset_dir = args.dataset_dir
106 |     code_dir = args.code_dir
107 |     logging.info("#" * 50)
108 |     logging.info("Begin running local test using")
109 |     logging.info(f"code_dir = {code_dir}")
110 |     logging.info(f"dataset_dir = {dataset_dir}")
111 |     logging.info("#" * 50)
112 |     run(dataset_dir, code_dir)
113 | 
114 | 
115 | if __name__ == '__main__':
116 |     main()
117 | 
--------------------------------------------------------------------------------
/scoring/metadata:
--------------------------------------------------------------------------------
1 | command: python $program/score.py --solution_dir=$hidden --prediction_dir=$predictions --score_dir=$output
2 | description: Compute scores for the competition
3 | 
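With the two metadata commands and run_local_test.py above, the whole kit can be exercised from the repository root with the defaults, e.g. python run_local_test.py, or explicitly python run_local_test.py --dataset_dir=data/demo --code_dir=code_submission; ingestion and scoring (the next file) then run as two concurrent processes, synchronizing through start.txt and end.yaml.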
--------------------------------------------------------------------------------
/scoring/score.py:
--------------------------------------------------------------------------------
1 | # pylint: disable=logging-fstring-interpolation
2 | """scoring function for autograph"""
3 | 
4 | import argparse
5 | import datetime
6 | import os
7 | from os.path import join
8 | import logging
9 | import sys
10 | import time
11 | 
12 | import yaml
13 | import pandas as pd
14 | from sklearn.metrics import accuracy_score
15 | 
16 | from filelock import FileLock
17 | 
18 | # Verbosity level of logging.
19 | # Can be: NOTSET, DEBUG, INFO, WARNING, ERROR, CRITICAL
20 | # VERBOSITY_LEVEL = 'INFO'
21 | VERBOSITY_LEVEL = 'INFO'
22 | WAIT_TIME = 30
23 | MAX_TIME_DIFF = datetime.timedelta(seconds=600)
24 | DEFAULT_SCORE = -1
25 | SOLUTION_FILE = 'test_label.tsv'
26 | 
27 | 
28 | def get_logger(verbosity_level, use_error_log=False):
29 |     """Set logging format to something like:
30 |     2019-04-25 12:52:51,924 INFO score.py:
31 |     """
32 |     logger = logging.getLogger(__file__)
33 |     logging_level = getattr(logging, verbosity_level)
34 |     logger.setLevel(logging_level)
35 |     formatter = logging.Formatter(
36 |         fmt='%(asctime)s %(levelname)s %(filename)s: %(message)s')
37 |     stdout_handler = logging.StreamHandler(sys.stdout)
38 |     stdout_handler.setLevel(logging_level)
39 |     stdout_handler.setFormatter(formatter)
40 |     logger.addHandler(stdout_handler)
41 |     if use_error_log:
42 |         stderr_handler = logging.StreamHandler(sys.stderr)
43 |         stderr_handler.setLevel(logging.WARNING)
44 |         stderr_handler.setFormatter(formatter)
45 |         logger.addHandler(stderr_handler)
46 |     logger.propagate = False
47 |     return logger
48 | 
49 | 
50 | LOGGER = get_logger(VERBOSITY_LEVEL)
51 | 
52 | 
53 | def _here(*args):
54 |     """Helper function for getting the current directory of the script."""
55 |     here_dir = os.path.dirname(os.path.realpath(__file__))
56 |     return os.path.abspath(join(here_dir, *args))
57 | 
58 | 
59 | def _get_solution(solution_dir):
60 |     """Get the solution array from the solution directory."""
61 |     solution_file = join(solution_dir, SOLUTION_FILE)
62 |     solution = pd.read_csv(solution_file, sep='\t')
63 |     return solution
64 | 
65 | 
66 | def _get_prediction(prediction_dir):
67 |     pred_file = join(prediction_dir, 'predictions')
68 |     return pd.read_csv(pred_file)['label']
69 | 
70 | 
71 | def _get_score(solution_dir, prediction_dir):
72 |     """get score"""
73 |     LOGGER.info('===== get solution')
74 |     solution = _get_solution(solution_dir)['label']
75 |     LOGGER.info('===== read prediction')
76 |     prediction = _get_prediction(prediction_dir)
77 |     if solution.shape != prediction.shape:
78 |         raise ValueError(f"Bad prediction shape: {prediction.shape}. "
79 |                          f"Expected shape: {solution.shape}")
80 | 
81 |     LOGGER.info('===== calculate score')
82 |     LOGGER.debug(f'solution shape = {solution.shape}')
83 |     LOGGER.debug(f'prediction shape = {prediction.shape}')
84 |     score = accuracy_score(solution, prediction)
85 | 
86 |     return score
87 | 
88 | 
89 | def _update_score(args, duration):
90 |     score = _get_score(solution_dir=args.solution_dir,
91 |                        prediction_dir=args.prediction_dir)
92 |     # Update learning curve page (detailed_results.html)
93 |     _write_scores_html(args.score_dir)
94 |     # Write score
95 |     LOGGER.info('===== write score')
96 |     write_score(args.score_dir, score, duration)
97 |     LOGGER.info(f"accuracy: {score:.4f}")
98 |     return score
99 | 
100 | 
101 | def _init_scores_html(detailed_results_filepath):
102 |     html_head = ('<html><head> <meta http-equiv="refresh" content="5"> '
103 |                  '</head><body><pre>')
104 |     html_end = '</pre></body></html>'
105 |     with open(detailed_results_filepath, 'a') as html_file:
106 |         html_file.write(html_head)
107 |         html_file.write("Starting training process... <br> Please be patient. "
108 |                         "Learning curves will be generated when first "
109 |                         "predictions are made.")
110 |         html_file.write(html_end)
111 | 
112 | 
113 | def _write_scores_html(score_dir, auto_refresh=True, append=False):
114 |     filename = 'detailed_results.html'
115 |     if auto_refresh:
116 |         html_head = ('<html><head> <meta http-equiv="refresh" content="5"> '
117 |                      '</head><body><pre>')
118 |     else:
119 |         html_head = """<html><body><pre>"""
120 |     html_end = '</pre></body></html>'
121 |     if append:
122 |         mode = 'a'
123 |     else:
124 |         mode = 'w'
125 |     filepath = join(score_dir, filename)
126 |     with open(filepath, mode) as html_file:
127 |         html_file.write(html_head)
128 |         html_file.write(html_end)
129 |     LOGGER.debug(f"Wrote learning curve page to {filepath}")
130 | 
131 | 
132 | def write_score(score_dir, score, duration):
133 |     """Write score and duration to score_dir/scores.txt"""
134 |     score_filename = join(score_dir, 'scores.txt')
135 |     with open(score_filename, 'w') as ftmp:
136 |         ftmp.write(f'score: {score}\n')
137 |         ftmp.write(f'Duration: {duration}\n')
138 |     LOGGER.debug(f"Wrote to score_filename={score_filename} with "
139 |                  f"score={score}, duration={duration}")
140 | 
141 | 
142 | class IngestionError(Exception):
143 |     """Ingestion error"""
144 | 
145 | 
146 | class ScoringError(Exception):
147 |     """Scoring error"""
148 | 
149 | 
150 | def get_ingestion_info(prediction_dir):
151 |     """get ingestion information"""
152 |     ingestion_info = None
153 |     endfile_path = os.path.join(prediction_dir, 'end.yaml')
154 | 
155 |     if not os.path.isfile(endfile_path):
156 |         raise IngestionError("[-] No end.yaml exists; ingestion failed")
157 | 
158 |     LOGGER.info('===== Detected end.yaml file, get ingestion information')
159 |     with open(endfile_path, 'r') as ftmp:
160 |         ingestion_info = yaml.safe_load(ftmp)
161 | 
162 |     return ingestion_info
163 | 
164 | 
165 | def get_ingestion_pid(prediction_dir):
166 |     """get ingestion pid"""
167 |     # Wait 60 seconds for ingestion to start and write 'start.txt';
168 |     # otherwise, raise an exception.
169 |     wait_time = 60
170 |     startfile = os.path.join(prediction_dir, 'start.txt')
171 |     lockfile = os.path.join(prediction_dir, 'start.txt.lock')
172 | 
173 |     for i in range(wait_time):
174 |         if os.path.exists(startfile):
175 |             with FileLock(lockfile):
176 |                 with open(startfile, 'r') as ftmp:
177 |                     ingestion_pid = ftmp.read()
178 |                 LOGGER.info(
179 |                     f'Detected the start of ingestion after {i} seconds.')
180 |                 return int(ingestion_pid)
181 |         else:
182 |             time.sleep(1)
183 |     raise IngestionError(f"[-] Failed: scoring didn't detect the start of "
184 |                          f"ingestion after {wait_time} seconds.")
185 | 
186 | 
187 | def is_process_alive(ingestion_pid):
188 |     """detect whether the ingestion process is alive"""
189 |     try:
190 |         os.kill(ingestion_pid, 0)
191 |     except OSError:
192 |         return False
193 |     else:
194 |         return True
195 | 
196 | 
197 | def _parse_args():
198 |     # Default I/O directories:
199 |     root_dir = _here(os.pardir)
200 |     default_solution_dir = join(root_dir, "sample_data")
201 |     default_prediction_dir = join(root_dir, "sample_result_submission")
202 |     default_score_dir = join(root_dir, "scoring_output")
203 |     parser = argparse.ArgumentParser()
204 |     parser.add_argument('--solution_dir', type=str,
205 |                         default=default_solution_dir,
206 |                         help=("Directory storing the solution with true "
207 |                               "labels, e.g. adult.solution."))
208 |     parser.add_argument('--prediction_dir', type=str,
209 |                         default=default_prediction_dir,
210 |                         help=("Directory storing the predictions. It should "
211 |                               "contain e.g. [start.txt, adult.predict_0, "
212 |                               "adult.predict_1, ..., end.yaml]."))
213 |     parser.add_argument('--score_dir', type=str,
214 |                         default=default_score_dir,
215 |                         help=("Directory storing the scoring output e.g. "
216 |                               "`scores.txt` and `detailed_results.html`."))
217 |     args = parser.parse_args()
218 |     LOGGER.debug(f"Parsed args are: {args}")
219 |     LOGGER.debug("-" * 50)
220 |     LOGGER.debug(f"Using solution_dir: {args.solution_dir}")
221 |     LOGGER.debug(f"Using prediction_dir: {args.prediction_dir}")
222 |     LOGGER.debug(f"Using score_dir: {args.score_dir}")
223 |     return args
224 | 
225 | 
226 | def _init(args):
227 |     if not os.path.isdir(args.score_dir):
228 |         os.mkdir(args.score_dir)
229 |     detailed_results_filepath = join(
230 |         args.score_dir, 'detailed_results.html')
231 |     # Initialize detailed_results.html
232 |     _init_scores_html(detailed_results_filepath)
233 | 
234 | 
235 | def _finalize(score, scoring_start):
236 |     """finalize the scoring"""
237 |     # Use 'end.yaml' file to detect if ingestion program ends
238 |     duration = time.time() - scoring_start
239 |     LOGGER.info(
240 |         "[+] Successfully finished scoring! "
241 |         f"Scoring duration: {duration:.2f} sec. "
242 |         f"The score of your algorithm on the task is: {score:.6f}.")
243 | 
244 |     LOGGER.info("[Scoring terminated]")
245 | 
246 | 
247 | def main():
248 |     """main entry"""
249 |     scoring_start = time.time()
250 |     LOGGER.info('===== init scoring program')
251 |     args = _parse_args()
252 |     _init(args)
253 |     score = DEFAULT_SCORE
254 | 
255 |     ingestion_pid = get_ingestion_pid(args.prediction_dir)
256 | 
257 |     LOGGER.info("===== wait for the exit of ingestion.")
258 |     while is_process_alive(ingestion_pid):
259 |         time.sleep(1)
260 | 
261 |     # Compute/write score
262 |     ingestion_info = get_ingestion_info(args.prediction_dir)
263 |     duration = ingestion_info['ingestion_duration']
264 |     score = _update_score(args, duration)
265 | 
266 |     _finalize(score, scoring_start)
267 | 
268 | 
269 | if __name__ == "__main__":
270 |     main()
271 | 
--------------------------------------------------------------------------------