├── LICENSE
├── README.md
├── code_submission
    ├── data_space.py
    ├── explore.py
    ├── feat_engine.py
    ├── model.py
    ├── model_lib
    │   ├── __init__.py
    │   ├── appnp.py
    │   ├── arma.py
    │   ├── gat.py
    │   ├── gatedgraph.py
    │   ├── gcn.py
    │   ├── gin.py
    │   ├── graphconvnet.py
    │   ├── graphsage.py
    │   ├── incepgcn.py
    │   ├── jkgcn.py
    │   ├── resgcn.py
    │   ├── sg.py
    │   └── tag.py
    ├── model_space.py
    ├── timer.py
    └── utils
    │   ├── __init__.py
    │   ├── eda.py
    │   └── tools.py
├── data
    └── demo
    │   ├── test_label.tsv
    │   └── train.data
    │       ├── config.yml
    │       ├── edge.tsv
    │       ├── feature.tsv
    │       ├── test_node_id.txt
    │       ├── train_label.tsv
    │       └── train_node_id.txt
├── ingestion
    ├── __pycache__
    │   ├── common.cpython-36.pyc
    │   ├── dataset.cpython-36.pyc
    │   └── timing.cpython-36.pyc
    ├── common.py
    ├── dataset.py
    ├── ingestion.py
    ├── metadata
    └── timing.py
├── run_local_test.py
└── scoring
    ├── metadata
    └── score.py


/LICENSE:
--------------------------------------------------------------------------------
  1 |                                  Apache License
  2 |                            Version 2.0, January 2004
  3 |                         http://www.apache.org/licenses/
  4 | 
  5 |    TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
  6 | 
  7 |    1. Definitions.
  8 | 
  9 |       "License" shall mean the terms and conditions for use, reproduction,
 10 |       and distribution as defined by Sections 1 through 9 of this document.
 11 | 
 12 |       "Licensor" shall mean the copyright owner or entity authorized by
 13 |       the copyright owner that is granting the License.
 14 | 
 15 |       "Legal Entity" shall mean the union of the acting entity and all
 16 |       other entities that control, are controlled by, or are under common
 17 |       control with that entity. For the purposes of this definition,
 18 |       "control" means (i) the power, direct or indirect, to cause the
 19 |       direction or management of such entity, whether by contract or
 20 |       otherwise, or (ii) ownership of fifty percent (50%) or more of the
 21 |       outstanding shares, or (iii) beneficial ownership of such entity.
 22 | 
 23 |       "You" (or "Your") shall mean an individual or Legal Entity
 24 |       exercising permissions granted by this License.
 25 | 
 26 |       "Source" form shall mean the preferred form for making modifications,
 27 |       including but not limited to software source code, documentation
 28 |       source, and configuration files.
 29 | 
 30 |       "Object" form shall mean any form resulting from mechanical
 31 |       transformation or translation of a Source form, including but
 32 |       not limited to compiled object code, generated documentation,
 33 |       and conversions to other media types.
 34 | 
 35 |       "Work" shall mean the work of authorship, whether in Source or
 36 |       Object form, made available under the License, as indicated by a
 37 |       copyright notice that is included in or attached to the work
 38 |       (an example is provided in the Appendix below).
 39 | 
 40 |       "Derivative Works" shall mean any work, whether in Source or Object
 41 |       form, that is based on (or derived from) the Work and for which the
 42 |       editorial revisions, annotations, elaborations, or other modifications
 43 |       represent, as a whole, an original work of authorship. For the purposes
 44 |       of this License, Derivative Works shall not include works that remain
 45 |       separable from, or merely link (or bind by name) to the interfaces of,
 46 |       the Work and Derivative Works thereof.
 47 | 
 48 |       "Contribution" shall mean any work of authorship, including
 49 |       the original version of the Work and any modifications or additions
 50 |       to that Work or Derivative Works thereof, that is intentionally
 51 |       submitted to Licensor for inclusion in the Work by the copyright owner
 52 |       or by an individual or Legal Entity authorized to submit on behalf of
 53 |       the copyright owner. For the purposes of this definition, "submitted"
 54 |       means any form of electronic, verbal, or written communication sent
 55 |       to the Licensor or its representatives, including but not limited to
 56 |       communication on electronic mailing lists, source code control systems,
 57 |       and issue tracking systems that are managed by, or on behalf of, the
 58 |       Licensor for the purpose of discussing and improving the Work, but
 59 |       excluding communication that is conspicuously marked or otherwise
 60 |       designated in writing by the copyright owner as "Not a Contribution."
 61 | 
 62 |       "Contributor" shall mean Licensor and any individual or Legal Entity
 63 |       on behalf of whom a Contribution has been received by Licensor and
 64 |       subsequently incorporated within the Work.
 65 | 
 66 |    2. Grant of Copyright License. Subject to the terms and conditions of
 67 |       this License, each Contributor hereby grants to You a perpetual,
 68 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 69 |       copyright license to reproduce, prepare Derivative Works of,
 70 |       publicly display, publicly perform, sublicense, and distribute the
 71 |       Work and such Derivative Works in Source or Object form.
 72 | 
 73 |    3. Grant of Patent License. Subject to the terms and conditions of
 74 |       this License, each Contributor hereby grants to You a perpetual,
 75 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 76 |       (except as stated in this section) patent license to make, have made,
 77 |       use, offer to sell, sell, import, and otherwise transfer the Work,
 78 |       where such license applies only to those patent claims licensable
 79 |       by such Contributor that are necessarily infringed by their
 80 |       Contribution(s) alone or by combination of their Contribution(s)
 81 |       with the Work to which such Contribution(s) was submitted. If You
 82 |       institute patent litigation against any entity (including a
 83 |       cross-claim or counterclaim in a lawsuit) alleging that the Work
 84 |       or a Contribution incorporated within the Work constitutes direct
 85 |       or contributory patent infringement, then any patent licenses
 86 |       granted to You under this License for that Work shall terminate
 87 |       as of the date such litigation is filed.
 88 | 
 89 |    4. Redistribution. You may reproduce and distribute copies of the
 90 |       Work or Derivative Works thereof in any medium, with or without
 91 |       modifications, and in Source or Object form, provided that You
 92 |       meet the following conditions:
 93 | 
 94 |       (a) You must give any other recipients of the Work or
 95 |           Derivative Works a copy of this License; and
 96 | 
 97 |       (b) You must cause any modified files to carry prominent notices
 98 |           stating that You changed the files; and
 99 | 
100 |       (c) You must retain, in the Source form of any Derivative Works
101 |           that You distribute, all copyright, patent, trademark, and
102 |           attribution notices from the Source form of the Work,
103 |           excluding those notices that do not pertain to any part of
104 |           the Derivative Works; and
105 | 
106 |       (d) If the Work includes a "NOTICE" text file as part of its
107 |           distribution, then any Derivative Works that You distribute must
108 |           include a readable copy of the attribution notices contained
109 |           within such NOTICE file, excluding those notices that do not
110 |           pertain to any part of the Derivative Works, in at least one
111 |           of the following places: within a NOTICE text file distributed
112 |           as part of the Derivative Works; within the Source form or
113 |           documentation, if provided along with the Derivative Works; or,
114 |           within a display generated by the Derivative Works, if and
115 |           wherever such third-party notices normally appear. The contents
116 |           of the NOTICE file are for informational purposes only and
117 |           do not modify the License. You may add Your own attribution
118 |           notices within Derivative Works that You distribute, alongside
119 |           or as an addendum to the NOTICE text from the Work, provided
120 |           that such additional attribution notices cannot be construed
121 |           as modifying the License.
122 | 
123 |       You may add Your own copyright statement to Your modifications and
124 |       may provide additional or different license terms and conditions
125 |       for use, reproduction, or distribution of Your modifications, or
126 |       for any such Derivative Works as a whole, provided Your use,
127 |       reproduction, and distribution of the Work otherwise complies with
128 |       the conditions stated in this License.
129 | 
130 |    5. Submission of Contributions. Unless You explicitly state otherwise,
131 |       any Contribution intentionally submitted for inclusion in the Work
132 |       by You to the Licensor shall be under the terms and conditions of
133 |       this License, without any additional terms or conditions.
134 |       Notwithstanding the above, nothing herein shall supersede or modify
135 |       the terms of any separate license agreement you may have executed
136 |       with Licensor regarding such Contributions.
137 | 
138 |    6. Trademarks. This License does not grant permission to use the trade
139 |       names, trademarks, service marks, or product names of the Licensor,
140 |       except as required for reasonable and customary use in describing the
141 |       origin of the Work and reproducing the content of the NOTICE file.
142 | 
143 |    7. Disclaimer of Warranty. Unless required by applicable law or
144 |       agreed to in writing, Licensor provides the Work (and each
145 |       Contributor provides its Contributions) on an "AS IS" BASIS,
146 |       WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 |       implied, including, without limitation, any warranties or conditions
148 |       of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 |       PARTICULAR PURPOSE. You are solely responsible for determining the
150 |       appropriateness of using or redistributing the Work and assume any
151 |       risks associated with Your exercise of permissions under this License.
152 | 
153 |    8. Limitation of Liability. In no event and under no legal theory,
154 |       whether in tort (including negligence), contract, or otherwise,
155 |       unless required by applicable law (such as deliberate and grossly
156 |       negligent acts) or agreed to in writing, shall any Contributor be
157 |       liable to You for damages, including any direct, indirect, special,
158 |       incidental, or consequential damages of any character arising as a
159 |       result of this License or out of the use or inability to use the
160 |       Work (including but not limited to damages for loss of goodwill,
161 |       work stoppage, computer failure or malfunction, or any and all
162 |       other commercial damages or losses), even if such Contributor
163 |       has been advised of the possibility of such damages.
164 | 
165 |    9. Accepting Warranty or Additional Liability. While redistributing
166 |       the Work or Derivative Works thereof, You may choose to offer,
167 |       and charge a fee for, acceptance of support, warranty, indemnity,
168 |       or other liability obligations and/or rights consistent with this
169 |       License. However, in accepting such obligations, You may act only
170 |       on Your own behalf and on Your sole responsibility, not on behalf
171 |       of any other Contributor, and only if You agree to indemnify,
172 |       defend, and hold each Contributor harmless for any liability
173 |       incurred by, or claims asserted against, such Contributor by reason
174 |       of your accepting any such warranty or additional liability.
175 | 
176 |    END OF TERMS AND CONDITIONS
177 | 
178 |    APPENDIX: How to apply the Apache License to your work.
179 | 
180 |       To apply the Apache License to your work, attach the following
181 |       boilerplate notice, with the fields enclosed by brackets "[]"
182 |       replaced with your own identifying information. (Don't include
183 |       the brackets!)  The text should be enclosed in the appropriate
184 |       comment syntax for the file format. We also recommend that a
185 |       file or class name and description of purpose be included on the
186 |       same "printed page" as the copyright notice for easier
187 |       identification within third-party archives.
188 | 
189 |    Copyright [yyyy] [name of copyright owner]
190 | 
191 |    Licensed under the Apache License, Version 2.0 (the "License");
192 |    you may not use this file except in compliance with the License.
193 |    You may obtain a copy of the License at
194 | 
195 |        http://www.apache.org/licenses/LICENSE-2.0
196 | 
197 |    Unless required by applicable law or agreed to in writing, software
198 |    distributed under the License is distributed on an "AS IS" BASIS,
199 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 |    See the License for the specific language governing permissions and
201 |    limitations under the License.
202 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # AutoGL
 2 | 
 3 | ## What is AutoGL？
 4 | 
 5 | AutoGL is a graph learning framework built on automatic machine learning (AutoML) techniques. AutoGL currently focuses on node classification problems, but it is easy to apply to other graph learning problems.
 6 | 
 7 | AutoGL is the 6th-place solution for the AutoGraph Challenge@KDD'20; the competition rules can be found [here](https://www.automl.ai/competitions/3). We achieved 1st, 4th, 1st, 6th and 27th place on the 5 final-phase datasets. 
 8 | | #   | Dataset1     | Dataset2    | Dataset3   | Dataset4   | Dataset5 | Avg |
 9 | | --- | -------- | ------- | -------- | ------ | ----------- | ---------------------- | 
10 | | rank   | 1  | 4   | 1      | 6  | 27 | 7.8 |
11 | 
12 | ## Usage
13 | Clone this repository to your machine:
14 | ```
15 | git clone https://github.com/JunweiSUN/AutoGL.git
16 | ```
17 | Download datasets from [here](https://www.automl.ai/competitions/6?secret_key=c10be8ef-9a94-417d-bb7a-5711aa6c895b#learn_the_details). You can also create your own datasets with required format.<br>
18 | When the download has finished, unzip the datasets and move them to the `data` folder. Alternatively, you can simply use the demo dataset in `data`.<br>
19 | 
20 | AutoGL could be easily started with [docker](https://www.docker.com/):
21 | ```
22 | cd path/to/AutoGL/
23 | docker run --gpus=0 -it --rm -v "$(pwd):/app/autograph" -w /app/autograph nehzux/kddcup2020:v2
24 | python run_local_test.py --dataset_dir=./data/demo --code_dir=./code_submission
25 | ```
26 | You can change the `dataset_dir` argument to point to other datasets. Likewise, you can change the `code_dir` argument to point to another directory containing your own code.<br>
27 | 
28 | You can also run this program in your own Python environment, in which case you must install all the necessary packages yourself. We therefore recommend running it with docker.
29 | 
30 | ## Acknowledgements
31 | We referred to the following packages and code when developing this program:<br>
32 | 
33 | [nni](https://github.com/microsoft/nni): An open source AutoML toolkit from microsoft<br>
34 | [AutoDL (tabular part)](https://github.com/DeepWisdom/AutoDL/tree/master/AutoDL_sample_code_submission/Auto_Tabular): Automated Deep Learning without ANY human intervention<br>
35 | [pytorch_geometric](https://github.com/rusty1s/pytorch_geometric): Geometric Deep Learning Extension Library for PyTorch<br>
36 | [sparsesvd](https://github.com/RaRe-Technologies/sparsesvd): a fast library for sparse Singular Value Decomposition<br>
37 | [DropEdge](https://github.com/DropEdge/DropEdge): a Pytorch implementation of paper: DropEdge: Towards Deep Graph Convolutional Networks on Node Classification
38 | 
39 | ## Contact us
40 | If you have any questions or suggestions, please feel free to contact our team members:<br>
41 | Junwei Sun: junweisun@bupt.edu.cn<br>
42 | Ruifeng Kuang: kuangruifeng@bupt.edu.cn<br>
43 | Wei Huang: 18262998091@163.com<br>
44 | Changrui Mu: u3553427@connect.hku.hk<br>
45 | Jiayan Wang: jiayanwangno1@gmail.com
46 | 
47 | ## License 
48 | [Apache License 2.0](https://github.com/JunweiSUN/AutoGL/blob/master/LICENSE)
49 | 


--------------------------------------------------------------------------------
/code_submission/data_space.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import pandas as pd
 3 | from sklearn.model_selection import StratifiedShuffleSplit
 4 | from sklearn.preprocessing import StandardScaler, OneHotEncoder
 5 | import gc
 6 | import torch
 7 | from torch_geometric.data import Data
 8 | import time
 9 | import torch_geometric.transforms as T
10 | from torch_geometric.utils import dense_to_sparse
11 | 
class DataSpace:
    """
    Builds the PyG graph object from the raw tables and serves the
    train / validation masks for each exploration round.
    """

    def __init__(self, info, data):
        """
        Generating training / validation / testing data.
        Parameters:
        ----------
        info: dict
            The eda information generated by AutoEDA. This class reads
            info['label_weights'] and info['normalize_features'].
        data: dict
            The original data passed by the ingestion program. Keys read here:
            'fea_table', 'edge_file', 'train_label', 'train_indices',
            'test_indices'.
        ----------
        """
        self.info = info

        self.y = data['train_label']['label'].to_numpy()
        self.pyg_data, self.all_train_idxs, self.test_idxs = self.generate_pyg_data(data)
        self.splits = {}
        self.n_splits = 5
        self.split_train_valid(ratio=0.1)
        # Flag toggled from outside (see Explore) to request a fresh split.
        self.update = False

    def split_train_valid(self, ratio=0.1):
        """
        Pre-compute self.n_splits stratified train / validation splits.
        Parameters:
        ----------
        ratio: float
            Fraction of the training nodes held out for validation.
        ----------
        """
        # NOTE(review): stratification pairs self.all_train_idxs with self.y
        # positionally -- assumes both follow the same node order; confirm.
        sss = StratifiedShuffleSplit(n_splits=self.n_splits, test_size=ratio, random_state=0)
        for i, (train, val) in enumerate(sss.split(self.all_train_idxs, self.y)):
            self.splits[i] = (self.all_train_idxs[train], self.all_train_idxs[val])

    def get_data(self, round_num):
        """
        Attach the masks of the split chosen for this round to the graph.
        Parameters:
        ----------
        round_num: int
            1-based round counter; splits are reused cyclically.
        ----------
        Returns the shared PyG data object with train_mask / valid_mask set.
        """
        train_idxs, val_idxs = self.splits[(round_num - 1) % self.n_splits]
        print(f'Round {round_num}')

        train_mask = torch.zeros(self.pyg_data.num_nodes, dtype=torch.bool)
        train_mask[train_idxs] = True
        self.pyg_data.train_mask = train_mask

        valid_mask = torch.zeros(self.pyg_data.num_nodes, dtype=torch.bool)
        valid_mask[val_idxs] = True
        self.pyg_data.valid_mask = valid_mask

        return self.pyg_data

    @staticmethod
    def _build_edge_tensors(edge_df):
        """
        Return (edge_index, edge_weight) with edges ordered by source node.

        The frame is sorted once and both tensors are read from the sorted
        frame, so every weight stays attached to its own edge. A stable sort
        keeps the original relative order of edges sharing a source.
        """
        ordered = edge_df.sort_values('src_idx', kind='mergesort')
        pairs = ordered[['src_idx', 'dst_idx']].to_numpy()
        edge_index = torch.tensor(pairs, dtype=torch.long).transpose(0, 1)
        edge_weight = torch.tensor(ordered['edge_weight'].to_numpy(), dtype=torch.float32)
        return edge_index, edge_weight

    def generate_pyg_data(self, data):
        """
        Convert the raw tables into a torch_geometric Data object.
        Parameters:
        ----------
        data: dict
            The original data passed by the ingestion program.
        ----------
        Returns (pyg data moved to 'cuda', all training indices, test indices).
        """
        x = data['fea_table'].drop('node_index', axis=1).to_numpy()
        x = torch.tensor(x, dtype=torch.float)

        edge_index, edge_weight = self._build_edge_tensors(data['edge_file'])

        num_nodes = x.size(0)

        # Labels of non-training nodes stay 0; only training rows are filled.
        y = torch.zeros(num_nodes, dtype=torch.long)
        inds = data['train_label'][['node_index']].to_numpy()
        train_y = data['train_label'][['label']].to_numpy()
        y[inds] = torch.tensor(train_y, dtype=torch.long)

        all_train_idxs = np.array(data['train_indices'], dtype=int)
        test_idxs = np.array(data['test_indices'], dtype=int)

        pyg_data = Data(x=x, edge_index=edge_index, y=y, edge_weight=edge_weight)
        pyg_data.num_nodes = num_nodes

        pyg_data.test_idxs = test_idxs

        test_mask = torch.zeros(num_nodes, dtype=torch.bool)
        test_mask[test_idxs] = True
        pyg_data.test_mask = test_mask

        pyg_data.label_weights = self.info['label_weights']

        # The clamp keeps the divisor at 1 or above (no division by zero).
        if self.info['normalize_features'] == 'row':
            print('Feature Normalized By Row')
            pyg_data.x = pyg_data.x / pyg_data.x.sum(1, keepdim=True).clamp(min=1)
        elif self.info['normalize_features'] == 'col':
            print('Feature Normalized By Column')
            pyg_data.x = pyg_data.x / pyg_data.x.sum(0, keepdim=True).clamp(min=1)

        return pyg_data.to('cuda'), all_train_idxs, test_idxs
96 | 


--------------------------------------------------------------------------------
/code_submission/explore.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | import gc
  3 | import collections
  4 | import torch
  5 | import time
  6 | 
  7 | class Explore:
  8 |     def __init__(self, info, model_space, data_space):
  9 |         """
 10 |         Training models and making predictions
 11 |         Parameters:
 12 |         ----------
 13 |         info: dict
 14 |             The eda infomation generated by AutoEDA
 15 |         model_space: ModelSpace
 16 |             Model space
 17 |         data_space: DataSpace
 18 |             Data space
 19 |         ----------
 20 |         """
 21 |         self.info = info
 22 |         self.model_space = model_space
 23 |         self.data_space = data_space
 24 | 
 25 |         self.models = self.model_space.get_models()
 26 |         self.model = None
 27 |         self.model_prior = self.model_space.model_prior
 28 |         self.model_idx = 0
 29 | 
 30 |         self.ensemble_threshold = self.info['ensemble_threshold']
 31 | 
 32 |         self.round_num = 1
 33 | 
 34 |         self.hist_info = {}
 35 |         self.pyg_data = None
 36 |         self.update_predict = True
 37 | 
 38 |     def explore_space(self):
 39 |         if self.model_idx == 0:
 40 |             print('Model Prior:', self.model_prior)
 41 |         self.explore_data_space()
 42 |         self.explore_model_space()
 43 |         val_score = self.model.trial(self.pyg_data, self.round_num)
 44 |         print('Model Name:', self.model.name, 'Round:', self.round_num, 'Val score:', val_score)
 45 |         
 46 |         self.update_model_hist(val_score)
 47 | 
 48 |     def explore_model_space(self):
 49 |         self.model = self.models[self.model_prior[self.model_idx]]
 50 |         self.model_idx += 1
 51 | 
 52 |     def explore_data_space(self):
 53 |         if self.data_space.update or self.pyg_data is None:
 54 |             self.pyg_data = self.data_space.get_data(round_num=self.round_num)
 55 |             self.data_space.update = False
 56 | 
 57 |     def update_model_hist(self, val_score):
 58 |         self.model.hist_score.append(val_score)
 59 |         if val_score > self.model.best_score:
 60 |             self.model.best_score = val_score
 61 |             self.update_predict = True
 62 |         else:
 63 |             self.update_predict = False
 64 | 
 65 |     def sort_model_prior(self):
 66 |         model_perform = collections.defaultdict(list)
 67 |         for name, info in self.hist_info.items():
 68 |             model_perform[name] = [e[0] for e in info]
 69 | 
 70 |         self.model_prior = sorted(self.model_prior, key=lambda x: np.mean(model_perform[x]), reverse=True)
 71 |         self.model_idx = 0
 72 |         self.round_num += 1
 73 | 
 74 |     def get_top_preds(self):
 75 |         models_name = self.hist_info.keys()
 76 |         top_score_and_preds_for_each_model = [sorted(self.hist_info[name], key=lambda e: e[0], reverse=True)[0] for name in models_name]
 77 | 
 78 |         models_name_sorted, models_score_and_preds_sorted = (list(i) for i in
 79 |                                                  zip(*sorted(zip(models_name, top_score_and_preds_for_each_model), key=lambda x: x[1][0], reverse=True)))
 80 | 
 81 |         models_score_sorted = [e[0] for e in models_score_and_preds_sorted]
 82 |         models_preds_sorted = [e[1] for e in models_score_and_preds_sorted]
 83 | 
 84 |         max_score = max(models_score_sorted)
 85 | 
 86 |         for i in range(len(models_score_sorted), 0, -1):
 87 |             top_num = i
 88 |             if models_score_sorted[i-1] + self.ensemble_threshold >= max_score:
 89 |                 break
 90 | 
 91 |         top_score = np.array(models_score_sorted[:top_num])
 92 |         top_score = top_score + 50 * (top_score - top_score.mean())
 93 |         top_score = np.array([max(0.01, i) for i in top_score])
 94 |         weights = top_score / top_score.sum()
 95 |         print('Ensmble Models Weights:', weights)
 96 | 
 97 |         top_preds = []
 98 |         for i in range(top_num):
 99 |             name = models_name_sorted[i]
100 |             rank = i + 1
101 |             score = models_score_sorted[i]
102 |             weight = weights[i]
103 |             preds = models_preds_sorted[i]
104 |             top_preds.append((name, rank, score, weight, preds))
105 | 
106 |         return top_preds
107 | 
108 |     def predict(self):
109 |         if self.update_predict:
110 |             preds = self.model.predict()
111 |             self.model.best_preds = preds
112 |             if self.model.name in self.hist_info:
113 |                 self.hist_info[self.model.name].append((self.model.best_score, self.model.best_preds))
114 |             else:
115 |                 self.hist_info[self.model.name] = [(self.model.best_score, self.model.best_preds)]
116 |             self.update_predict = False
117 | 
118 |         if self.model_idx >= len(self.model_prior):
119 |             self.sort_model_prior()
120 |             self.data_space.update = True
121 | 
122 |         preds = self.blending_predict().argmax(1).flatten()
123 |         return preds
124 | 
125 |     def blending_predict(self):
126 |         top_preds = self.get_top_preds()
127 |         ensmble_models = []
128 |         ensmble_val_scores = []
129 |         ensmble_preds = 0
130 |         for name, rank, score, weight, preds in top_preds:
131 |             m = np.mean(preds)
132 |             ensmble_models.append(name)
133 |             ensmble_val_scores.append(score)
134 |             ensmble_preds += weight * preds / m
135 |         print('Ensmble Models Including:', ensmble_models)
136 |         print('Ensmble Models Val Score:', ensmble_val_scores)
137 |         return ensmble_preds
138 | 


--------------------------------------------------------------------------------
/code_submission/feat_engine.py:
--------------------------------------------------------------------------------
  1 | import pandas as pd
  2 | import numpy as np
  3 | from sparsesvd import sparsesvd
  4 | import scipy
  5 | import time
  6 | import torch_geometric.transforms as T
  7 | from torch_geometric.nn import Node2Vec
  8 | import networkx as nx
  9 | import torch
 10 | 
 11 | class FeatEngine:
 12 |     """
 13 |     A tool box for generating node features.
 14 |     Feature type including: SVD / One Hot / Degree / Node2Vec / Adjacency Matrix .
 15 |     These features can be concatenated.
 16 |     Parameters:
 17 |     ----------
 18 |     info: dict
 19 |         The eda infomation generated by AutoEDA
 20 |     ----------
 21 |     """
 22 |     def __init__(self, info):
 23 |         self.info = info
 24 | 
 25 |     def fit_transform(self, data):
 26 |         if 'original' in self.info['feature_type']:
 27 |             print('Use Original Feature')
 28 |         if 'one_hot' in self.info['feature_type']:
 29 |             print('Use One Hot Feature')
 30 |             data['fea_table'] = self.generate_one_hot_feature(data)
 31 |         if 'svd' in self.info['feature_type']:
 32 |             print('Use SVD Feature')
 33 |             data['fea_table'] = self.generate_svd_feature(data, num_features=64)
 34 |         if 'degree' in self.info['feature_type']:
 35 |             print('Use Degree Feature')
 36 |             data['fea_table'] = self.generate_degree_feature(data)
 37 |         if 'node2vec' in self.info['feature_type']:
 38 |             print('Use Node2Vec Feature')
 39 |             data['fea_table'] = self.generate_node2vec_feature(data, epochs=20, num_features=64)
 40 |         if 'adj' in self.info['feature_type']:
 41 |             print('Use Adjacency Feature')
 42 |             data['fea_table'] = self.generate_adj_feature(data, use_weight=False)
 43 | 
 44 |     def generate_svd_feature(self, data, num_features=64):
 45 |         feat_df, edge_df = data['fea_table'], data['edge_file']
 46 |         adj_matrix = np.zeros((self.info['num_nodes'], self.info['num_nodes']))
 47 |         edges = edge_df.to_numpy(dtype=int)
 48 |         for edge in edges:
 49 |             adj_matrix[edge[0], edge[1]] = 1
 50 |         sparse_adj_matrix = scipy.sparse.csc_matrix(adj_matrix)
 51 |         ut, s, vt = sparsesvd(sparse_adj_matrix, num_features)
 52 |         svd_feats = pd.DataFrame(np.dot(ut.T, np.diag(s)))
 53 |         return pd.concat([feat_df, svd_feats], axis=1)
 54 | 
 55 |     def generate_adj_feature(self, data, use_weight=True):
 56 |         feat_df, edge_df = data['fea_table'], data['edge_file']
 57 |         adj_matrix = np.zeros((self.info['num_nodes'], self.info['num_nodes']))
 58 |         edges = edge_df.to_numpy(dtype=int)
 59 | 
 60 |         if use_weight:
 61 |             for edge in edges:
 62 |                 adj_matrix[edge[0], edge[1]] = edge[2]
 63 |         else:
 64 |             for edge in edges:
 65 |                 adj_matrix[edge[0], edge[1]] = 1
 66 |         
 67 |         adj_feats = pd.DataFrame(adj_matrix)
 68 |         return pd.concat([feat_df, adj_feats], axis=1)
 69 | 
 70 |     def generate_one_hot_feature(self, data):
 71 |         return pd.concat([data['fea_table'], pd.get_dummies(data['fea_table'].to_numpy().flatten())], axis=1)
 72 | 
 73 |     def generate_degree_feature(self, data):
 74 |         g = nx.DiGraph()
 75 |         edges = data['edge_file'].to_numpy().astype(int)
 76 |         g.add_weighted_edges_from(edges)
 77 | 
 78 |         degree_feat = np.zeros((self.info['num_nodes'], 2))
 79 |         for node_idx in range(self.info['num_nodes']):
 80 |             in_degree, out_degree = g.in_degree(node_idx), g.out_degree(node_idx)
 81 |             degree_feat[node_idx,0], degree_feat[node_idx,1] = in_degree, out_degree
 82 |             in_edges = g.in_edges(node_idx, data=True)
 83 |             out_edges = g.out_edges(node_idx, data=True)
 84 |             in_weights = [e[2]['weight'] for e in in_edges]
 85 |             out_weights = [e[2]['weight'] for e in out_edges]
 86 |             degree_feat[2] = in_degree - out_degree
 87 | 
 88 |         return pd.concat([data['fea_table'], pd.DataFrame(degree_feat)], axis=1)
 89 | 
 90 |     def generate_node2vec_feature(self, data, epochs=20, num_features=64):
 91 |         edge_index = data['edge_file'][['src_idx', 'dst_idx']].to_numpy()
 92 |         edge_index = sorted(edge_index, key=lambda d: d[0])
 93 |         edge_index = torch.tensor(edge_index, dtype=torch.long).transpose(0, 1)
 94 | 
 95 |         model = Node2Vec(edge_index, embedding_dim=num_features, walk_length=20,
 96 |                  context_size=10, walks_per_node=10, num_negative_samples=1, sparse=True).to('cuda')
 97 | 
 98 |         loader = model.loader(batch_size=128, shuffle=True, num_workers=4)
 99 |         optimizer = torch.optim.SparseAdam(model.parameters(), lr=0.01)
100 | 
101 |         def train():
102 |             model.train()
103 |             total_loss = 0
104 |             for pos_rw, neg_rw in loader:
105 |                 optimizer.zero_grad()
106 |                 loss = model.loss(pos_rw.to('cuda'), neg_rw.to('cuda'))
107 |                 loss.backward()
108 |                 optimizer.step()
109 |                 total_loss += loss.item()
110 |             return total_loss / len(loader)
111 | 
112 |         for epoch in range(1, epochs+1):
113 |             loss = train()
114 |             print(f'Epoch: {epoch:02d}, Loss: {loss:.4f}')
115 | 
116 |         return pd.concat([data['fea_table'], pd.DataFrame(model().detach().cpu().numpy())], axis=1)
117 | 


--------------------------------------------------------------------------------
/code_submission/model.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import pandas as pd
 3 | import torch
 4 | import time
 5 | import random
 6 | import os
 7 | import signal
# Install extra third-party packages at import time, before the project
# modules below are imported (the model_lib modules import nni).
# NOTE(review): presumably needed because the evaluation image does not ship
# these packages — confirm; exit codes of the installs are not checked.
os.system('pip install nni')
os.system('pip install seaborn')
os.system('pip install cython')
os.system('pip install sparsesvd')
12 | from utils.eda import AutoEDA
13 | from utils.tools import fix_seed
14 | from explore import Explore
15 | from data_space import DataSpace
16 | from model_space import ModelSpace
17 | from feat_engine import FeatEngine
18 | 
19 | fix_seed(1234)
def timeout_handler(signum, frame):
    """
    Signal handler that turns a delivered signal into a ``Timeout`` exception.

    Both arguments are the ones Python passes to every signal handler
    (signal number and current stack frame); neither is used here.
    """
    raise Timeout()
26 | signal.signal(signal.SIGTSTP, timeout_handler)
27 | 
class Timeout(Exception):
    """Raised by ``timeout_handler``; caught in ``Model.train_predict`` to end the search loop."""
30 | 
class Model:
    """
    Main Class for training and predicting.

    ``train_predict`` builds the EDA / feature / data-space / model-space
    pipeline and then keeps running exploration rounds until the time budget
    is used up or a ``Timeout`` is raised.
    """
    def __init__(self):
        self.device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
        # Latest predictions. Initialised here so train_predict can return
        # it even when no exploration round finished in time.
        self.preds = None

    def predict(self):
        """Run one exploration round and return the explorer's predictions."""
        self.explore.explore_space()
        return self.explore.predict()

    def train_predict(self, data, time_budget, n_class, schema):
        """Run the whole pipeline within ``time_budget`` seconds.

        Args:
            data: dataset object handed to the EDA, feature engine and
                data space.
            time_budget: total number of seconds available.
            n_class: number of classes, forwarded to ``AutoEDA``.
            schema: accepted for interface compatibility; not used here.

        Returns:
            The predictions of the last completed exploration round, or
            ``None`` if no round completed before time ran out.
        """
        # start a timer for timing.
        # timer.py sits next to this file; it is started in the background
        # with one second less than the budget and this process's pid.
        timer_abs_path = os.path.join(
            os.path.dirname(os.path.abspath(__file__)), 'timer.py')
        pid = os.getpid()
        os.system(f'python {timer_abs_path} {time_budget - 1} {pid} &')

        start = time.time()
        self.auto_eda = AutoEDA(n_class)
        info = self.auto_eda.get_info(data)
        print('EDA Finished, Remaining', time_budget + start - time.time())
        self.feat_engine = FeatEngine(info)
        self.feat_engine.fit_transform(data)
        print('Feature Engine Finished, Remaining', time_budget + start - time.time())
        self.data_space = DataSpace(info, data)
        print('Data Space Constructed, Remaining', time_budget + start - time.time())
        self.model_space = ModelSpace(info)
        print('Model Space Constructed, Remaining', time_budget + start - time.time())
        self.explore = Explore(info, self.model_space, self.data_space)

        # start training
        # Keep exploring while budget remains; a Timeout raised by the signal
        # handler interrupts the current round and keeps the previous result.
        while time_budget + start - time.time() > 0:
            try:
                self.preds = self.predict()
            except Timeout:
                break

        return self.preds
72 | 


--------------------------------------------------------------------------------
/code_submission/model_lib/__init__.py:
--------------------------------------------------------------------------------
 1 | from .gat import GAT
 2 | from .gcn import GCN
 3 | from .graphconvnet import GraphConvNet
 4 | from .graphsage import GraphSAGE
 5 | from .appnp import APPNPNet
 6 | from .arma import ARMA
 7 | from .gatedgraph import GatedGraphNet
 8 | from .gin import GIN
 9 | from .sg import SG
10 | from .tag import TAG
11 | from .incepgcn import IncepGCN
12 | from .resgcn import ResGCN
13 | from .jkgcn import JKGCN
14 | 


--------------------------------------------------------------------------------
/code_submission/model_lib/appnp.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import torch.nn.functional as F
  3 | from torch.nn import Linear
  4 | from torch_geometric.nn import APPNP
  5 | import copy
  6 | from sklearn.metrics import f1_score
  7 | from utils.tools import fix_seed, AverageMeter
  8 | from nni.hyperopt_tuner.hyperopt_tuner import HyperoptTuner
  9 | from torch_geometric.utils import dropout_adj
 10 | import random
# Called once when this module is imported, with a fixed value.
# NOTE(review): presumably seeds the RNGs for reproducibility — see
# utils.tools.fix_seed to confirm.
fix_seed(1234)
 12 | 
 13 | 
 14 | class APPNPNet(torch.nn.Module):
 15 | 
 16 |     def __init__(self, info):
 17 |         super(APPNPNet, self).__init__()
 18 | 
 19 |         self.info = info
 20 | 
 21 |         self.best_score = 0
 22 |         self.hist_score = []
 23 | 
 24 |         self.best_preds = None
 25 |         self.current_round_best_preds = None
 26 |         self.best_valid_score = 0
 27 |         self.max_patience = 100
 28 |         self.max_epochs = 1600
 29 |         
 30 |         self.name = 'APPNP'
 31 | 
 32 |         self.hyperparameters = {
 33 |             'num_layers': self.info['num_layers'],
 34 |             'lr': 0.005,
 35 |             'K': 10,
 36 |             'alpha': 0.15,
 37 |             'dropedge_rate': self.info['dropedge_rate'],
 38 |             'dropout_rate': self.info['dropout_rate'],
 39 |             'hidden': self.info['init_hidden_size']
 40 |         }
 41 |         self.best_hp = None
 42 |         self.tuner = HyperoptTuner(algorithm_name='tpe', optimize_mode='maximize')
 43 |         search_space = {
 44 |                 "dropedge_rate": {
 45 |                     "_type": "uniform",
 46 |                     "_value": [0, 1]
 47 |                 },
 48 |                 "dropout_rate": {
 49 |                     "_type": "uniform",
 50 |                     "_value": [0, 1]
 51 |                 },
 52 |                 "num_layers": {
 53 |                     "_type": "randint",
 54 |                     "_value": [2, 3]
 55 |                 },
 56 |                 "hidden": {
 57 |                     "_type": "quniform",
 58 |                     "_value": [4, 7, 1]
 59 |                 },
 60 |                 "lr":{
 61 |                     "_type": "choice",
 62 |                     "_value": [self.info['lr']]
 63 |                 },
 64 |                 'K' :{
 65 |                     "_type": "quniform",
 66 |                     "_value": [1, 6, 1]
 67 |                 },
 68 |                 'alpha':{
 69 |                     "_type": "uniform",
 70 |                     "_value": [0, 1] 
 71 |                 }
 72 |             }
 73 |         self.tuner.update_search_space(search_space)
 74 | 
 75 |     def init_model(self, n_class, feature_num):
 76 |         hidden_size = int(2 ** self.hyperparameters['hidden'])
 77 |         K = int(self.hyperparameters['K'])
 78 |         self.lin1 = Linear(feature_num, hidden_size)
 79 |         self.lin2 = Linear(hidden_size, n_class)
 80 |         self.prop1 = APPNP(K=K, alpha=self.hyperparameters['alpha'])
 81 | 
 82 |         self.optimizer = torch.optim.Adam(self.parameters(), lr=self.hyperparameters['lr'], weight_decay=5e-4)
 83 | 
 84 |         self = self.to('cuda')
 85 | 
 86 |     def forward(self, data):
 87 |         x, edge_index, edge_weight = data.x, data.edge_index, data.edge_weight
 88 |         if self.hyperparameters['dropedge_rate'] is not None:
 89 |             edge_index, edge_weight = dropout_adj(edge_index, edge_weight, p=self.hyperparameters['dropedge_rate'],\
 90 |                  force_undirected=False, num_nodes=None, training=self.training)
 91 | 
 92 |         x = F.dropout(x, p=self.hyperparameters['dropout_rate'], training=self.training)
 93 |         x = F.relu(self.lin1(x))
 94 |         x = F.dropout(x, p=self.hyperparameters['dropout_rate'], training=self.training)
 95 |         x = self.lin2(x)
 96 |         x = self.prop1(x, edge_index,edge_weight)
 97 |         return x
 98 | 
 99 |     def trial(self, data, round_num):
100 |         n_class, feature_num = self.info['n_class'], data.x.shape[1]
101 |         if round_num >= 2:
102 |             self.hyperparameters = self.tuner.generate_parameters(round_num-1)
103 |         print(self.hyperparameters)    
104 |            
105 |         while True:
106 |             try:
107 |                 self.init_model(n_class, feature_num)
108 |                 best_valid_score = self.train_valid(data, round_num)
109 |                 if round_num > 1:
110 |                     self.tuner.receive_trial_result(round_num-1,self.hyperparameters,val_score)
111 |                 if val_score > self.best_score:
112 |                     self.best_hp = copy.deepcopy(self.hyperparameters)
113 |                 break
114 |             except RuntimeError as e:
115 |                 print(self.name,e, 'OOM with Hidden Size', self.hyperparameters['hidden'])
116 |                 if round_num > 1:
117 |                     self.tuner.receive_trial_result(round_num-1,self.hyperparameters,0)
118 |                 return 0
119 |         print("Best Hyperpameters of", self.name, self.best_hp)
120 |         return val_score
121 | 
122 |     def train_valid(self, data, round_num):
123 |         y, train_mask, valid_mask, test_mask, label_weights = data.y, data.train_mask, data.valid_mask, data.test_mask, data.label_weights
124 | 
125 |         patience = self.max_patience
126 |         best_valid_score = 0
127 |         valid_acc_meter = AverageMeter()
128 |         for epoch in range(self.max_epochs):
129 | 
130 |             # train
131 |             self.train()
132 |             self.optimizer.zero_grad()
133 |             preds = self.forward(data)
134 |             loss = F.cross_entropy(preds[train_mask], y[train_mask], label_weights)
135 |             loss.backward()
136 |             self.optimizer.step()
137 | 
138 |             # valid
139 |             self.eval()
140 |             with torch.no_grad():
141 |                 preds = F.softmax(self.forward(data), dim=-1)
142 |                 valid_preds, test_preds = preds[valid_mask], preds[test_mask]
143 |                 valid_score = f1_score(y[valid_mask].cpu(), valid_preds.max(1)[1].flatten().cpu(), average='micro')
144 | 
145 |             valid_acc_meter.update(valid_score)
146 | 
147 |             # patience
148 |             if valid_acc_meter.avg > best_valid_score:
149 |                 best_valid_score = valid_acc_meter.avg
150 |                 self.current_round_best_preds = test_preds
151 |                 patience = self.max_patience
152 |             else:
153 |                 patience -= 1
154 | 
155 |             if patience == 0:
156 |                 break
157 | 
158 |         return best_valid_score
159 | 
160 |     def predict(self):
161 |         return self.current_round_best_preds.cpu().numpy()
162 | 
163 |     def __repr__(self):
164 |         return self.__class__.__name__


--------------------------------------------------------------------------------
/code_submission/model_lib/arma.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import torch.nn.functional as F
  3 | from torch.nn import Linear
  4 | from torch_geometric.nn import ARMAConv
  5 | import copy
  6 | from sklearn.metrics import f1_score
  7 | from utils.tools import fix_seed, AverageMeter
  8 | from nni.hyperopt_tuner.hyperopt_tuner import HyperoptTuner
  9 | from torch_geometric.utils import dropout_adj
 10 | import random
# Called once when this module is imported, with a fixed value.
# NOTE(review): presumably seeds the RNGs for reproducibility — see
# utils.tools.fix_seed to confirm.
fix_seed(1234)
 12 | 
 13 | class ARMA(torch.nn.Module):
 14 | 
 15 |     def __init__(self, info):
 16 |         super(ARMA, self).__init__()
 17 | 
 18 |         self.info = info
 19 | 
 20 |         self.best_score = 0
 21 |         self.hist_score = []
 22 | 
 23 |         self.best_preds = None
 24 |         self.current_round_best_preds = None
 25 |         self.best_valid_score = 0
 26 |         self.max_patience = 100
 27 |         self.max_epochs = 1600
 28 |         
 29 |         self.name = 'ARMA'
 30 | 
 31 |         self.hidden = 16
 32 |         self.lr = 0.005
 33 |         self.hyperparameters = {
 34 |             'num_layers': self.info['num_layers'],
 35 |             'lr': self.info['lr'],
 36 |             'num_stacks': 1,
 37 |             'conv_layers': 1,
 38 |             'dropedge_rate': self.info['dropedge_rate'],
 39 |             'dropout_rate': 0.5,
 40 |             'hidden': self.info['init_hidden_size'],
 41 |             'use_linear': self.info['use_linear']
 42 |         }
 43 |         self.best_hp = None
 44 |         self.tuner = HyperoptTuner(algorithm_name='tpe', optimize_mode='maximize')
 45 |         search_space = {
 46 |                 "dropedge_rate": {
 47 |                     "_type": "choice",
 48 |                     "_value": [self.info['dropedge_rate']]
 49 |                 },
 50 |                 "dropout_rate": {
 51 |                     "_type": "choice",
 52 |                     "_value": [self.info['dropout_rate']]
 53 |                 },
 54 |                 "num_layers": {
 55 |                     "_type": "quniform",
 56 |                     "_value": [1, 3, 1]
 57 |                 },
 58 |                 "hidden": {
 59 |                     "_type": "quniform",
 60 |                     "_value": [4, 7, 1]
 61 |                 },
 62 |                 "lr":{
 63 |                     "_type": "choice",
 64 |                     "_value": [0.005]
 65 |                 },
 66 |                 'num_stacks' : {
 67 |                     "_type": "quniform",
 68 |                     "_value": [1, 5, 1]
 69 |                 },
 70 |                 'conv_layers' : {
 71 |                     "_type": "quniform",
 72 |                     "_value": [1, 5, 1]
 73 |                 },
 74 |                 'use_linear': {
 75 |                     "_type":"choice",
 76 |                     "_value":[True, False]
 77 |                 }
 78 |             }
 79 |         self.tuner.update_search_space(search_space)
 80 | 
 81 |     def init_model(self, n_class, feature_num):
 82 |         hidden_size = int(2 ** self.hyperparameters['hidden'])
 83 |         num_stacks = int(self.hyperparameters['num_stacks'])
 84 |         conv_layers = int(self.hyperparameters['conv_layers'])
 85 |         lr = self.hyperparameters['lr']
 86 |         dropout = self.hyperparameters['dropout_rate']
 87 |         num_layers = int(self.hyperparameters['num_layers'])
 88 |         if self.hyperparameters['use_linear']:
 89 |             self.input_lin = Linear(feature_num, hidden_size)
 90 |             self.convs = torch.nn.ModuleList()
 91 |             for i in range(num_layers):
 92 |                 self.convs.append(ARMAConv(hidden_size, hidden_size, num_stacks=num_stacks, num_layers=conv_layers, dropout=dropout))
 93 |             self.output_lin = Linear(hidden_size, n_class)
 94 |         else:
 95 |             if num_layers == 1:
 96 |                 self.conv1 = ARMAConv(feature_num, n_class, num_stacks=num_stacks,\
 97 |                  num_layers=conv_layers, shared_weights=False, dropout=dropout)
 98 |             else:
 99 |                 self.conv1 = ARMAConv(feature_num, hidden_size, num_stacks=num_stacks,\
100 |                     num_layers=conv_layers, shared_weights=False, dropout=dropout)
101 |                 self.convs = torch.nn.ModuleList()
102 |                 for i in range(num_layers - 2):    
103 |                     self.convs.append(ARMAConv(hidden_size, hidden_size, num_stacks=num_stacks,\
104 |                         num_layers=conv_layers, shared_weights=False, dropout=dropout))
105 |                 self.conv2 = ARMAConv(hidden_size, n_class, num_stacks=num_stacks,\
106 |                         num_layers=conv_layers, shared_weights=False, dropout=dropout)
107 |         self.optimizer = torch.optim.Adam(self.parameters(), lr=lr, weight_decay=5e-4)
108 | 
109 |         self = self.to('cuda')
110 | 
111 |         torch.cuda.empty_cache()
112 | 
113 |     def forward(self, data):
114 |         x, edge_index, edge_weight = data.x, data.edge_index, data.edge_weight
115 |         if self.hyperparameters['dropedge_rate'] is not None:
116 |             edge_index, edge_weight = dropout_adj(edge_index, edge_weight, p=self.hyperparameters['dropedge_rate'],\
117 |                  force_undirected=False, num_nodes=None, training=self.training)
118 |         
119 |         if self.hyperparameters['use_linear']:
120 |             x = F.relu(self.input_lin(x))
121 |         else:
122 |             x = F.relu(self.conv1(x, edge_index,edge_weight))
123 |             if self.hyperparameters['num_layers'] == 1:
124 |                 return x
125 | 
126 |         x = F.dropout(x, p=self.hyperparameters['dropout_rate'], training=self.training)
127 |         for conv in self.convs:
128 |             x = F.relu(conv(x, edge_index, edge_weight=edge_weight))
129 |         if self.hyperparameters['use_linear']:
130 |             x = self.output_lin(x)
131 |         else:
132 |             x = self.conv2(x, edge_index,edge_weight)
133 |         return x
134 |     
135 |     def trial(self, data, round_num):
136 |         n_class, feature_num = self.info['n_class'], data.x.shape[1]
137 |         if round_num >= 2:
138 |             self.hyperparameters = self.tuner.generate_parameters(round_num-1)
139 |         print(self.hyperparameters)    
140 |            
141 |         while True:
142 |             try:
143 |                 self.init_model(n_class, feature_num)
144 |                 val_score = self.train_valid(data, round_num)
145 |                 if round_num > 1:
146 |                     self.tuner.receive_trial_result(round_num-1,self.hyperparameters,val_score)
147 |                 if val_score > self.best_score:
148 |                     self.best_hp = copy.deepcopy(self.hyperparameters)
149 |                 break
150 |             except RuntimeError as e:
151 |                 print(self.name,e, 'OOM with Hidden Size', self.hyperparameters['hidden'])
152 |                 if round_num > 1:
153 |                     self.tuner.receive_trial_result(round_num-1,self.hyperparameters,0)
154 |                 return 0
155 |         print("Best Hyperparameters of", self.name, self.best_hp)
156 |         return val_score
157 | 
158 |     def train_valid(self, data, round_num):
159 |         y, train_mask, valid_mask, test_mask, label_weights = data.y, data.train_mask, data.valid_mask, data.test_mask, data.label_weights
160 | 
161 | 
162 |         patience = self.max_patience
163 |         best_valid_score = 0
164 |         valid_acc_meter = AverageMeter()
165 |         for epoch in range(self.max_epochs):
166 | 
167 |             # train
168 |             self.train()
169 |             self.optimizer.zero_grad()
170 |             preds = self.forward(data)
171 |             loss = F.cross_entropy(preds[train_mask], y[train_mask], label_weights)
172 |             loss.backward()
173 |             self.optimizer.step()
174 | 
175 |             # valid
176 |             self.eval()
177 |             with torch.no_grad():
178 |                 preds = F.softmax(self.forward(data), dim=-1)
179 |                 valid_preds, test_preds = preds[valid_mask], preds[test_mask]
180 |                 valid_score = f1_score(y[valid_mask].cpu(), valid_preds.max(1)[1].flatten().cpu(), average='micro')
181 | 
182 |             valid_acc_meter.update(valid_score)
183 |             # patience
184 |             if valid_acc_meter.avg > best_valid_score:
185 |                 best_valid_score = valid_acc_meter.avg
186 |                 self.current_round_best_preds = test_preds
187 |                 patience = self.max_patience
188 |             else:
189 |                 patience -= 1
190 | 
191 |             if patience == 0:
192 |                 break
193 | 
194 |         return best_valid_score
195 | 
196 |     def predict(self):
197 |         return self.current_round_best_preds.cpu().numpy()
198 | 
199 |     def __repr__(self):
200 |         return self.__class__.__name__


--------------------------------------------------------------------------------
/code_submission/model_lib/gat.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import torch.nn.functional as F
  3 | from torch.nn import Linear
  4 | from torch_geometric.nn import GATConv
  5 | import copy
  6 | from sklearn.metrics import f1_score
  7 | from utils.tools import fix_seed, AverageMeter
  8 | from nni.hyperopt_tuner.hyperopt_tuner import HyperoptTuner
  9 | from torch_geometric.utils import dropout_adj
 10 | import random
# Called once when this module is imported, with a fixed value.
# NOTE(review): presumably seeds the RNGs for reproducibility — see
# utils.tools.fix_seed to confirm.
fix_seed(1234)
 12 | 
 13 | class GAT(torch.nn.Module):
 14 | 
 15 |     def __init__(self, info):
 16 |         super(GAT, self).__init__()
 17 |         self.info = info
 18 | 
 19 |         self.best_score = 0
 20 |         self.hist_score = []
 21 | 
 22 |         self.best_preds = None
 23 |         self.current_round_best_preds = None
 24 |         self.best_valid_score = 0
 25 |         self.max_patience = 100
 26 |         self.max_epochs = 1600
 27 |         
 28 |         self.name = 'GAT'
 29 |         self.hyperparameters = {
 30 |             'num_layers': self.info['num_layers'],
 31 |             'lr': self.info['lr'],
 32 |             'heads': 5,
 33 |             'dropedge_rate': self.info['dropedge_rate'],
 34 |             'dropout_rate': self.info['dropout_rate'],
 35 |             'hidden': 8
 36 |         }
 37 |         self.best_hp = None
 38 |         self.tuner = HyperoptTuner(algorithm_name='tpe', optimize_mode='maximize')
 39 |         search_space = {
 40 |                 "dropedge_rate": {
 41 |                     "_type": "choice",
 42 |                     "_value": [self.info['dropedge_rate']]
 43 |                 },
 44 |                 "dropout_rate": {
 45 |                     "_type": "choice",
 46 |                     "_value": [self.info['dropout_rate']]
 47 |                 },
 48 |                 "num_layers": {
 49 |                     "_type": "choice",
 50 |                     "_value": [2]
 51 |                 },
 52 |                 "hidden": {
 53 |                     "_type": "quniform",
 54 |                     "_value": [4, 7, 1]
 55 |                 },
 56 |                 "lr":{
 57 |                     "_type": "choice",
 58 |                     "_value": [0.005]
 59 |                 },
 60 |                 'heads' :{
 61 |                     "_type": "quniform",
 62 |                     "_value": [1, 10, 1]
 63 |                 } 
 64 |             }
 65 |         self.tuner.update_search_space(search_space)
 66 | 
 67 |     def init_model(self, n_class, feature_num):
 68 |         heads = int(self.hyperparameters['heads'])
 69 |         hidden_size = int(2 ** self.hyperparameters['hidden'])
 70 | 
 71 |         self.conv1 = GATConv(feature_num, hidden_size, heads=heads, dropout=self.hyperparameters['dropout_rate'])
 72 |         self.conv2 = GATConv(hidden_size * heads, n_class, concat=False, dropout=self.hyperparameters['dropout_rate'])
 73 | 
 74 |         self.optimizer = torch.optim.Adam(self.parameters(), lr=self.hyperparameters['lr'], weight_decay=5e-4)
 75 | 
 76 |         self = self.to('cuda')
 77 | 
 78 |         torch.cuda.empty_cache()
 79 |         
 80 | 
 81 |     def forward(self, data):
 82 |         x, edge_index, edge_weight = data.x, data.edge_index, data.edge_weight
 83 |         if self.hyperparameters['dropedge_rate'] is not None:
 84 |             edge_index, edge_weight = dropout_adj(edge_index, edge_weight, p=self.hyperparameters['dropedge_rate'],\
 85 |                  force_undirected=False, num_nodes=None, training=self.training)
 86 | 
 87 |         x = F.dropout(x, p=self.hyperparameters['dropout_rate'], training=self.training)
 88 |         x = F.elu(self.conv1(x, edge_index))
 89 |         x = F.dropout(x, p=self.hyperparameters['dropout_rate'], training=self.training)
 90 |         x = self.conv2(x, edge_index)
 91 |         return x
 92 | 
 93 |     def trial(self, data, round_num):
 94 |         n_class, feature_num = self.info['n_class'], data.x.shape[1]
 95 |         if round_num >= 2:
 96 |             self.hyperparameters = self.tuner.generate_parameters(round_num-1)
 97 |         print(self.hyperparameters)    
 98 |            
 99 |         while True:
100 |             try:
101 |                 self.init_model(n_class, feature_num)
102 |                 val_score = self.train_valid(data, round_num)
103 |                 if round_num > 1:
104 |                     self.tuner.receive_trial_result(round_num-1,self.hyperparameters,val_score)
105 |                 if val_score > self.best_score:
106 |                     self.best_hp = copy.deepcopy(self.hyperparameters)
107 |                 break
108 |             except RuntimeError as e:
109 |                 print(self.name,e, 'OOM with Hidden Size', self.hyperparameters['hidden'])
110 |                 if round_num > 1:
111 |                     self.tuner.receive_trial_result(round_num-1,self.hyperparameters,0)
112 |                 return 0
113 |         print("Best Hyperparameters of", self.name, self.best_hp)
114 |         return val_score
115 | 
116 | 
117 | 
118 |     def train_valid(self, data, round_num):
119 |         y, train_mask, valid_mask, test_mask, label_weights = data.y, data.train_mask, data.valid_mask, data.test_mask, data.label_weights
120 | 
121 |         patience = self.max_patience
122 |         best_valid_score = 0
123 |         valid_acc_meter = AverageMeter()
124 |         for epoch in range(self.max_epochs):
125 | 
126 |             # train
127 |             self.train()
128 |             self.optimizer.zero_grad()
129 |             preds = self.forward(data)
130 |             loss = F.cross_entropy(preds[train_mask], y[train_mask], label_weights)
131 |             loss.backward()
132 |             self.optimizer.step()
133 | 
134 |             # valid
135 |             self.eval()
136 |             with torch.no_grad():
137 |                 preds = F.softmax(self.forward(data), dim=-1)
138 |                 valid_preds, test_preds = preds[valid_mask], preds[test_mask]
139 |                 valid_score = f1_score(y[valid_mask].cpu(), valid_preds.max(1)[1].flatten().cpu(), average='micro')
140 | 
141 |             valid_acc_meter.update(valid_score)
142 |             # patience
143 |             if valid_acc_meter.avg > best_valid_score:
144 |                 best_valid_score = valid_acc_meter.avg
145 |                 self.current_round_best_preds = test_preds
146 |                 patience = self.max_patience
147 |             else:
148 |                 patience -= 1
149 | 
150 |             if patience == 0:
151 |                 break
152 | 
153 |         return best_valid_score
154 | 
155 |     def epoch_train(self, data, run_num, info, time_remain):
156 |         y, train_mask = data.y, data.train_mask
157 |         self.train()
158 |         self.optimizer.zero_grad()
159 |         preds = self.forward(data)
160 |         loss = F.cross_entropy(preds[train_mask], y[train_mask])
161 |         loss.backward()
162 |         self.optimizer.step()
163 | 
164 | 
165 |     def epoch_valid(self, data):
166 |         y, valid_mask, test_mask = data.y, data.valid_mask, data.test_mask
167 |         
168 |         self.eval()
169 |         with torch.no_grad():
170 |             preds = F.softmax(self.forward(data), dim=-1)
171 |             valid_preds, test_preds = preds[valid_mask], preds[test_mask]
172 |             self.current_preds = test_preds
173 |             valid_score = f1_score(y[valid_mask].cpu(), valid_preds.max(1)[1].flatten().cpu(), average='micro')
174 |         return valid_score
175 | 
176 |     def predict(self):
177 |         return self.current_round_best_preds.cpu().numpy()
178 | 
179 |     def __repr__(self):
180 |         return self.__class__.__name__
181 | 


--------------------------------------------------------------------------------
/code_submission/model_lib/gatedgraph.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import torch.nn.functional as F
  3 | from torch.nn import Linear
  4 | from torch_geometric.nn import GatedGraphConv
  5 | import copy
  6 | from sklearn.metrics import f1_score
  7 | from utils.tools import fix_seed, AverageMeter
  8 | from nni.hyperopt_tuner.hyperopt_tuner import HyperoptTuner
  9 | from torch_geometric.utils import dropout_adj
 10 | import random
 11 | 
# Called once when this module is imported, with a fixed value.
# NOTE(review): presumably seeds the RNGs for reproducibility — see
# utils.tools.fix_seed to confirm.
fix_seed(1234)
 13 | class GatedGraphNet(torch.nn.Module):
 14 | 
 15 |     def __init__(self, info):
 16 |         super(GatedGraphNet, self).__init__()
 17 | 
 18 |         self.info = info
 19 |         self.hyperparameters = {
 20 |             'num_layers': self.info['num_layers'],
 21 |             'lr': 0.005,
 22 |             'gated_conv_layers': 3,
 23 |             'dropedge_rate': self.info['dropedge_rate'],
 24 |             'dropout_rate': self.info['dropout_rate'],
 25 |             'hidden': self.info['init_hidden_size']
 26 |         }
 27 | 
 28 |         self.best_score = 0
 29 |         self.hist_score = []
 30 | 
 31 |         self.best_preds = None
 32 |         self.current_round_best_preds = None
 33 |         self.best_valid_score = 0
 34 |         self.max_patience = 100
 35 |         self.max_epochs = 1600
 36 |         
 37 |         self.name = 'GatedGraph'
 38 | 
 39 |         self.best_hp = None
 40 |         self.tuner = HyperoptTuner(algorithm_name='tpe', optimize_mode='maximize')
 41 |         search_space = {
 42 |                 "dropedge_rate": {
 43 |                     "_type": "uniform",
 44 |                     "_value": [0, 1]
 45 |                 },
 46 |                 "dropout_rate": {
 47 |                     "_type": "uniform",
 48 |                     "_value": [0, 1]
 49 |                 },
 50 |                 "num_layers": {
 51 |                     "_type": "randint",
 52 |                     "_value": [2, 4]
 53 |                 },
 54 |                 "hidden": {
 55 |                     "_type": "quniform",
 56 |                     "_value": [4, 7, 1]
 57 |                 },
 58 |                 "lr":{
 59 |                     "_type": "choice",
 60 |                     "_value": [0.005]
 61 |                 },
 62 |                 'gated_conv_layers' :{
 63 |                     "_type": "quniform",
 64 |                     "_value": [1, 10, 1]
 65 |                 } 
 66 |             }
 67 |         self.tuner.update_search_space(search_space)
 68 | 
 69 |     def init_model(self, n_class, feature_num):
 70 |         num_layers = self.hyperparameters['num_layers']
 71 |         hidden_size = int(2 ** self.hyperparameters['hidden'])
 72 |         lr = self.hyperparameters['lr']
 73 |         gated_conv_layers = int(self.hyperparameters['gated_conv_layers'])
 74 |         
 75 |         self.input_linear = Linear(feature_num, hidden_size)
 76 |         self.convs = torch.nn.ModuleList()
 77 |         for i in range(num_layers - 1):
 78 |             self.convs.append(GatedGraphConv(out_channels=hidden_size, num_layers=gated_conv_layers))
 79 |         self.output_linear = Linear(hidden_size, n_class)
 80 |         self.optimizer = torch.optim.Adam(self.parameters(), lr=lr, weight_decay=5e-4)
 81 | 
 82 |         self = self.to('cuda')
 83 | 
 84 |         torch.cuda.empty_cache()
 85 | 
 86 |     def forward(self, data):
 87 |         x, edge_index, edge_weight = data.x, data.edge_index, data.edge_weight
 88 |         if self.hyperparameters['dropedge_rate'] is not None:
 89 |             edge_index, edge_weight = dropout_adj(edge_index, edge_weight, p=self.hyperparameters['dropedge_rate'],\
 90 |                  force_undirected=False, num_nodes=None, training=self.training)
 91 |  
 92 |         x = F.relu(self.input_linear(x))
 93 |         x = F.dropout(x, p=self.hyperparameters['dropout_rate'], training=self.training)
 94 |         for conv in self.convs:
 95 |                 x = F.relu(conv(x, edge_index,edge_weight))   
 96 |                 x = F.dropout(x, p=self.hyperparameters['dropout_rate'], training=self.training)
 97 |         x = self.output_linear(x)
 98 |         return x
 99 |     
100 |     def trial(self, data, round_num):
101 |         n_class, feature_num = self.info['n_class'], data.x.shape[1]
102 |         if round_num >= 2:
103 |             self.hyperparameters = self.tuner.generate_parameters(round_num-1)
104 |         print(self.hyperparameters)    
105 |            
106 |         while True:
107 |             try:
108 |                 self.init_model(n_class, feature_num)
109 |                 val_score = self.train_valid(data, round_num)
110 |                 if round_num > 1:
111 |                     self.tuner.receive_trial_result(round_num-1,self.hyperparameters,val_score)
112 |                 if val_score > self.best_score:
113 |                     self.best_hp = copy.deepcopy(self.hyperparameters)
114 |                 break
115 |             except RuntimeError as e:
116 |                 print(self.name,e, 'OOM with Hidden Size', self.hyperparameters['hidden'])
117 |                 if round_num > 1:
118 |                     self.tuner.receive_trial_result(round_num-1,self.hyperparameters,0)
119 |                 return 0
120 |         print("Best Hyperparameters of", self.name, self.best_hp)
121 |         return val_score
122 | 
123 |     def train_valid(self, data, round_num):
124 |         y, train_mask, valid_mask, test_mask, label_weights = data.y, data.train_mask, data.valid_mask, data.test_mask, data.label_weights
125 | 
126 |         patience = self.max_patience
127 |         best_valid_score = 0
128 |         valid_acc_meter = AverageMeter()
129 |         for epoch in range(self.max_epochs):
130 | 
131 |             # train
132 |             self.train()
133 |             self.optimizer.zero_grad()
134 |             preds = self.forward(data)
135 |             loss = F.cross_entropy(preds[train_mask], y[train_mask], label_weights)
136 |             loss.backward()
137 |             self.optimizer.step()
138 | 
139 |             # valid
140 |             self.eval()
141 |             with torch.no_grad():
142 |                 preds = F.softmax(self.forward(data), dim=-1)
143 |                 valid_preds, test_preds = preds[valid_mask], preds[test_mask]
144 |                 valid_score = f1_score(y[valid_mask].cpu(), valid_preds.max(1)[1].flatten().cpu(), average='micro')
145 |             valid_acc_meter.update(valid_score)
146 |             # patience
147 |             if valid_acc_meter.avg > best_valid_score:
148 |                 best_valid_score = valid_acc_meter.avg
149 |                 self.current_round_best_preds = test_preds
150 |                 patience = self.max_patience
151 |             else:
152 |                 patience -= 1
153 | 
154 |             if patience == 0:
155 |                 break
156 | 
157 |         return best_valid_score
158 | 
159 |     def predict(self):
160 |         return self.current_round_best_preds.cpu().numpy()
161 | 
162 |     def __repr__(self):
163 |         return self.__class__.__name__


--------------------------------------------------------------------------------
/code_submission/model_lib/gcn.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import torch.nn.functional as F
  3 | from torch.nn import Linear
  4 | from torch_geometric.nn import GCNConv
  5 | import copy
  6 | from sklearn.metrics import f1_score
  7 | from utils.tools import fix_seed, AverageMeter
  8 | from nni.hyperopt_tuner.hyperopt_tuner import HyperoptTuner
  9 | from torch_geometric.utils import dropout_adj
 10 | import random
 11 | 
 12 | fix_seed(1234)
 13 | class GCN(torch.nn.Module):
 14 | 
 15 |     def __init__(self, info):
 16 |         super(GCN, self).__init__()
 17 |         self.info = info
 18 | 
 19 |         self.hyperparameters = {
 20 |             'num_layers': self.info['num_layers'],
 21 |             'lr':self.info['lr'],
 22 |             'dropedge_rate':self.info['dropedge_rate'],
 23 |             'dropout_rate':self.info['dropout_rate'],
 24 |             'hidden': self.info['init_hidden_size']
 25 |         }
 26 | 
 27 |         self.best_score = 0
 28 |         self.hist_score = []
 29 | 
 30 |         self.best_preds = None
 31 |         self.current_round_best_preds = None
 32 |         self.best_valid_score = 0
 33 |         self.max_patience = 100
 34 |         self.max_epochs = 1600
 35 |         
 36 |         self.name = 'GCN'
 37 |         self.tuner = HyperoptTuner(algorithm_name='tpe', optimize_mode='maximize')
 38 |         search_space = {
 39 |                 "dropedge_rate": {
 40 |                     "_type": "choice",
 41 |                     "_value": [self.info['dropedge_rate']]
 42 |                 },
 43 |                 "dropout_rate": {
 44 |                     "_type": "choice",
 45 |                     "_value": [self.info['dropout_rate']]
 46 |                 },
 47 |                 "num_layers": {
 48 |                     "_type": "randint",
 49 |                     "_value": [2, 4]
 50 |                 },
 51 |                 "hidden": {
 52 |                     "_type": "quniform",
 53 |                     "_value": [4, 7, 1]
 54 |                 },
 55 |                 "lr":{
 56 |                     "_type": "choice",
 57 |                     "_value": [0.005]
 58 |                 }
 59 |             }
 60 |         self.tuner.update_search_space(search_space)
 61 |         self.best_hp = None
 62 | 
 63 |     def init_model(self, n_class, feature_num):
 64 |         hidden_size = int(2 ** self.hyperparameters['hidden'])
 65 |         if self.info['num_edges'] > 1000000:
 66 |             self.conv1 = Linear(feature_num, hidden_sizes)
 67 |         else:
 68 |             self.conv1 = GCNConv(feature_num, hidden_sizes)
 69 |         if self.hyperparameters['num_layers'] > 2:
 70 |             self.convs = torch.nn.ModuleList()
 71 |             for i in range(self.hyperparameters['num_layers'] - 2):
 72 |                 self.convs.append(GCNConv(hidden_sizes,hidden_sizes))
 73 |         self.conv2 = GCNConv(hidden_sizes, n_class)
 74 | 
 75 |         self.optimizer = torch.optim.Adam(self.parameters(), lr=self.hyperparameters['lr'], weight_decay=5e-4)
 76 | 
 77 |         self = self.to('cuda')
 78 | 
 79 |         torch.cuda.empty_cache()
 80 | 
 81 |     def forward(self, data):
 82 |         x, edge_index, edge_weight = data.x, data.edge_index, data.edge_weight
 83 |         if self.hyperparameters['dropedge_rate'] is not None:
 84 |             edge_index, edge_weight = dropout_adj(edge_index, edge_weight, p=self.hyperparameters['dropedge_rate'],\
 85 |                  force_undirected=False, num_nodes=None, training=self.training)
 86 |         if self.info['num_edges'] > 1000000:
 87 |             x = F.relu(self.conv1(x))
 88 |         else:
 89 |             x = F.relu(self.conv1(x, edge_index,edge_weight))
 90 |         x = F.dropout(x, p=self.hyperparameters['dropout_rate'], training=self.training)
 91 |         if self.hyperparameters['num_layers'] > 2:
 92 |             for conv in self.convs:
 93 |                  x = F.relu(conv(x, edge_index,edge_weight))   
 94 |                  x = F.dropout(x, p=self.hyperparameters['dropout_rate'], training=self.training)
 95 |         x = self.conv2(x, edge_index,edge_weight)
 96 |         return x
 97 | 
 98 |     def trial(self, data, round_num):
 99 |         n_class, feature_num = self.info['n_class'], data.x.shape[1]
100 |         if round_num >= 2:
101 |             self.hyperparameters = self.tuner.generate_parameters(round_num-1)
102 |         print(self.hyperparameters)    
103 |            
104 |         while True:
105 |             try:
106 |                 self.init_model(n_class, feature_num)
107 |                 val_score = self.train_valid(data, round_num)
108 |                 if round_num > 1:
109 |                     self.tuner.receive_trial_result(round_num-1,self.hyperparameters,val_score)
110 |                 if val_score > self.best_score:
111 |                     self.best_hp = copy.deepcopy(self.hyperparameters)
112 |                 break
113 |             except RuntimeError as e:
114 |                 print(self.name,e, 'OOM with Hidden Size', self.hyperparameters['hidden'])
115 |                 if round_num > 1:
116 |                     self.tuner.receive_trial_result(round_num-1,self.hyperparameters,0)
117 |                 return 0
118 |         print("Best Hyperparameters of", self.name, self.best_hp)
119 |         return val_score
120 | 
121 | 
122 |     def train_valid(self, data, round_num):
123 |         y, train_mask, valid_mask, test_mask, label_weights = data.y, data.train_mask, data.valid_mask, data.test_mask, data.label_weights
124 |         patience = self.max_patience
125 |         best_valid_score = 0
126 |         valid_acc_meter = AverageMeter()
127 |         for epoch in range(self.max_epochs):
128 | 
129 |             # train
130 |             self.train()
131 |             self.optimizer.zero_grad()
132 |             preds = self.forward(data)
133 |             loss = F.cross_entropy(preds[train_mask], y[train_mask], label_weights)
134 |             loss.backward()
135 |             self.optimizer.step()
136 | 
137 |             # valid
138 |             self.eval()
139 |             with torch.no_grad():
140 |                 preds = F.softmax(self.forward(data), dim=-1)
141 |                 valid_preds, test_preds = preds[valid_mask], preds[test_mask]
142 |                 valid_score = f1_score(y[valid_mask].cpu(), valid_preds.max(1)[1].flatten().cpu(), average='micro')
143 |             valid_acc_meter.update(valid_score)
144 |             # patience
145 |             if valid_acc_meter.avg > best_valid_score:
146 |                 best_valid_score = valid_acc_meter.avg
147 |                 self.current_round_best_preds = test_preds
148 |                 patience = self.max_patience
149 |             else:
150 |                 patience -= 1
151 | 
152 |             if patience == 0:
153 |                 break
154 | 
155 |         return best_valid_score
156 | 
157 |     def epoch_train(self, data, run_num, info, time_remain):
158 |         y, train_mask = data.y, data.train_mask
159 |         self.train()
160 |         self.optimizer.zero_grad()
161 |         preds = self.forward(data)
162 |         loss = F.cross_entropy(preds[train_mask], y[train_mask])
163 |         loss.backward()
164 |         self.optimizer.step()
165 | 
166 |     def epoch_valid(self, data):
167 |         y, valid_mask, test_mask = data.y, data.valid_mask, data.test_mask
168 |         self.eval()
169 | 
170 |         with torch.no_grad():
171 |             preds = F.softmax(self.forward(data), dim=-1)
172 |             valid_preds, test_preds = preds[valid_mask], preds[test_mask]
173 |             self.current_preds = test_preds
174 |             valid_score = f1_score(y[valid_mask].cpu(), valid_preds.max(1)[1].flatten().cpu(), average='micro')
175 | 
176 |         return valid_score
177 | 
178 |     def predict(self):
179 |         return self.current_round_best_preds.cpu().numpy()
180 | 
181 |     def __repr__(self):
182 |         return self.__class__.__name__


--------------------------------------------------------------------------------
/code_submission/model_lib/gin.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import torch.nn.functional as F
  3 | from torch.nn import Linear
  4 | from torch_geometric.nn import GINConv
  5 | import copy
  6 | from sklearn.metrics import f1_score
  7 | from utils.tools import fix_seed, AverageMeter
  8 | from nni.hyperopt_tuner.hyperopt_tuner import HyperoptTuner
  9 | from torch_geometric.utils import dropout_adj
 10 | import random
 11 | 
 12 | fix_seed(1234)
 13 | class GIN(torch.nn.Module):
 14 | 
 15 |     def __init__(self, info):
 16 |         super(GIN, self).__init__()
 17 | 
 18 |         self.info = info
 19 |         self.hyperparameters = {
 20 |             'num_layers': self.info['num_layers'],
 21 |             'lr': self.info['lr'],
 22 |             'dropedge_rate':self.info['dropedge_rate'],
 23 |             'dropout_rate':self.info['dropout_rate'],
 24 |             'hidden': self.info['init_hidden_size']
 25 |         }
 26 | 
 27 |         self.best_score = 0
 28 |         self.hist_score = []
 29 | 
 30 |         self.best_preds = None
 31 |         self.current_round_best_preds = None
 32 |         self.best_valid_score = 0
 33 |         self.max_patience = 100
 34 |         self.max_epochs = 1600
 35 |         
 36 |         self.name = 'GIN'
 37 | 
 38 |         self.tuner = HyperoptTuner(algorithm_name='tpe', optimize_mode='maximize')
 39 |         search_space = {
 40 |                 "dropedge_rate": {
 41 |                     "_type": "choice",
 42 |                     "_value": [self.info['dropedge_rate']]
 43 |                 },
 44 |                 "dropout_rate": {
 45 |                     "_type": "choice",
 46 |                     "_value": [self.info['dropout_rate']]
 47 |                 },
 48 |                 "num_layers": {
 49 |                     "_type": "quniform",
 50 |                     "_value": [1, 3, 1]
 51 |                 },
 52 |                 "hidden": {
 53 |                     "_type": "quniform",
 54 |                     "_value": [4, 7, 1]
 55 |                 },
 56 |                 "lr":{
 57 |                     "_type": "choice",
 58 |                     "_value": [0.005]
 59 |                 }
 60 |             }
 61 |         self.tuner.update_search_space(search_space)
 62 |         self.best_hp = None
 63 | 
 64 |     def init_model(self, n_class, feature_num):
 65 |         num_layers = int(self.hyperparameters['num_layers'])
 66 |         hidden_size = int(2 ** self.hyperparameters['hidden'])
 67 |         lr = self.hyperparameters['lr']
 68 | 
 69 |         self.input_linear = Linear(feature_num, hidden_size)
 70 |         self.convs = torch.nn.ModuleList()
 71 |         for i in range(num_layers):
 72 |             self.convs.append(GINConv(torch.nn.Sequential(
 73 |                 Linear(hidden_size, hidden_size),
 74 |                 torch.nn.ReLU(),
 75 |                 Linear(hidden_size, hidden_size),
 76 |                 torch.nn.ReLU()
 77 |                 ), eps=0, train_eps=False)
 78 |             )
 79 |         self.output_linear = Linear(hidden_size, n_class)
 80 |         self.optimizer = torch.optim.Adam(self.parameters(), lr=lr, weight_decay=5e-4)
 81 | 
 82 |         self = self.to('cuda')
 83 | 
 84 |         torch.cuda.empty_cache()
 85 | 
 86 |     def forward(self, data):
 87 |         x, edge_index, edge_weight = data.x, data.edge_index, data.edge_weight
 88 |         if self.hyperparameters['dropedge_rate'] is not None:
 89 |             edge_index, edge_weight = dropout_adj(edge_index, edge_weight, p=self.hyperparameters['dropedge_rate'],\
 90 |                  force_undirected=False, num_nodes=None, training=self.training)
 91 |         x = F.relu(self.input_linear(x))
 92 |         x = F.dropout(x, p=self.hyperparameters['dropout_rate'], training=self.training)
 93 |         for conv in self.convs:
 94 |             x = conv(x, edge_index)
 95 |             x = F.dropout(x, p=self.hyperparameters['dropout_rate'], training=self.training)
 96 |         x = self.output_linear(x)
 97 | 
 98 |         return x
 99 | 
100 |     def trial(self, data, round_num):
101 |         n_class, feature_num = self.info['n_class'], data.x.shape[1]
102 |         if round_num >= 2:
103 |             self.hyperparameters = self.tuner.generate_parameters(round_num-1)
104 |         print(self.hyperparameters)    
105 |            
106 |         while True:
107 |             try:
108 |                 self.init_model(n_class, feature_num)
109 |                 val_score = self.train_valid(data, round_num)
110 |                 if round_num > 1:
111 |                     self.tuner.receive_trial_result(round_num-1,self.hyperparameters,val_score)
112 |                 if val_score > self.best_score:
113 |                     self.best_hp = copy.deepcopy(self.hyperparameters)
114 |                 break
115 |             except RuntimeError as e:
116 |                 print(self.name,e, 'OOM with Hidden Size', self.hyperparameters['hidden'])
117 |                 if round_num > 1:
118 |                     self.tuner.receive_trial_result(round_num-1, self.hyperparameters, 0)
119 |                 return 0
120 |         print("Best Hyperparameters of", self.name, self.best_hp)
121 |         return val_score
122 | 
123 |     def train_valid(self, data, round_num):
124 |         y, train_mask, valid_mask, test_mask, label_weights = data.y, data.train_mask, data.valid_mask, data.test_mask, data.label_weights
125 | 
126 |         patience = self.max_patience
127 |         best_valid_score = 0
128 |         valid_acc_meter = AverageMeter()
129 |         for epoch in range(self.max_epochs):
130 | 
131 |             # train
132 |             self.train()
133 |             self.optimizer.zero_grad()
134 |             preds = self.forward(data)
135 |             loss = F.cross_entropy(preds[train_mask], y[train_mask], label_weights)
136 |             loss.backward()
137 |             self.optimizer.step()
138 | 
139 |             # valid
140 |             self.eval()
141 |             with torch.no_grad():
142 |                 preds = F.softmax(self.forward(data), dim=-1)
143 |                 valid_preds, test_preds = preds[valid_mask], preds[test_mask]
144 |                 valid_score = f1_score(y[valid_mask].cpu(), valid_preds.max(1)[1].flatten().cpu(), average='micro')
145 |             valid_acc_meter.update(valid_score)
146 | 
147 |             # patience
148 |             if valid_acc_meter.avg > best_valid_score:
149 |                 best_valid_score = valid_acc_meter.avg
150 |                 self.current_round_best_preds = test_preds
151 |                 patience = self.max_patience
152 |             else:
153 |                 patience -= 1
154 | 
155 |             if patience == 0:
156 |                 break
157 | 
158 |         return best_valid_score
159 | 
160 |     def predict(self):
161 |         return self.current_round_best_preds.cpu().numpy()
162 | 
163 |     def __repr__(self):
164 |         return self.__class__.__name__


--------------------------------------------------------------------------------
/code_submission/model_lib/graphconvnet.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import torch.nn.functional as F
  3 | from torch.nn import Linear
  4 | from torch_geometric.nn import GraphConv
  5 | import copy
  6 | from sklearn.metrics import f1_score
  7 | from utils.tools import fix_seed, AverageMeter
  8 | from nni.hyperopt_tuner.hyperopt_tuner import HyperoptTuner
  9 | from torch_geometric.utils import dropout_adj
 10 | import random
 11 | 
 12 | fix_seed(1234)
 13 | class GraphConvNet(torch.nn.Module):
 14 | 
 15 |     def __init__(self, info):
 16 |         super(GraphConvNet, self).__init__()
 17 | 
 18 |         self.info = info
 19 |         self.hyperparameters = {
 20 |             'num_layers': self.info['num_layers'],
 21 |             'lr': self.info['lr'],
 22 |             'dropedge_rate': self.info['dropedge_rate'],
 23 |             'dropout_rate': self.info['dropout_rate'],
 24 |             'hidden': self.info['init_hidden_size'],
 25 |             'use_linear':self.info['use_linear']
 26 |         }
 27 | 
 28 |         self.best_score = 0
 29 |         self.hist_score = []
 30 | 
 31 |         self.best_preds = None
 32 |         self.current_round_best_preds = None
 33 |         self.best_valid_score = 0
 34 |         self.max_patience = 100
 35 |         self.max_epochs = 1600
 36 | 
 37 |         self.name = 'GraphConvNet'
 38 | 
 39 |         self.best_hp = None
 40 |         self.tuner = HyperoptTuner(algorithm_name='tpe', optimize_mode='maximize')
 41 |         search_space = {
 42 |                 "dropedge_rate": {
 43 |                     "_type": "choice",
 44 |                     "_value": [self.info['dropedge_rate']]
 45 |                 },
 46 |                 "dropout_rate": {
 47 |                     "_type": "choice",
 48 |                     "_value": [self.info['dropout_rate']]
 49 |                 },
 50 |                 "num_layers": {
 51 |                     "_type": "quniform",
 52 |                     "_value": [1, 3, 1]
 53 |                 },
 54 |                 "hidden": {
 55 |                     "_type": "quniform",
 56 |                     "_value": [4, 7, 1]
 57 |                 },
 58 |                 "lr":{
 59 |                     "_type": "choice",
 60 |                     "_value": [0.005]
 61 |                 },
 62 |                 'use_linear': {
 63 |                     "_type":"choice",
 64 |                     "_value":[True, False]
 65 |                 }
 66 |                 
 67 |             }
 68 |         self.tuner.update_search_space(search_space)
 69 |         
 70 | 
 71 |     def init_model(self, n_class, feature_num):
 72 |         num_layers = int(self.hyperparameters['num_layers'])
 73 |         hidden_size = int(2 ** self.hyperparameters['hidden'])
 74 |         lr = self.hyperparameters['lr']
 75 |         if self.hyperparameters['use_linear']:
 76 |             self.input_lin = Linear(feature_num, hidden_size)
 77 |             self.convs = torch.nn.ModuleList()
 78 |             for i in range(num_layers):
 79 |                 self.convs.append(GraphConv(hidden_size, hidden_size))
 80 |             self.output_lin = Linear(hidden_size, n_class)
 81 |             
 82 |         else:
 83 |             if num_layers == 1:
 84 |                 self.conv1 = GraphConv(in_channels=feature_num, out_channels=n_class)
 85 |             else:    
 86 |                 self.conv1 = GraphConv(in_channels=feature_num, out_channels=hidden_size)
 87 |                 self.convs = torch.nn.ModuleList()
 88 |                 for i in range(num_layers - 2):
 89 |                     self.convs.append(GraphConv(in_channels=hidden_size, out_channels=hidden_size))
 90 |                 self.conv2 = GraphConv(hidden_size, n_class)
 91 |         self.optimizer = torch.optim.Adam(self.parameters(), lr=lr, weight_decay=5e-4)
 92 | 
 93 |         self = self.to('cuda')
 94 | 
 95 |         torch.cuda.empty_cache()
 96 | 
 97 |     def forward(self, data):
 98 |         x, edge_index, edge_weight = data.x, data.edge_index, data.edge_weight
 99 |         if self.hyperparameters['dropedge_rate'] is not None:
100 |             edge_index, edge_weight = dropout_adj(edge_index, edge_weight, p=self.hyperparameters['dropedge_rate'],\
101 |                  force_undirected=False, num_nodes=None, training=self.training)
102 |         
103 |         if self.hyperparameters['use_linear']:
104 |             x = F.relu(self.input_lin(x))
105 |         else:
106 |             x = F.relu(self.conv1(x, edge_index,edge_weight))
107 |             if self.hyperparameters['num_layers'] == 1:
108 |                 return x
109 |         x = F.dropout(x, p=self.hyperparameters['dropout_rate'], training=self.training)
110 |         for conv in self.convs:
111 |             x = F.relu(conv(x, edge_index, edge_weight=edge_weight))
112 |             x = F.dropout(x, p=self.hyperparameters['dropout_rate'], training=self.training)
113 |         if self.hyperparameters['use_linear']:
114 |             x = self.output_lin(x)
115 |         else:
116 |             x = self.conv2(x, edge_index,edge_weight)
117 |         return x
118 |     
119 |     def trial(self, data, round_num):
120 |         n_class, feature_num = self.info['n_class'], data.x.shape[1]
121 |         if round_num >= 2:
122 |             self.hyperparameters = self.tuner.generate_parameters(round_num-1)
123 |         print(self.hyperparameters)    
124 |            
125 |         while True:
126 |             try:
127 |                 self.init_model(n_class, feature_num)
128 |                 val_score = self.train_valid(data, round_num)
129 |                 if round_num > 1:
130 |                     self.tuner.receive_trial_result(round_num-1,self.hyperparameters,val_score)
131 |                 if val_score > self.best_score:
132 |                     self.best_hp = copy.deepcopy(self.hyperparameters)
133 |                 break
134 |             except RuntimeError as e:
135 |                 print(self.name,e, 'OOM with Hidden Size', self.hyperparameters['hidden'])
136 |                 if round_num > 1:
137 |                     self.tuner.receive_trial_result(round_num-1,self.hyperparameters,0)
138 |                 return 0
139 |         print("Best Hyperparameters of ", self.name, self.best_hp)
140 |         return val_score
141 | 
142 |     def train_valid(self, data, round_num):
143 |         y, train_mask, valid_mask, test_mask, label_weights = data.y, data.train_mask, data.valid_mask, data.test_mask, data.label_weights
144 | 
145 |         patience = self.max_patience
146 |         best_valid_score = 0
147 |         valid_acc_meter = AverageMeter()
148 |         for epoch in range(self.max_epochs):
149 | 
150 |             # train
151 |             self.train()
152 |             self.optimizer.zero_grad()
153 |             preds = self.forward(data)
154 |             loss = F.cross_entropy(preds[train_mask], y[train_mask], label_weights)
155 |             loss.backward()
156 |             self.optimizer.step()
157 | 
158 |             # valid
159 |             self.eval()
160 |             with torch.no_grad():
161 |                 preds = F.softmax(self.forward(data), dim=-1)
162 |                 valid_preds, test_preds = preds[valid_mask], preds[test_mask]
163 |                 valid_score = f1_score(y[valid_mask].cpu(), valid_preds.max(1)[1].flatten().cpu(), average='micro')
164 |             valid_acc_meter.update(valid_score)
165 | 
166 |             # patience
167 |             if valid_acc_meter.avg > best_valid_score:
168 |                 best_valid_score = valid_acc_meter.avg
169 |                 self.current_round_best_preds = test_preds
170 |                 patience = self.max_patience
171 |             else:
172 |                 patience -= 1
173 | 
174 |             if patience == 0:
175 |                 break
176 | 
177 |         return best_valid_score
178 | 
179 |     def epoch_train(self, data, run_num, info, time_remain):
180 |         y, train_mask = data.y, data.train_mask
181 |         self.train()
182 |         self.optimizer.zero_grad()
183 |         preds = self.forward(data)
184 |         loss = F.cross_entropy(preds[train_mask], y[train_mask])
185 |         loss.backward()
186 |         self.optimizer.step()
187 | 
188 |     def epoch_valid(self, data):
189 |         y, valid_mask, test_mask = data.y, data.valid_mask, data.test_mask
190 |         self.eval()
191 | 
192 |         with torch.no_grad():
193 |             preds = F.softmax(self.forward(data), dim=-1)
194 |             valid_preds, test_preds = preds[valid_mask], preds[test_mask]
195 |             self.current_preds = test_preds
196 |             valid_score = f1_score(y[valid_mask].flatten().cpu(), valid_preds.max(1)[1].flatten().cpu(), average='micro')
197 | 
198 |         return valid_score
199 | 
200 |     def predict(self):
201 |         return self.current_round_best_preds.cpu().numpy()
202 | 
203 |     def __repr__(self):
204 |         return self.__class__.__name__


--------------------------------------------------------------------------------
/code_submission/model_lib/graphsage.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import torch.nn.functional as F
  3 | from torch.nn import Linear
  4 | from torch_geometric.nn import SAGEConv
  5 | import copy
  6 | from sklearn.metrics import f1_score
  7 | from utils.tools import fix_seed, AverageMeter
  8 | from nni.hyperopt_tuner.hyperopt_tuner import HyperoptTuner
  9 | from torch_geometric.utils import dropout_adj
 10 | import random
 11 | 
 12 | fix_seed(1234)
 13 | class GraphSAGE(torch.nn.Module):
 14 | 
 15 |     def __init__(self, info):
 16 |         super(GraphSAGE, self).__init__()
 17 | 
 18 |         self.info = info
 19 |         self.hyperparameters = {
 20 |             'num_layers': self.info['num_layers'],
 21 |             'lr':self.info['lr'],
 22 |             'dropedge_rate':self.info['dropedge_rate'],
 23 |             'dropout_rate':self.info['dropout_rate'],
 24 |             'hidden': self.info['init_hidden_size'],
 25 |             'use_linear':self.info['use_linear']
 26 |         }
 27 | 
 28 |         self.best_score = 0
 29 |         self.hist_score = []
 30 | 
 31 |         self.best_preds = None
 32 |         self.current_round_best_preds = None
 33 |         self.best_valid_score = 0
 34 |         self.max_patience = 100
 35 |         self.max_epochs = 1600
 36 |         
 37 |         self.name = 'GraphSAGE'
 38 | 
 39 |         self.best_hp = None
 40 |         self.tuner = HyperoptTuner(algorithm_name='tpe', optimize_mode='maximize')
 41 |         search_space = {
 42 |                 "dropedge_rate": {
 43 |                     "_type": "choice",
 44 |                     "_value": [self.info['dropedge_rate']]
 45 |                 },
 46 |                 "dropout_rate": {
 47 |                     "_type": "choice",
 48 |                     "_value": [self.info['dropout_rate']]
 49 |                 },
 50 |                 "num_layers": {
 51 |                     "_type": "quniform",
 52 |                     "_value": [1, 3, 1]
 53 |                 },
 54 |                 "hidden": {
 55 |                     "_type": "quniform",
 56 |                     "_value": [4, 7, 1]
 57 |                 },
 58 |                 "lr":{
 59 |                     "_type": "choice",
 60 |                     "_value": [0.005]
 61 |                 },
 62 |                 "use_linear":{
 63 |                     "_type": "choice",
 64 |                     "_value": [True, False]
 65 |                 }
 66 |                 
 67 |             }
 68 |         self.tuner.update_search_space(search_space)
 69 | 
 70 |     def init_model(self, n_class, feature_num):
 71 |         num_layers = int(self.hyperparameters['num_layers'])
 72 |         hidden_size = int(2 ** self.hyperparameters['hidden'])
 73 |         lr = self.hyperparameters['lr']
 74 |         if self.hyperparameters['use_linear']:
 75 |             self.input_lin = Linear(feature_num, hidden_size)
 76 |             self.convs = torch.nn.ModuleList()
 77 |             for i in range(num_layers):
 78 |                 self.convs.append(SAGEConv(hidden_size, hidden_size,normalize=True))
 79 |             self.output_lin = Linear(hidden_size, n_class)
 80 |         else:
 81 |             if num_layers == 1:
 82 |                 self.conv1 = SAGEConv(in_channels=feature_num, out_channels=n_class,normalize=True)
 83 |             else:
 84 |                 self.conv1 = SAGEConv(in_channels=feature_num, out_channels=hidden_size,normalize=True)
 85 |                 self.convs = torch.nn.ModuleList()
 86 |                 for i in range(num_layers - 2):
 87 |                     self.convs.append(SAGEConv(in_channels=hidden_size, out_channels=hidden_size))          
 88 |                 self.conv2 = SAGEConv(in_channels=hidden_size, out_channels=n_class,normalize=True)
 89 | 
 90 |         self.optimizer = torch.optim.Adam(self.parameters(), lr=lr, weight_decay=5e-4)
 91 | 
 92 |         self = self.to('cuda')
 93 | 
 94 |         torch.cuda.empty_cache()
 95 | 
 96 |     def forward(self, data):
 97 |         x, edge_index, edge_weight = data.x, data.edge_index, data.edge_weight
 98 |         if self.hyperparameters['dropedge_rate'] is not None:
 99 |             edge_index, edge_weight = dropout_adj(edge_index, edge_weight, p=self.hyperparameters['dropedge_rate'],\
100 |                  force_undirected=False, num_nodes=None, training=self.training)
101 |         
102 |         if self.hyperparameters['use_linear']:
103 |             x = F.relu(self.input_lin(x))
104 |         else:
105 |             x = F.relu(self.conv1(x, edge_index,edge_weight))
106 |             if self.hyperparameters['num_layers'] == 1:
107 |                 return x
108 |         x = F.dropout(x, p=self.hyperparameters['dropout_rate'], training=self.training)
109 |         for conv in self.convs:
110 |             x = F.relu(conv(x, edge_index, edge_weight=edge_weight))
111 |             x = F.dropout(x, p=self.hyperparameters['dropout_rate'], training=self.training)
112 |         if self.hyperparameters['use_linear']:
113 |             x = self.output_lin(x)
114 |         else:
115 |             x = self.conv2(x, edge_index,edge_weight)
116 |         return x
117 | 
118 |     def trial(self, data, round_num):
119 |         n_class, feature_num = self.info['n_class'], data.x.shape[1]
120 |         if round_num >= 2:
121 |             self.hyperparameters = self.tuner.generate_parameters(round_num-1)
122 |         print(self.hyperparameters)    
123 |            
124 |         while True:
125 |             try:
126 |                 self.init_model(n_class, feature_num)
127 |                 val_score = self.train_valid(data, round_num)
128 |                 if round_num > 1:
129 |                     self.tuner.receive_trial_result(round_num-1,self.hyperparameters,val_score)
130 |                 if val_score > self.best_score:
131 |                     self.best_hp = copy.deepcopy(self.hyperparameters)
132 |                 break
133 |             except RuntimeError as e:
134 |                 print(self.name,e, 'OOM with Hidden Size', self.hyperparameters['hidden'])
135 |                 if round_num > 1:
136 |                     self.tuner.receive_trial_result(round_num-1,self.hyperparameters,0)
137 |                 return 0
138 |         print("Best Hyperparameters of", self.name, self.best_hp)
139 |         return val_score
140 | 
141 |     def train_valid(self, data, round_num):
142 |         y, train_mask, valid_mask, test_mask, label_weights = data.y, data.train_mask, data.valid_mask, data.test_mask, data.label_weights
143 | 
144 |         patience = self.max_patience
145 |         best_valid_score = 0
146 |         valid_acc_meter = AverageMeter()
147 |         for epoch in range(self.max_epochs):
148 | 
149 |             # train
150 |             self.train()
151 |             self.optimizer.zero_grad()
152 |             preds = self.forward(data)
153 |             loss = F.cross_entropy(preds[train_mask], y[train_mask], label_weights)
154 |             loss.backward()
155 |             self.optimizer.step()
156 | 
157 |             # valid
158 |             self.eval()
159 |             with torch.no_grad():
160 |                 preds = F.softmax(self.forward(data), dim=-1)
161 |                 valid_preds, test_preds = preds[valid_mask], preds[test_mask]
162 |                 valid_score = f1_score(y[valid_mask].cpu(), valid_preds.max(1)[1].flatten().cpu(), average='micro')
163 |             valid_acc_meter.update(valid_score)
164 |             # patience
165 |             if valid_acc_meter.avg > best_valid_score:
166 |                 best_valid_score = valid_acc_meter.avg
167 |                 self.current_round_best_preds = test_preds
168 |                 patience = self.max_patience
169 |             else:
170 |                 patience -= 1
171 | 
172 |             if patience == 0:
173 |                 break
174 | 
175 |         return best_valid_score
176 | 
177 |     def predict(self):
178 |         return self.current_round_best_preds.cpu().numpy()
179 | 
180 |     def __repr__(self):
181 |         return self.__class__.__name__


--------------------------------------------------------------------------------
/code_submission/model_lib/incepgcn.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import torch.nn.functional as F
  3 | from torch.nn import Linear
  4 | import torch.nn as nn
  5 | from torch_geometric.nn import GCNConv
  6 | from sklearn.metrics import f1_score
  7 | from utils.tools import fix_seed, AverageMeter
  8 | from nni.hyperopt_tuner.hyperopt_tuner import HyperoptTuner
  9 | from torch_geometric.utils import dropout_adj
 10 | import copy
 11 | fix_seed(1234)
 12 | 
 13 | class GraphBaseBlock(torch.nn.Module):
 14 |     """
 15 |     The base block for Multi-layer GCN / ResGCN / Dense GCN 
 16 |     """
 17 | 
 18 |     def __init__(self, in_features, out_features, nbaselayer,
 19 |                  withbn=True, withloop=True, activation=F.relu, dropout=0.5,
 20 |                  aggrmethod="concat", dense=False):
 21 |         """
 22 |         The base block for constructing DeepGCN model.
 23 |         :param in_features: the input feature dimension.
 24 |         :param out_features: the hidden feature dimension.
 25 |         :param nbaselayer: the number of layers in the base block.
 26 |         :param withbn: using batch normalization in graph convolution.
 27 |         :param withloop: using self feature modeling in graph convolution.
 28 |         :param activation: the activation function, default is ReLu.
 29 |         :param dropout: the dropout ratio.
 30 |         :param aggrmethod: the aggregation function for baseblock, can be "concat" and "add". For "resgcn", the default
 31 |                            is "add", for others the default is "concat".
 32 |         :param dense: enable dense connection
 33 |         """
 34 |         super(GraphBaseBlock, self).__init__()
 35 |         self.in_features = in_features
 36 |         self.hiddendim = out_features
 37 |         self.nhiddenlayer = nbaselayer
 38 |         self.activation = activation
 39 |         self.aggrmethod = aggrmethod
 40 |         self.dense = dense
 41 |         self.dropout = dropout
 42 | 
 43 |         self.hiddenlayers = nn.ModuleList()
 44 |         self.__makehidden()
 45 | 
 46 |         if self.aggrmethod == "concat" and dense == False:
 47 |             self.out_features = in_features + out_features
 48 |         elif self.aggrmethod == "concat" and dense == True:
 49 |             self.out_features = in_features + out_features * nbaselayer
 50 |         elif self.aggrmethod == "add":
 51 |             if in_features != self.hiddendim:
 52 |                 raise RuntimeError("The dimension of in_features and hiddendim should be matched in add model.")
 53 |             self.out_features = out_features
 54 |         elif self.aggrmethod == "nores":
 55 |             self.out_features = out_features
 56 |         else:
 57 |             raise NotImplementedError("The aggregation method only support 'concat','add' and 'nores'.")
 58 | 
 59 |     def __makehidden(self):
 60 |         for i in range(self.nhiddenlayer):
 61 |             if i == 0:
 62 |                 layer = GCNConv(self.in_features, self.hiddendim)
 63 |             else:
 64 |                 layer = GCNConv(self.hiddendim, self.hiddendim)
 65 |             self.hiddenlayers.append(layer)
 66 | 
 67 |     def _doconcat(self, x, subx):
 68 |         if x is None:
 69 |             return subx
 70 |         if self.aggrmethod == "concat":
 71 |             return torch.cat((x, subx), 1)
 72 |         elif self.aggrmethod == "add":
 73 |             return x + subx
 74 |         elif self.aggrmethod == "nores":
 75 |             return x
 76 | 
 77 |     def forward(self, input, edge_index, edge_weight):
 78 |         x = input
 79 |         denseout = None
 80 |         # Here out is the result in all levels.
 81 |         for gc in self.hiddenlayers:
 82 |             denseout = self._doconcat(denseout, x)
 83 |             x = self.activation(gc(x, edge_index, edge_weight))
 84 |             x = F.dropout(x, self.dropout, training=self.training)
 85 | 
 86 |         if not self.dense:
 87 |             return self._doconcat(x, input)
 88 |         return self._doconcat(x, denseout)
 89 | 
 90 |     def get_outdim(self):
 91 |         return self.out_features
 92 | 
 93 |     def __repr__(self):
 94 |         return "%s %s (%d - [%d:%d] > %d)" % (self.__class__.__name__,
 95 |                                               self.aggrmethod,
 96 |                                               self.in_features,
 97 |                                               self.hiddendim,
 98 |                                               self.nhiddenlayer,
 99 |                                               self.out_features)
100 |                                               
class InceptionGCNBlock(torch.nn.Module):
    """
    The multiple layer GCN with inception connection block.

    Branch ``j`` (0-based) is a chain of ``j + 1`` GCNConv layers applied to the
    block input; the branch outputs are merged with the input one after another.
    """

    def __init__(self, in_features, out_features, nbaselayer,
                 dropout=0.5, aggrmethod="concat", dense=False, activation=None):
        """
        The multiple layer GCN with inception connection block.
        :param in_features: the input feature dimension.
        :param out_features: the hidden feature dimension.
        :param nbaselayer: the number of branches; the deepest branch has this many layers.
        :param dropout: the dropout ratio applied after every GCNConv layer.
        :param aggrmethod: the aggregation function for the branches, "concat" or "add".
        :param dense: not applied; accepted for interface compatibility only.
        :param activation: optional callable applied to the output of every GCNConv
                           layer. The default None applies no non-linearity, which is
                           the behaviour of the block before this parameter existed.
        :raises RuntimeError: "add" is requested with in_features != out_features.
        :raises NotImplementedError: aggrmethod is neither "concat" nor "add".
        """
        super(InceptionGCNBlock, self).__init__()
        self.in_features = in_features
        self.out_features = out_features
        self.hiddendim = out_features
        self.nbaselayer = nbaselayer
        self.aggrmethod = aggrmethod
        self.dropout = dropout
        self.activation = activation
        self.midlayers = torch.nn.ModuleList()
        self.__makehidden()

        if self.aggrmethod == "concat":
            # The input plus one hidden-width slice per branch.
            self.out_features = in_features + out_features * nbaselayer
        elif self.aggrmethod == "add":
            if in_features != self.hiddendim:
                raise RuntimeError("The dimension of in_features and hiddendim should be matched in 'add' model.")
            self.out_features = out_features
        else:
            raise NotImplementedError("The aggregation method only support 'concat', 'add'.")

    def __makehidden(self):
        for j in range(self.nbaselayer):
            reslayer = torch.nn.ModuleList()
            for i in range(j + 1):
                # Only the first layer of a branch sees the block input width.
                if i == 0:
                    layer = GCNConv(self.in_features, self.hiddendim)
                else:
                    layer = GCNConv(self.hiddendim, self.hiddendim)
                reslayer.append(layer)
            self.midlayers.append(reslayer)

    def forward(self, input, edge_index, edge_weight):
        x = input
        for reslayer in self.midlayers:
            # Every branch starts again from the block input.
            subx = input
            for gc in reslayer:
                subx = gc(subx, edge_index, edge_weight)
                if self.activation is not None:
                    subx = self.activation(subx)
                subx = F.dropout(subx, p=self.dropout, training=self.training)
            x = self._doconcat(x, subx)
        return x

    def get_outdim(self):
        return self.out_features

    def _doconcat(self, x, subx):
        if self.aggrmethod == "concat":
            return torch.cat((x, subx), 1)
        elif self.aggrmethod == "add":
            return x + subx

    def __repr__(self):
        return "%s %s (%d - [%d:%d] > %d)" % (self.__class__.__name__,
                                              self.aggrmethod,
                                              self.in_features,
                                              self.hiddendim,
                                              self.nbaselayer,
                                              self.out_features)
177 | 
class IncepGCN(torch.nn.Module):
    """
    Node classifier made of an input linear layer, one InceptionGCNBlock and an
    output linear layer, with a TPE tuner that proposes the hyperparameters of
    every round after the first.
    """

    def __init__(self, info):
        """
        :param info: configuration dict. The keys read here are 'dropedge_rate',
                     'dropout_rate', 'num_layers', 'lr' and 'init_hidden_size';
                     'n_class' is read later by trial().
        """
        super(IncepGCN, self).__init__()
        self.info = info
        self.best_score = 0
        self.hist_score = []

        self.best_preds = None
        self.current_round_best_preds = None
        self.best_valid_score = 0
        self.max_patience = 100
        self.max_epochs = 1600

        self.name = 'IncepGCN'

        self.tuner = HyperoptTuner(algorithm_name='tpe', optimize_mode='maximize')
        # Only num_layers and hidden are really searched; the other entries are
        # single-value choices.
        search_space = {
                "dropedge_rate": {
                    "_type": "choice",
                    "_value": [self.info['dropedge_rate']]
                },
                "dropout_rate": {
                    "_type": "choice",
                    "_value": [self.info['dropout_rate']]
                },
                "num_layers": {
                    "_type": "quniform",
                    "_value": [2, 4, 1]
                },
                "hidden": {
                    "_type": "quniform",
                    "_value": [4, 7, 1]
                },
                "lr":{
                    "_type": "choice",
                    "_value": [0.005]
                }
            }
        self.tuner.update_search_space(search_space)
        # Hyperparameters used by the first round, before the tuner is consulted.
        self.hyperparameters = {
            'num_layers': self.info['num_layers'],
            'lr': self.info['lr'],
            'dropedge_rate':self.info['dropedge_rate'],
            'dropout_rate':self.info['dropout_rate'],
            'hidden': self.info['init_hidden_size']
        }
        self.best_hp = None

    def init_model(self, n_class, features_num):
        """
        (Re)build the layers and the optimizer from ``self.hyperparameters``.

        :param n_class: number of output classes.
        :param features_num: dimension of the input node features.
        """
        # 'hidden' is stored as an exponent: the layer width is 2 ** hidden.
        hidden = int(2 ** self.hyperparameters['hidden'])
        num_layers = int(self.hyperparameters['num_layers'])
        self.in_lin = nn.Linear(features_num, hidden)
        self.incep_conv = InceptionGCNBlock(hidden, hidden, nbaselayer=num_layers, dropout=self.hyperparameters['dropout_rate'])
        self.out_lin = nn.Linear(self.incep_conv.get_outdim(), n_class)

        # Move the module to the GPU before constructing the optimizer, as the
        # torch.optim documentation recommends; Module.to() works in place, so
        # rebinding ``self`` is unnecessary.
        self.to('cuda')

        self.optimizer = torch.optim.Adam(self.parameters(), lr=self.hyperparameters['lr'], weight_decay=5e-4)

        torch.cuda.empty_cache()

    def forward(self, data):
        """
        Compute the class logits for ``data`` (no softmax is applied here).

        :param data: object exposing ``x``, ``edge_index`` and ``edge_weight``.
        """
        x, edge_index, edge_weight = data.x, data.edge_index, data.edge_weight
        if self.hyperparameters['dropedge_rate'] is not None:
            # dropout_adj receives training=self.training, so edge dropping
            # follows the module's train/eval mode.
            edge_index, edge_weight = dropout_adj(edge_index, edge_weight, p=self.hyperparameters['dropedge_rate'],
                                                  force_undirected=False, num_nodes=None, training=self.training)
        x = self.in_lin(x)
        x = F.dropout(x, p=self.hyperparameters['dropout_rate'], training=self.training)
        x = self.incep_conv(x, edge_index, edge_weight)
        x = self.out_lin(x)
        return x

    def trial(self, data, round_num):
        """
        Run one tuning round and report its validation score to the tuner.

        The first round uses the hyperparameters set in __init__; later rounds
        ask the tuner for a new set.

        :param data: graph data object; ``data.x.shape[1]`` gives the feature size.
        :param round_num: round counter; the tuner is only used when it exceeds 1.
        :return: the validation score, or 0 when a RuntimeError interrupted the round.
        """
        n_class, feature_num = self.info['n_class'], data.x.shape[1]
        if round_num >= 2:
            self.hyperparameters = self.tuner.generate_parameters(round_num-1)
        print(self.hyperparameters)

        # A single attempt per round: the former ``while True`` wrapper left on
        # every path, so it is gone.
        try:
            self.init_model(n_class, feature_num)
            val_score = self.train_valid(data, round_num)
            if round_num > 1:
                self.tuner.receive_trial_result(round_num-1,self.hyperparameters,val_score)
            if val_score > self.best_score:
                self.best_hp = copy.deepcopy(self.hyperparameters)
        except RuntimeError as e:
            # Any RuntimeError is reported as out-of-memory and scored as 0.
            print(self.name,e, 'OOM with Hidden Size', self.hyperparameters['hidden'])
            if round_num > 1:
                self.tuner.receive_trial_result(round_num-1,self.hyperparameters,0)
            return 0
        print("Best Hyperparameters of", self.name, self.best_hp)
        return val_score

    def train_valid(self, data, round_num):
        """
        Train on the training mask with early stopping on the validation mask.

        The stopping criterion is the meter's ``avg`` (presumably the running
        mean of the per-epoch micro-F1 scores -- confirm in
        utils.tools.AverageMeter), not the single-epoch score.

        :param data: graph data with ``y``, the three masks and ``label_weights``.
        :param round_num: unused here; kept for interface compatibility.
        :return: the best value reached by the averaged validation score.
        """
        y, train_mask, valid_mask, test_mask, label_weights = data.y, data.train_mask, data.valid_mask, data.test_mask, data.label_weights

        score_meter = AverageMeter()
        patience = self.max_patience
        best_valid_score = 0
        for epoch in range(self.max_epochs):

            # train
            self.train()
            self.optimizer.zero_grad()
            preds = self.forward(data)
            loss = F.cross_entropy(preds[train_mask], y[train_mask], label_weights)
            loss.backward()
            self.optimizer.step()

            # valid
            self.eval()
            with torch.no_grad():
                preds = F.softmax(self.forward(data), dim=-1)
                valid_preds, test_preds = preds[valid_mask], preds[test_mask]
                valid_score = f1_score(y[valid_mask].cpu(), valid_preds.max(1)[1].flatten().cpu(), average='micro')

            score_meter.update(valid_score)

            # patience: an improvement refills the budget, anything else spends it
            if score_meter.avg > best_valid_score:
                best_valid_score = score_meter.avg
                self.current_round_best_preds = test_preds
                patience = self.max_patience
            else:
                patience -= 1

            if patience == 0:
                break

        return best_valid_score

    def predict(self):
        """Return the stored test-node probabilities as a NumPy array, or None."""
        if self.current_round_best_preds is not None:
            return self.current_round_best_preds.cpu().numpy()
        else:
            return None

    def __repr__(self):
        return self.__class__.__name__


--------------------------------------------------------------------------------
/code_submission/model_lib/jkgcn.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import torch.nn.functional as F
  3 | from torch.nn import Linear
  4 | import torch.nn as nn
  5 | from torch_geometric.nn import GCNConv
  6 | from sklearn.metrics import f1_score
  7 | from utils.tools import fix_seed, AverageMeter
  8 | from nni.hyperopt_tuner.hyperopt_tuner import HyperoptTuner
  9 | from torch_geometric.utils import dropout_adj
 10 | import copy
 11 | fix_seed(1234)
 12 | 
 13 | class GraphBaseBlock(torch.nn.Module):
 14 |     """
 15 |     The base block for Multi-layer GCN / ResGCN / Dense GCN 
 16 |     """
 17 | 
 18 |     def __init__(self, in_features, out_features, nbaselayer,
 19 |                  withbn=True, withloop=True, activation=F.relu, dropout=0.5,
 20 |                  aggrmethod="concat", dense=False):
 21 |         """
 22 |         The base block for constructing DeepGCN model.
 23 |         :param in_features: the input feature dimension.
 24 |         :param out_features: the hidden feature dimension.
 25 |         :param nbaselayer: the number of layers in the base block.
 26 |         :param withbn: using batch normalization in graph convolution.
 27 |         :param withloop: using self feature modeling in graph convolution.
 28 |         :param activation: the activation function, default is ReLu.
 29 |         :param dropout: the dropout ratio.
 30 |         :param aggrmethod: the aggregation function for baseblock, can be "concat" and "add". For "resgcn", the default
 31 |                            is "add", for others the default is "concat".
 32 |         :param dense: enable dense connection
 33 |         """
 34 |         super(GraphBaseBlock, self).__init__()
 35 |         self.in_features = in_features
 36 |         self.hiddendim = out_features
 37 |         self.nhiddenlayer = nbaselayer
 38 |         self.activation = activation
 39 |         self.aggrmethod = aggrmethod
 40 |         self.dense = dense
 41 |         self.dropout = dropout
 42 |         self.hiddenlayers = nn.ModuleList()
 43 |         self.__makehidden()
 44 | 
 45 |         if self.aggrmethod == "concat" and dense == False:
 46 |             self.out_features = in_features + out_features
 47 |         elif self.aggrmethod == "concat" and dense == True:
 48 |             self.out_features = in_features + out_features * nbaselayer
 49 |         elif self.aggrmethod == "add":
 50 |             if in_features != self.hiddendim:
 51 |                 raise RuntimeError("The dimension of in_features and hiddendim should be matched in add model.")
 52 |             self.out_features = out_features
 53 |         elif self.aggrmethod == "nores":
 54 |             self.out_features = out_features
 55 |         else:
 56 |             raise NotImplementedError("The aggregation method only support 'concat','add' and 'nores'.")
 57 | 
 58 |     def __makehidden(self):
 59 |         for i in range(self.nhiddenlayer):
 60 |             if i == 0:
 61 |                 layer = GCNConv(self.in_features, self.hiddendim)
 62 |             else:
 63 |                 layer = GCNConv(self.hiddendim, self.hiddendim)
 64 |             self.hiddenlayers.append(layer)
 65 | 
 66 |     def _doconcat(self, x, subx):
 67 |         if x is None:
 68 |             return subx
 69 |         if self.aggrmethod == "concat":
 70 |             return torch.cat((x, subx), 1)
 71 |         elif self.aggrmethod == "add":
 72 |             return x + subx
 73 |         elif self.aggrmethod == "nores":
 74 |             return x
 75 | 
 76 |     def forward(self, input, edge_index, edge_weight):
 77 |         x = input
 78 |         denseout = None
 79 |         # Here out is the result in all levels.
 80 |         for gc in self.hiddenlayers:
 81 |             denseout = self._doconcat(denseout, x)
 82 |             x = self.activation(gc(x, edge_index, edge_weight))
 83 |             x = F.dropout(x, self.dropout, training=self.training)
 84 | 
 85 |         if not self.dense:
 86 |             return self._doconcat(x, input)
 87 |         return self._doconcat(x, denseout)
 88 | 
 89 |     def get_outdim(self):
 90 |         return self.out_features
 91 | 
 92 |     def __repr__(self):
 93 |         return "%s %s (%d - [%d:%d] > %d)" % (self.__class__.__name__,
 94 |                                               self.aggrmethod,
 95 |                                               self.in_features,
 96 |                                               self.hiddendim,
 97 |                                               self.nhiddenlayer,
 98 |                                               self.out_features)
 99 | 
class DenseGCNBlock(torch.nn.Module):
    """
    The multiple layer GCN with dense connection block.

    Thin wrapper that always builds a GraphBaseBlock with ``dense=True``.
    """

    def __init__(self, in_features, out_features, nbaselayer,
                 withbn=True, withloop=True, activation=F.relu, dropout=True,
                 aggrmethod="concat", dense=True):
        """
        The multiple layer GCN with dense connection block.
        :param in_features: the input feature dimension.
        :param out_features: the hidden feature dimension.
        :param nbaselayer: the number of layers in the base block.
        :param withbn: forwarded to GraphBaseBlock.
        :param withloop: forwarded to GraphBaseBlock.
        :param activation: the activation function, default is ReLu.
        :param dropout: the dropout ratio, forwarded to GraphBaseBlock.
                        NOTE(review): the default is the boolean True rather than a
                        ratio; callers should pass an explicit float -- confirm
                        whether the default is intended.
        :param aggrmethod: the aggregation function for the output. For denseblock, default is "concat".
        :param dense: ignored; the wrapped block is always built with dense=True.
        """
        super(DenseGCNBlock, self).__init__()
        self.model = GraphBaseBlock(in_features=in_features,
                                    out_features=out_features,
                                    nbaselayer=nbaselayer,
                                    withbn=withbn,
                                    withloop=withloop,
                                    activation=activation,
                                    dropout=dropout,
                                    dense=True,
                                    aggrmethod=aggrmethod)

    def forward(self, input, edge_index, edge_weight):
        return self.model.forward(input, edge_index, edge_weight)

    def get_outdim(self):
        return self.model.get_outdim()

    def __repr__(self):
        # aggrmethod lives on the wrapped block; this class never sets its own,
        # so reading self.aggrmethod raised AttributeError.
        return "%s %s (%d - [%d:%d] > %d)" % (self.__class__.__name__,
                                              self.model.aggrmethod,
                                              self.model.in_features,
                                              self.model.hiddendim,
                                              self.model.nhiddenlayer,
                                              self.model.out_features)
144 | 
class JKGCN(torch.nn.Module):
    """
    Node classifier made of an input linear layer, one DenseGCNBlock and an
    output linear layer, with a TPE tuner that proposes the hyperparameters of
    every round after the first.
    """

    def __init__(self, info):
        """
        :param info: configuration dict. The keys read here are 'dropedge_rate',
                     'dropout_rate', 'num_layers' and 'init_hidden_size';
                     'n_class' is read later by trial().
        """
        super(JKGCN, self).__init__()
        self.info = info
        self.best_score = 0
        self.hist_score = []

        self.best_preds = None
        self.current_round_best_preds = None
        self.best_valid_score = 0
        self.max_patience = 100
        self.max_epochs = 1600

        self.name = 'JKGCN'

        self.tuner = HyperoptTuner(algorithm_name='tpe', optimize_mode='maximize')
        # Only num_layers and hidden are really searched; the other entries are
        # single-value choices.
        search_space = {
                "dropedge_rate": {
                    "_type": "choice",
                    "_value": [self.info['dropedge_rate']]
                },
                "dropout_rate": {
                    "_type": "choice",
                    "_value": [self.info['dropout_rate']]
                },
                "num_layers": {
                    "_type": "quniform",
                    "_value": [2, 4, 1]
                },
                "hidden": {
                    "_type": "quniform",
                    "_value": [4, 7, 1]
                },
                "lr":{
                    "_type": "choice",
                    "_value": [0.005]
                }
            }
        self.tuner.update_search_space(search_space)
        # Hyperparameters used by the first round, before the tuner is consulted.
        # NOTE(review): lr and dropout_rate are hard-coded here, while the search
        # space above takes dropout_rate from info -- confirm this is intended.
        self.hyperparameters = {
            'num_layers': self.info['num_layers'],
            'lr': 0.005,
            'dropedge_rate':self.info['dropedge_rate'],
            'dropout_rate':0.5,
            'hidden': self.info['init_hidden_size']
        }
        self.best_hp = None

    def init_model(self, n_class, features_num):
        """
        (Re)build the layers and the optimizer from ``self.hyperparameters``.

        :param n_class: number of output classes.
        :param features_num: dimension of the input node features.
        """
        # 'hidden' is stored as an exponent: the layer width is 2 ** hidden.
        hidden = int(2 ** self.hyperparameters['hidden'])
        num_layers = int(self.hyperparameters['num_layers'])
        self.in_lin = nn.Linear(features_num, hidden)
        self.jk_conv = DenseGCNBlock(hidden, hidden, nbaselayer=num_layers, dropout=self.hyperparameters['dropout_rate'])
        self.out_lin = nn.Linear(self.jk_conv.get_outdim(), n_class)

        # Move the module to the GPU before constructing the optimizer, as the
        # torch.optim documentation recommends; Module.to() works in place, so
        # rebinding ``self`` is unnecessary.
        self.to('cuda')

        self.optimizer = torch.optim.Adam(self.parameters(), lr=self.hyperparameters['lr'], weight_decay=5e-4)

        torch.cuda.empty_cache()

    def forward(self, data):
        """
        Compute the class logits for ``data`` (no softmax is applied here).

        :param data: object exposing ``x``, ``edge_index`` and ``edge_weight``.
        """
        x, edge_index, edge_weight = data.x, data.edge_index, data.edge_weight
        if self.hyperparameters['dropedge_rate'] is not None:
            # dropout_adj receives training=self.training, so edge dropping
            # follows the module's train/eval mode.
            edge_index, edge_weight = dropout_adj(edge_index, edge_weight, p=self.hyperparameters['dropedge_rate'],
                                                  force_undirected=False, num_nodes=None, training=self.training)
        x = self.in_lin(x)
        x = F.dropout(x, p=self.hyperparameters['dropout_rate'], training=self.training)
        x = self.jk_conv(x, edge_index, edge_weight)
        x = self.out_lin(x)
        return x

    def trial(self, data, round_num):
        """
        Run one tuning round and report its validation score to the tuner.

        The first round uses the hyperparameters set in __init__; later rounds
        ask the tuner for a new set.

        :param data: graph data object; ``data.x.shape[1]`` gives the feature size.
        :param round_num: round counter; the tuner is only used when it exceeds 1.
        :return: the validation score, or 0 when a RuntimeError interrupted the round.
        """
        n_class, feature_num = self.info['n_class'], data.x.shape[1]
        if round_num >= 2:
            self.hyperparameters = self.tuner.generate_parameters(round_num-1)
        print(self.hyperparameters)

        # A single attempt per round: the former ``while True`` wrapper left on
        # every path, so it is gone.
        try:
            self.init_model(n_class, feature_num)
            val_score = self.train_valid(data, round_num)
            if round_num > 1:
                self.tuner.receive_trial_result(round_num-1,self.hyperparameters,val_score)
            if val_score > self.best_score:
                self.best_hp = copy.deepcopy(self.hyperparameters)
        except RuntimeError as e:
            # Any RuntimeError is reported as out-of-memory and scored as 0.
            print(self.name,e, 'OOM with Hidden Size', self.hyperparameters['hidden'])
            if round_num > 1:
                self.tuner.receive_trial_result(round_num-1,self.hyperparameters,0)
            return 0
        print("Best Hyperparameters of", self.name, self.best_hp)
        return val_score

    def train_valid(self, data, round_num):
        """
        Train on the training mask with early stopping on the validation mask.

        The stopping criterion is the meter's ``avg`` (presumably the running
        mean of the per-epoch micro-F1 scores -- confirm in
        utils.tools.AverageMeter), not the single-epoch score.

        :param data: graph data with ``y``, the three masks and ``label_weights``.
        :param round_num: unused here; kept for interface compatibility.
        :return: the best value reached by the averaged validation score.
        """
        y, train_mask, valid_mask, test_mask, label_weights = data.y, data.train_mask, data.valid_mask, data.test_mask, data.label_weights

        score_meter = AverageMeter()
        patience = self.max_patience
        best_valid_score = 0
        for epoch in range(self.max_epochs):

            # train
            self.train()
            self.optimizer.zero_grad()
            preds = self.forward(data)
            loss = F.cross_entropy(preds[train_mask], y[train_mask], label_weights)
            loss.backward()
            self.optimizer.step()

            # valid
            self.eval()
            with torch.no_grad():
                preds = F.softmax(self.forward(data), dim=-1)
                valid_preds, test_preds = preds[valid_mask], preds[test_mask]
                valid_score = f1_score(y[valid_mask].cpu(), valid_preds.max(1)[1].flatten().cpu(), average='micro')

            score_meter.update(valid_score)

            # patience: an improvement refills the budget, anything else spends it
            if score_meter.avg > best_valid_score:
                best_valid_score = score_meter.avg
                self.current_round_best_preds = test_preds
                patience = self.max_patience
            else:
                patience -= 1

            if patience == 0:
                break

        return best_valid_score

    def predict(self):
        """Return the stored test-node probabilities as a NumPy array, or None."""
        if self.current_round_best_preds is not None:
            return self.current_round_best_preds.cpu().numpy()
        else:
            return None

    def __repr__(self):
        return self.__class__.__name__
287 | 


--------------------------------------------------------------------------------
/code_submission/model_lib/resgcn.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import torch.nn.functional as F
  3 | from torch.nn import Linear
  4 | import torch.nn as nn
  5 | from torch_geometric.nn import GCNConv
  6 | from sklearn.metrics import f1_score
  7 | from utils.tools import fix_seed, AverageMeter
  8 | from nni.hyperopt_tuner.hyperopt_tuner import HyperoptTuner
  9 | from torch_geometric.utils import dropout_adj
 10 | import copy
# Import-time side effect: calls utils.tools.fix_seed with a constant seed
# (presumably to make runs reproducible; confirm in utils/tools.py).
fix_seed(1234)
 12 | 
 13 | class GraphBaseBlock(torch.nn.Module):
 14 |     """
 15 |     The base block for Multi-layer GCN / ResGCN / Dense GCN 
 16 |     """
 17 | 
 18 |     def __init__(self, in_features, out_features, nbaselayer,
 19 |                  withbn=True, withloop=True, activation=F.relu, dropout=0.5,
 20 |                  aggrmethod="concat", dense=False):
 21 |         """
 22 |         The base block for constructing DeepGCN model.
 23 |         :param in_features: the input feature dimension.
 24 |         :param out_features: the hidden feature dimension.
 25 |         :param nbaselayer: the number of layers in the base block.
 26 |         :param withbn: using batch normalization in graph convolution.
 27 |         :param withloop: using self feature modeling in graph convolution.
 28 |         :param activation: the activation function, default is ReLu.
 29 |         :param dropout: the dropout ratio.
 30 |         :param aggrmethod: the aggregation function for baseblock, can be "concat" and "add". For "resgcn", the default
 31 |                            is "add", for others the default is "concat".
 32 |         :param dense: enable dense connection
 33 |         """
 34 |         super(GraphBaseBlock, self).__init__()
 35 |         self.in_features = in_features
 36 |         self.hiddendim = out_features
 37 |         self.nhiddenlayer = nbaselayer
 38 |         self.activation = activation
 39 |         self.aggrmethod = aggrmethod
 40 |         self.dense = dense
 41 |         self.dropout = dropout
 42 |         self.hiddenlayers = nn.ModuleList()
 43 |         self.__makehidden()
 44 | 
 45 |         if self.aggrmethod == "concat" and dense == False:
 46 |             self.out_features = in_features + out_features
 47 |         elif self.aggrmethod == "concat" and dense == True:
 48 |             self.out_features = in_features + out_features * nbaselayer
 49 |         elif self.aggrmethod == "add":
 50 |             if in_features != self.hiddendim:
 51 |                 raise RuntimeError("The dimension of in_features and hiddendim should be matched in add model.")
 52 |             self.out_features = out_features
 53 |         elif self.aggrmethod == "nores":
 54 |             self.out_features = out_features
 55 |         else:
 56 |             raise NotImplementedError("The aggregation method only support 'concat','add' and 'nores'.")
 57 | 
 58 |     def __makehidden(self):
 59 |         for i in range(self.nhiddenlayer):
 60 |             if i == 0:
 61 |                 layer = GCNConv(self.in_features, self.hiddendim)
 62 |             else:
 63 |                 layer = GCNConv(self.hiddendim, self.hiddendim)
 64 |             self.hiddenlayers.append(layer)
 65 | 
 66 |     def _doconcat(self, x, subx):
 67 |         if x is None:
 68 |             return subx
 69 |         if self.aggrmethod == "concat":
 70 |             return torch.cat((x, subx), 1)
 71 |         elif self.aggrmethod == "add":
 72 |             return x + subx
 73 |         elif self.aggrmethod == "nores":
 74 |             return x
 75 | 
 76 |     def forward(self, input, edge_index, edge_weight):
 77 |         x = input
 78 |         denseout = None
 79 |         # Here out is the result in all levels.
 80 |         for gc in self.hiddenlayers:
 81 |             denseout = self._doconcat(denseout, x)
 82 |             x = self.activation(gc(x, edge_index, edge_weight))
 83 |             x = F.dropout(x, self.dropout, training=self.training)
 84 | 
 85 |         if not self.dense:
 86 |             return self._doconcat(x, input)
 87 |         return self._doconcat(x, denseout)
 88 | 
 89 |     def get_outdim(self):
 90 |         return self.out_features
 91 | 
 92 |     def __repr__(self):
 93 |         return "%s %s (%d - [%d:%d] > %d)" % (self.__class__.__name__,
 94 |                                               self.aggrmethod,
 95 |                                               self.in_features,
 96 |                                               self.hiddendim,
 97 |                                               self.nhiddenlayer,
 98 |                                               self.out_features)
 99 | 
class ResGCNBlock(torch.nn.Module):
    """
    The multiple layer GCN with residual connection block.

    Thin wrapper around ``GraphBaseBlock`` configured with
    ``aggrmethod="add"`` and ``dense=False``.
    """

    def __init__(self, in_features, out_features, nbaselayer,
                 withbn=True, withloop=True, activation=F.relu, dropout=True,
                 aggrmethod=None, dense=None):
        """
        The multiple layer GCN with residual connection block.
        :param in_features: the input feature dimension.
        :param out_features: the hidden feature dimension.
        :param nbaselayer: the number of layers in the base block.
        :param withbn: forwarded to GraphBaseBlock.
        :param withloop: forwarded to GraphBaseBlock.
        :param activation: the activation function, default is ReLu.
        :param dropout: the dropout ratio, forwarded to GraphBaseBlock. Callers
                        should pass a float; the legacy default ``True`` is kept
                        only so existing call sites keep their signature.
        :param aggrmethod: not applied (always "add").
        :param dense: not applied (always False).
        """
        super(ResGCNBlock, self).__init__()
        self.model = GraphBaseBlock(in_features=in_features,
                                    out_features=out_features,
                                    nbaselayer=nbaselayer,
                                    withbn=withbn,
                                    withloop=withloop,
                                    activation=activation,
                                    dropout=dropout,
                                    dense=False,
                                    aggrmethod="add")

    def forward(self, input, edge_index, edge_weight):
        """Delegate to the wrapped block's ``forward``."""
        return self.model.forward(input, edge_index, edge_weight)

    def get_outdim(self):
        """Return the output feature dimension of the wrapped block."""
        return self.model.get_outdim()

    def __repr__(self):
        # The aggregation method lives on the wrapped block; this wrapper never
        # sets ``self.aggrmethod``, so reading it here raised AttributeError.
        return "%s %s (%d - [%d:%d] > %d)" % (self.__class__.__name__,
                                              self.model.aggrmethod,
                                              self.model.in_features,
                                              self.model.hiddendim,
                                              self.model.nhiddenlayer,
                                              self.model.out_features)
144 | 
class ResGCN(torch.nn.Module):
    """
    Node classifier: Linear -> ResGCNBlock -> Linear, bundled with its own
    hyperparameter tuner and train/validate loop.
    """

    def __init__(self, info):
        """
        :param info: dict read for ``dropedge_rate``, ``dropout_rate``,
                     ``num_layers`` and ``init_hidden_size`` here, and for
                     ``n_class`` in ``trial``.
        """
        super().__init__()
        self.info = info
        self.best_score = 0
        self.hist_score = []

        self.best_preds = None
        self.current_round_best_preds = None
        self.best_valid_score = 0
        self.max_patience = 100
        self.max_epochs = 1600

        self.name = 'ResGCN'

        self.tuner = HyperoptTuner(algorithm_name='tpe', optimize_mode='maximize')
        # Single-value "choice" entries pin a hyperparameter; only the depth
        # and the hidden-size exponent are actually searched.
        search_space = {
            "dropedge_rate": {"_type": "choice", "_value": [info['dropedge_rate']]},
            "dropout_rate": {"_type": "choice", "_value": [info['dropout_rate']]},
            "num_layers": {"_type": "quniform", "_value": [2, 4, 1]},
            "hidden": {"_type": "quniform", "_value": [4, 7, 1]},
            "lr": {"_type": "choice", "_value": [0.005]},
        }
        self.tuner.update_search_space(search_space)

        # First-round settings; ``best_hp`` starts out as an equal, separate dict.
        self.hyperparameters = {
            'num_layers': info['num_layers'],
            'lr': 0.005,
            'dropedge_rate': info['dropedge_rate'],
            'dropout_rate': 0.5,
            'hidden': info['init_hidden_size'],
        }
        self.best_hp = dict(self.hyperparameters)

    def init_model(self, n_class, features_num):
        """
        (Re)build layers and optimizer from ``self.hyperparameters`` and move
        the module to CUDA.

        :param n_class: number of output classes.
        :param features_num: number of input features per node.
        """
        hp = self.hyperparameters
        width = int(2 ** hp['hidden'])  # 'hidden' is stored as a power-of-two exponent
        depth = int(hp['num_layers'])

        self.in_lin = nn.Linear(features_num, width)
        self.res_conv = ResGCNBlock(width, width, nbaselayer=depth, dropout=hp['dropout_rate'])
        self.out_lin = nn.Linear(self.res_conv.get_outdim(), n_class)

        self.optimizer = torch.optim.Adam(self.parameters(), lr=hp['lr'], weight_decay=5e-4)

        self.to('cuda')

        torch.cuda.empty_cache()

    def forward(self, data):
        """
        Compute class scores for every node in ``data``.

        :param data: object providing ``x``, ``edge_index`` and ``edge_weight``.
        :return: output of the final linear layer (no softmax applied).
        """
        hp = self.hyperparameters
        x, edge_index, edge_weight = data.x, data.edge_index, data.edge_weight

        edge_drop = hp['dropedge_rate']
        if edge_drop is not None:
            edge_index, edge_weight = dropout_adj(
                edge_index, edge_weight, p=edge_drop,
                force_undirected=False, num_nodes=None, training=self.training)

        hidden = self.in_lin(x)
        hidden = F.dropout(hidden, p=hp['dropout_rate'], training=self.training)
        hidden = self.res_conv(hidden, edge_index, edge_weight)
        return self.out_lin(hidden)

    def trial(self, data, round_num):
        """
        Run one tuning round: build the model, train it and report the score.

        From round 2 on, hyperparameters come from ``self.tuner`` (trial id
        ``round_num - 1``) and the score is reported back under the same id.

        :param data: graph data; ``data.x.shape[1]`` gives the feature count.
        :param round_num: 1-based index of the current round.
        :return: the validation score, or ``0`` when a ``RuntimeError``
                 (logged as an out-of-memory failure) occurred.
        """
        n_class = self.info['n_class']
        feature_num = data.x.shape[1]
        trial_id = round_num - 1
        report_to_tuner = round_num > 1

        if round_num >= 2:
            self.hyperparameters = self.tuner.generate_parameters(trial_id)
        print(self.hyperparameters)

        try:
            self.init_model(n_class, feature_num)
            val_score = self.train_valid(data, round_num)
            if report_to_tuner:
                self.tuner.receive_trial_result(trial_id, self.hyperparameters, val_score)
            if val_score > self.best_score:
                self.best_hp = copy.deepcopy(self.hyperparameters)
        except RuntimeError as err:
            print(self.name, err, 'OOM with Hidden Size', self.hyperparameters['hidden'])
            if report_to_tuner:
                self.tuner.receive_trial_result(trial_id, self.hyperparameters, 0)
            return 0

        print("Best Hyperparameters of", self.name, self.best_hp)
        return val_score

    def train_valid(self, data, round_num):
        """
        Train with early stopping and keep the test predictions of the best epoch.

        Each epoch: one optimisation step on ``data.train_mask`` rows, then a
        micro-F1 evaluation on ``data.valid_mask`` rows.  Early stopping is
        driven by ``AverageMeter.avg`` of those scores; on every improvement the
        softmax outputs for ``data.test_mask`` are stored in
        ``self.current_round_best_preds``.

        :param data: object providing ``y``, the three masks and ``label_weights``.
        :param round_num: index of the current round; not used in this method.
        :return: the best ``meter.avg`` value observed.
        """
        labels = data.y
        train_mask = data.train_mask
        valid_mask = data.valid_mask
        test_mask = data.test_mask
        class_weights = data.label_weights

        meter = AverageMeter()
        remaining_patience = self.max_patience
        best_valid_score = 0

        for _ in range(self.max_epochs):
            # optimisation step
            self.train()
            self.optimizer.zero_grad()
            logits = self.forward(data)
            train_loss = F.cross_entropy(logits[train_mask], labels[train_mask], class_weights)
            train_loss.backward()
            self.optimizer.step()

            # evaluation
            self.eval()
            with torch.no_grad():
                probs = F.softmax(self.forward(data), dim=-1)
                valid_probs = probs[valid_mask]
                test_probs = probs[test_mask]
                predicted = valid_probs.max(1)[1].flatten().cpu()
                valid_score = f1_score(labels[valid_mask].cpu(), predicted, average='micro')

            meter.update(valid_score)

            # early-stopping bookkeeping
            improved = meter.avg > best_valid_score
            if improved:
                best_valid_score = meter.avg
                self.current_round_best_preds = test_probs
                remaining_patience = self.max_patience
            else:
                remaining_patience -= 1

            if remaining_patience == 0:
                break

        return best_valid_score

    def predict(self):
        """
        :return: the stored best test predictions as a NumPy array, or ``None``
                 when nothing has been stored yet.
        """
        best_preds = self.current_round_best_preds
        if best_preds is None:
            return None
        return best_preds.cpu().numpy()

    def __repr__(self):
        return type(self).__name__
293 | 


--------------------------------------------------------------------------------
/code_submission/model_lib/sg.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import torch.nn.functional as F
  3 | from torch.nn import Linear
  4 | from torch_geometric.nn import SGConv
  5 | import copy
  6 | from sklearn.metrics import f1_score
  7 | from utils.tools import fix_seed,AverageMeter
  8 | from nni.hyperopt_tuner.hyperopt_tuner import HyperoptTuner
  9 | from torch_geometric.utils import dropout_adj
 10 | import random
 11 | 
# Import-time side effect: calls utils.tools.fix_seed with a constant seed
# (presumably to make runs reproducible; confirm in utils/tools.py).
fix_seed(1234)
 13 | class SG(torch.nn.Module):
 14 | 
 15 |     def __init__(self, info):
 16 |         super(SG, self).__init__()
 17 | 
 18 |         self.info = info
 19 |         self.best_score = 0
 20 |         self.hist_score = []
 21 |         self.best_preds = None
 22 |         self.current_round_best_preds = None
 23 |         self.max_patience = 100
 24 |         self.max_epochs = 1600
 25 |         self.name = 'SG'
 26 | 
 27 |         self.hyperparameters = {
 28 |             'num_layers': self.info['num_layers'],
 29 |             'lr': 0.005,
 30 |             'K': 3,
 31 |             'dropedge_rate':self.info['dropedge_rate'],
 32 |             'dropout_rate':0.5,
 33 |             'hidden': self.info['init_hidden_size'],
 34 |             'use_linear':self.info['use_linear']
 35 |         }
 36 | 
 37 |         self.best_hp =  None
 38 |         self.tuner = HyperoptTuner(algorithm_name='tpe', optimize_mode='maximize')
 39 |         search_space = {
 40 |                 "dropedge_rate": {
 41 |                     "_type": "choice",
 42 |                     "_value": [self.info['dropedge_rate']]
 43 |                 },
 44 |                 "dropout_rate": {
 45 |                     "_type": "choice",
 46 |                     "_value": [self.info['dropout_rate']]
 47 |                 },
 48 |                 "num_layers": {
 49 |                     "_type": "quniform",
 50 |                     "_value": [1, 3, 1]
 51 |                 },
 52 |                 "hidden": {
 53 |                     "_type": "quniform",
 54 |                     "_value": [4, 7, 1]
 55 |                 },
 56 |                 "lr":{
 57 |                     "_type": "choice",
 58 |                     "_value": [0.005]
 59 |                 },
 60 |                 'K' :{
 61 |                     "_type": "quniform",
 62 |                     "_value": [1, 6, 1]
 63 |                 },
 64 |                 "use_linear":{
 65 |                     "_type": "choice",
 66 |                     "_value": [True,False]
 67 |                 }
 68 |             }
 69 |         self.tuner.update_search_space(search_space)
 70 | 
 71 |     def init_model(self, n_class, feature_num):
 72 |         num_layers = int(self.hyperparameters['num_layers'])
 73 |         hidden_size = int(2 ** self.hyperparameters['hidden'])
 74 |         K = int(self.hyperparameters['K'])
 75 |         lr = self.hyperparameters['lr']
 76 |         if self.hyperparameters['use_linear']:
 77 |             self.input_lin = Linear(feature_num, hidden_size)
 78 |             self.convs = torch.nn.ModuleList()
 79 |             for i in range(num_layers):
 80 |                 self.convs.append(SGConv(in_channels=hidden_size, out_channels=hidden_size, K=K, cached=False))
 81 |             self.output_lin = Linear(hidden_size, n_class)
 82 |         else:
 83 |             if num_layers == 1:
 84 |                 self.conv1 = SGConv(in_channels=feature_num, out_channels=n_class, K=K, cached=False)
 85 |             else:    
 86 |                 self.conv1 = SGConv(in_channels=feature_num, out_channels=hidden_size, K=K, cached=False)
 87 |                 self.convs = torch.nn.ModuleList()
 88 |                 for i in range(num_layers - 2):
 89 |                     self.convs.append(SGConv(in_channels=hidden_size, out_channels=hidden_size, K=K, cached=False))
 90 |                 self.conv2 = SGConv(in_channels=hidden_size, out_channels=n_class, K=K, cached=False)
 91 | 
 92 |         self.optimizer = torch.optim.Adam(self.parameters(), lr=lr, weight_decay=5e-4)
 93 | 
 94 |         self = self.to('cuda')
 95 | 
 96 |         torch.cuda.empty_cache()
 97 | 
 98 |     def forward(self, data):
 99 |         x, edge_index, edge_weight = data.x, data.edge_index, data.edge_weight
100 |         if self.hyperparameters['dropedge_rate'] is not None:
101 |             edge_index, edge_weight = dropout_adj(edge_index, edge_weight, p=self.hyperparameters['dropedge_rate'],\
102 |                  force_undirected=False, num_nodes=None, training=self.training)
103 |         
104 |         if self.hyperparameters['use_linear']:
105 |             x = F.relu(self.input_lin(x))
106 |         else:
107 |             x = F.relu(self.conv1(x, edge_index,edge_weight))
108 |             if self.hyperparameters['num_layers'] == 1:
109 |                 return x
110 |         x = F.dropout(x, p=self.hyperparameters['dropout_rate'], training=self.training)
111 |         for conv in self.convs:
112 |             x = F.relu(conv(x, edge_index, edge_weight=edge_weight))
113 |             x = F.dropout(x, p=self.hyperparameters['dropout_rate'], training=self.training)
114 |         if self.hyperparameters['use_linear']:
115 |             x = self.output_lin(x)
116 |         else:
117 |             x = self.conv2(x, edge_index,edge_weight)
118 |         return x
119 | 
120 |     def trial(self, data, round_num):
121 |         n_class, feature_num = self.info['n_class'], data.x.shape[1]
122 |         if round_num >= 2:
123 |             self.hyperparameters = self.tuner.generate_parameters(round_num-1)
124 |         print(self.hyperparameters)    
125 |            
126 |         while True:
127 |             try:
128 |                 self.init_model(n_class, feature_num)
129 |                 val_score = self.train_valid(data, round_num)
130 |                 if round_num > 1:
131 |                     self.tuner.receive_trial_result(round_num-1,self.hyperparameters,val_score)
132 |                 if val_score > self.best_score:
133 |                     self.best_hp = copy.deepcopy(self.hyperparameters)
134 |                 break
135 |             except RuntimeError as e:
136 |                 print(self.name,e, 'OOM with Hidden Size', self.hyperparameters['hidden'])
137 |                 if round_num > 1:
138 |                     self.tuner.receive_trial_result(round_num-1,self.hyperparameters,0)
139 |                 return 0
140 |         print("Best Hyperparameters of", self.name, self.best_hp)
141 |         return val_score
142 | 
143 |     def train_valid(self, data, round_num):
144 |         y, train_mask, valid_mask, test_mask, label_weights = data.y, data.train_mask, data.valid_mask, data.test_mask, data.label_weights
145 |         patience = self.max_patience
146 |         best_valid_score = 0
147 |         valid_acc_meter = AverageMeter()
148 |         for epoch in range(self.max_epochs):
149 | 
150 |             # train
151 |             self.train()
152 |             self.optimizer.zero_grad()
153 |             preds = self.forward(data)
154 |             loss = F.cross_entropy(preds[train_mask], y[train_mask], label_weights)
155 |             loss.backward()
156 |             self.optimizer.step()
157 | 
158 |             # valid
159 |             self.eval()
160 |             with torch.no_grad():
161 |                 preds = F.softmax(self.forward(data), dim=-1)
162 |                 valid_preds, test_preds = preds[valid_mask], preds[test_mask]
163 |                 valid_score = f1_score(y[valid_mask].cpu(), valid_preds.max(1)[1].flatten().cpu(), average='micro')
164 |             valid_acc_meter.update(valid_score)
165 |             # patience
166 |             if valid_acc_meter.avg > best_valid_score:
167 |                 best_valid_score = valid_acc_meter.avg
168 |                 self.current_round_best_preds = test_preds
169 |                 patience = self.max_patience
170 |             else:
171 |                 patience -= 1
172 | 
173 |             if patience == 0:
174 |                 break
175 | 
176 |         return best_valid_score
177 | 
178 |     def predict(self):
179 |         return self.current_round_best_preds.cpu().numpy()
180 | 
181 |     def __repr__(self):
182 |         return self.__class__.__name__


--------------------------------------------------------------------------------
/code_submission/model_lib/tag.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import torch.nn.functional as F
  3 | from torch.nn import Linear
  4 | from torch_geometric.nn import TAGConv
  5 | import copy
  6 | from sklearn.metrics import f1_score
  7 | from utils.tools import fix_seed, AverageMeter
  8 | from nni.hyperopt_tuner.hyperopt_tuner import HyperoptTuner
  9 | from torch_geometric.utils import dropout_adj
 10 | import random
 11 | 
# Import-time side effect: calls utils.tools.fix_seed with a constant seed
# (presumably to make runs reproducible; confirm in utils/tools.py).
fix_seed(1234)
 13 | class TAG(torch.nn.Module):
 14 | 
 15 |     def __init__(self, info):
 16 |         super(TAG, self).__init__()
 17 | 
 18 |         self.info = info
 19 |         self.hyperparameters ={
 20 |             'num_layers': self.info['num_layers'],
 21 |             'lr': self.info['lr'],
 22 |             'dropedge_rate': self.info['dropedge_rate'],
 23 |             'dropout_rate': self.info['dropout_rate'],
 24 |             'K': 3,
 25 |             'hidden': self.info['init_hidden_size'],
 26 |             'use_linear': self.info['use_linear']
 27 |         }
 28 | 
 29 |         self.best_score = 0
 30 |         self.hist_score = []
 31 |         self.best_preds = None
 32 |         self.current_round_best_preds = None
 33 |         self.best_valid_score = 0
 34 |         self.max_patience = 100
 35 |         self.max_epochs = 1600
 36 |         
 37 |         self.name = 'TAG'
 38 |         self.best_hp = None
 39 |         self.tuner = HyperoptTuner(algorithm_name='tpe', optimize_mode='maximize')
 40 |         search_space = {
 41 |                 "dropedge_rate": {
 42 |                     "_type": "choice",
 43 |                     "_value": [self.info['dropedge_rate']]
 44 |                 },
 45 |                 "dropout_rate": {
 46 |                     "_type": "choice",
 47 |                     "_value": [self.info['dropout_rate']]
 48 |                 },
 49 |                 "num_layers": {
 50 |                     "_type": "quniform",
 51 |                     "_value": [1, 3, 1]
 52 |                 },
 53 |                 "hidden": {
 54 |                     "_type": "quniform",
 55 |                     "_value": [4, 7, 1]
 56 |                 },
 57 |                 "lr":{
 58 |                     "_type": "choice",
 59 |                     "_value": [0.005]
 60 |                 },
 61 |                 'K' :{
 62 |                     "_type": "quniform",
 63 |                     "_value": [1, 6, 1]
 64 |                 },
 65 |                 "use_linear":{
 66 |                     "_type": "choice",
 67 |                     "_value": [True, False]
 68 |                 }
 69 |             }
 70 |         self.tuner.update_search_space(search_space)
 71 | 
 72 |     def init_model(self, n_class, feature_num):
 73 |         num_layers = int(self.hyperparameters['num_layers'])
 74 |         hidden_size = int(2 ** self.hyperparameters['hidden'])
 75 |         lr = self.hyperparameters['lr']
 76 |         K = int(self.hyperparameters['K'])
 77 | 
 78 |         if self.hyperparameters['use_linear']:
 79 |             self.input_lin = Linear(feature_num, hidden_size)
 80 |             self.convs = torch.nn.ModuleList()
 81 |             for i in range(num_layers):
 82 |                 self.convs.append(TAGConv(in_channels=hidden_size, out_channels=hidden_size, K=K))
 83 |             self.output_lin = Linear(hidden_size, n_class)
 84 |         else:
 85 |             if num_layers == 1:
 86 |                 self.conv1 = TAGConv(in_channels=feature_num, out_channels=n_class, K=K)
 87 |             else:    
 88 |                 self.conv1 = TAGConv(in_channels=feature_num, out_channels=hidden_size, K=K)
 89 |                 self.convs = torch.nn.ModuleList()
 90 |                 for i in range(num_layers - 2):
 91 |                     self.convs.append(TAGConv(in_channels=hidden_size, out_channels=hidden_size, K=K))
 92 |                 self.conv2 = TAGConv(in_channels=hidden_size, out_channels=n_class, K=K)
 93 | 
 94 |         self.optimizer = torch.optim.Adam(self.parameters(), lr=lr, weight_decay=5e-4)
 95 | 
 96 |         self = self.to('cuda')
 97 | 
 98 |         torch.cuda.empty_cache()
 99 | 
100 |     def forward(self, data):
101 |         x, edge_index, edge_weight = data.x, data.edge_index, data.edge_weight
102 |         if self.hyperparameters['dropedge_rate'] is not None:
103 |             edge_index, edge_weight = dropout_adj(edge_index, edge_weight, p=self.hyperparameters['dropedge_rate'],\
104 |                  force_undirected=False, num_nodes=None, training=self.training)
105 |         
106 |         if self.hyperparameters['use_linear']:
107 |             x = F.relu(self.input_lin(x))
108 |         else:
109 |             x = F.relu(self.conv1(x, edge_index,edge_weight))
110 |             if self.hyperparameters['num_layers'] == 1:
111 |                 return x
112 |         x = F.dropout(x, p=self.hyperparameters['dropout_rate'], training=self.training)
113 |         for conv in self.convs:
114 |             x = F.relu(conv(x, edge_index, edge_weight=edge_weight))
115 |             x = F.dropout(x, p=self.hyperparameters['dropout_rate'], training=self.training)
116 |         if self.hyperparameters['use_linear']:
117 |             x = self.output_lin(x)
118 |         else:
119 |             x = self.conv2(x, edge_index,edge_weight)
120 |         return x
121 | 
122 |     def trial(self, data, round_num):
123 |         n_class, feature_num = self.info['n_class'], data.x.shape[1]
124 |         if round_num >= 2:
125 |             self.hyperparameters = self.tuner.generate_parameters(round_num-1)
126 |         print(self.hyperparameters)    
127 |            
128 |         while True:
129 |             try:
130 |                 self.init_model(n_class, feature_num)
131 |                 val_score = self.train_valid(data, round_num)
132 |                 if round_num > 1:
133 |                     self.tuner.receive_trial_result(round_num-1,self.hyperparameters,val_score)
134 |                 if val_score > self.best_score:
135 |                     self.best_hp = copy.deepcopy(self.hyperparameters)
136 |                 break
137 |             except RuntimeError as e:
138 |                 print(self.name,e, 'OOM with Hidden Size', self.hyperparameters['hidden'])
139 |                 if round_num > 1:
140 |                     self.tuner.receive_trial_result(round_num-1,self.hyperparameters,0)
141 |                 return 0
142 |         print("Best Hyperparameters of", self.name, self.best_hp)
143 |         return val_score
144 | 
145 |     def train_valid(self, data, round_num):
146 |         y, train_mask, valid_mask, test_mask, label_weights = data.y, data.train_mask, data.valid_mask, data.test_mask, data.label_weights
147 |         patience = self.max_patience
148 |         best_valid_score = 0
149 |         valid_acc_meter = AverageMeter()
150 |         for epoch in range(self.max_epochs):
151 | 
152 |             # train
153 |             self.train()
154 |             self.optimizer.zero_grad()
155 |             preds = self.forward(data)
156 |             loss = F.cross_entropy(preds[train_mask], y[train_mask], label_weights)
157 |             loss.backward()
158 |             self.optimizer.step()
159 | 
160 |             # valid
161 |             self.eval()
162 |             with torch.no_grad():
163 |                 preds = F.softmax(self.forward(data), dim=-1)
164 |                 valid_preds, test_preds = preds[valid_mask], preds[test_mask]
165 |                 valid_score = f1_score(y[valid_mask].cpu(), valid_preds.max(1)[1].flatten().cpu(), average='micro')
166 |             valid_acc_meter.update(valid_score)
167 |             # patience
168 |             if valid_acc_meter.avg > best_valid_score:
169 |                 best_valid_score = valid_acc_meter.avg
170 |                 self.current_round_best_preds = test_preds
171 |                 patience = self.max_patience
172 |             else:
173 |                 patience -= 1
174 | 
175 |             if patience == 0:
176 |                 break
177 | 
178 |         return best_valid_score
179 | 
180 |     def predict(self):
181 |         return self.current_round_best_preds.cpu().numpy()
182 | 
183 |     def __repr__(self):
184 |         return self.__class__.__name__


--------------------------------------------------------------------------------
/code_submission/model_space.py:
--------------------------------------------------------------------------------
 1 | from model_lib import *
 2 | 
 3 | class ModelSpace:
 4 |     """
 5 |     Model space which contains all the base models in model_lib
 6 |     Parameters:
 7 |     ----------
 8 |     info: dict
 9 |         The eda infomation generated by AutoEDA
10 |     ----------
11 |     """
12 |     def __init__(self, info):
13 |         self.info = info
14 | 
15 |         self.model_prior = self.info['chosen_models']
16 | 
17 |         self.model_lib = {
18 |             'GraphConvNet': GraphConvNet,
19 |             'GraphSAGE': GraphSAGE,
20 |             'GAT': GAT,
21 |             'GCN': GCN,
22 |             'APPNP': APPNPNet,
23 |             'ARMA': ARMA,
24 |             'GatedGraph': GatedGraphNet,
25 |             'GIN': GIN,
26 |             'SG': SG,
27 |             'TAG': TAG,
28 |             'IncepGCN': IncepGCN,
29 |             'ResGCN': ResGCN,
30 |             'JKGCN': JKGCN
31 |         }
32 | 
33 |     def get_models(self):
34 |         ret = {}
35 |         for model_name in self.model_prior:
36 |             ret[model_name] = self.model_lib[model_name](self.info)
37 |         return ret
38 | 


--------------------------------------------------------------------------------
/code_submission/timer.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import sys
 3 | import time
 4 | import threading
 5 | import signal
 6 | 
 7 | time_budget, pid = int(sys.argv[1]), int(sys.argv[2])
 8 | 
 9 | def raise_timeout_exception(pid_to_kill):
10 |     """
11 |     Helper function to inform the main process
12 |     that time has ran out.
13 |     Parameters:
14 |     ----------
15 |     pid_to_kill: int
16 |         the pid of main process
17 |     ----------
18 |     """
19 |     os.kill(pid_to_kill, signal.SIGTSTP)
20 | 
21 | # start a timer for timing.
22 | timer = threading.Timer(time_budget, raise_timeout_exception, [pid])
23 | timer.start()
24 | 


--------------------------------------------------------------------------------
/code_submission/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JunweiSUN/AutoGL/e1743e4571d88889ce87c14cb3bce63a0a2a505d/code_submission/utils/__init__.py


--------------------------------------------------------------------------------
/code_submission/utils/eda.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import numpy as np
  3 | import sys
  4 | import gc
  5 | import torch
  6 | from .tools import fix_seed
  7 | from torch_geometric.utils import is_undirected
# Seed the random number generators as soon as this module is imported.
fix_seed(1234)
  9 | class AutoEDA(object):
 10 |     """
 11 |     A tool box for Exploratory Data Analysis (EDA)
 12 |     Parameters:
 13 |     ----------
 14 |     n_class: int
 15 |         number of classes
 16 |     ----------
 17 |     """
 18 |     def __init__(self, n_class):
 19 |         self.info = {'n_class': n_class}
 20 | 
 21 |     def get_info(self, data):
 22 |         self.get_feature_info(data['fea_table'])
 23 |         self.get_edge_info(data['edge_file'])
 24 |         self.set_priori_knowledges()
 25 |         self.get_label_weights(data, reweighting=True)
 26 |         return self.info
 27 | 
 28 |     def get_feature_info(self, df):
 29 |         """
 30 |         Get information of the original node features: number of nodes, number of features, etc.
 31 |         Remove those features which have only one value.
 32 |         """
 33 |         unique_counts = df.nunique()
 34 |         unique_counts = unique_counts[unique_counts == 1]
 35 |         df.drop(unique_counts.index, axis=1, inplace=True)
 36 | 
 37 |         self.info['num_nodes'] = df.shape[0]
 38 |         self.info['num_features'] = df.shape[1] - 1
 39 | 
 40 |         print('Number of Nodes:', self.info['num_nodes'])
 41 |         print('Number of Original Features:', self.info['num_features'])
 42 | 
 43 |     def get_edge_info(self, df):
 44 |         """
 45 |         Get information of the edges: number of edges, if weighted, if directed, Max / Min weight, etc.
 46 |         """
 47 |         self.info['num_edges'] = df.shape[0]
 48 |         min_weight, max_weight = df['edge_weight'].min(), df['edge_weight'].max()
 49 |         if min_weight != max_weight:
 50 |             self.info['weighted'] = True
 51 |         else:
 52 |             self.info['weighted'] = False
 53 | 
 54 |         edge_index = df[['src_idx', 'dst_idx']].to_numpy()
 55 |         edge_index = sorted(edge_index, key=lambda d: d[0])
 56 |         edge_index = torch.tensor(edge_index, dtype=torch.long).transpose(0, 1)
 57 | 
 58 |         self.info['directed'] = not is_undirected(edge_index, num_nodes=self.info['num_nodes'])
 59 | 
 60 |         print('Number of Edges:', self.info['num_edges'])
 61 |         print('Is Directed Graph:', self.info['directed'])
 62 |         print('Is Weighted Graph:',self.info['weighted'])
 63 |         print('Max Weight:', max_weight, 'Min Weight:', min_weight)
 64 | 
 65 |     def set_priori_knowledges(self):
 66 |         """
 67 |         Set some hyper parameters to their initial value according to some priori knowledges.
 68 |         """
 69 |         if self.info['num_features'] == 0:
 70 |             if self.info['directed']:
 71 |                 self.info['dropedge_rate'] = 0.5
 72 |                 self.info['chosen_models'] = ['ResGCN', 'GraphConvNet', 'GraphSAGE']
 73 |                 self.info['ensemble_threshold'] = 0.01
 74 |             else:
 75 |                 self.info['dropedge_rate'] = 0
 76 |                 self.info['chosen_models'] = ['GraphConvNet','GIN','GraphSAGE']
 77 |                 self.info['ensemble_threshold'] = 0.01
 78 | 
 79 |         else:
 80 |             if self.info['directed']:
 81 |                 self.info['dropedge_rate'] = 0.5
 82 |                 self.info['chosen_models'] = ['GraphConvNet','GraphSAGE','ResGCN']
 83 |                 self.info['ensemble_threshold'] = 0.02
 84 |             else:
 85 |                 if self.info['num_edges'] / self.info['num_nodes']>= 10:
 86 |                     self.info['dropedge_rate'] = 0.5
 87 |                     self.info['chosen_models'] = ['ARMA','GraphSAGE', 'IncepGCN']
 88 |                     self.info['ensemble_threshold'] = 0.02
 89 |                 else:
 90 |                     self.info['dropedge_rate'] = 0.5
 91 |                     self.info['chosen_models'] = ['ARMA','IncepGCN','GraphConvNet','SG']
 92 |                     self.info['ensemble_threshold'] = 0.03
 93 | 
 94 |         if  self.info['num_edges'] / self.info['num_nodes'] >= 200:
 95 |             self.info['num_layers'] = 1
 96 |             self.info['init_hidden_size'] = 5
 97 |         elif self.info['num_edges'] / self.info['num_nodes'] >= 100:
 98 |             self.info['num_layers'] = 2
 99 |             self.info['init_hidden_size'] = 5
100 |         else:
101 |             self.info['num_layers'] = 2
102 |             self.info['init_hidden_size'] = 7
103 | 
104 |         if self.info['num_edges'] / self.info['num_nodes'] >= 10:
105 |             self.info['use_linear'] = True
106 |             self.info['dropout_rate'] = 0.2
107 |         else:
108 |             self.info['use_linear'] = False
109 |             self.info['dropout_rate'] = 0.5 
110 | 
111 |         self.info['lr'] = 0.005
112 | 
113 |         if self.info['num_features'] == 0:
114 |             self.info['feature_type'] = ['svd']  # one_hot / svd / degree / node2vec / adj
115 |         else:
116 |             self.info['feature_type'] = ['original', 'svd']
117 | 
118 |         self.info['normalize_features'] = 'None'
119 | 
120 |     def get_label_weights(self, data, reweighting=True):
121 |         """
122 |         Compute the weights of labels as the weight when computing loss.
123 |         """
124 |         if not reweighting:
125 |             self.info['label_weights'] = None
126 |             return
127 | 
128 |         groupby_data_orginal = data['train_label'].groupby('label').count()
129 |         label_weights = groupby_data_orginal.iloc[:,0]
130 |         
131 |         if len(label_weights) < 10 or max(label_weights) < min(label_weights) * 10:
132 |             self.info['label_weights'] = None
133 |             return
134 | 
135 |         label_weights = 1 / np.sqrt(label_weights)
136 |         self.info['label_weights'] = torch.tensor(label_weights.values,dtype=torch.float32)
137 |         print('Label Weights:', self.info['label_weights'])
138 | 
139 | 
140 | 
141 | 
142 | 
143 | 


--------------------------------------------------------------------------------
/code_submission/utils/tools.py:
--------------------------------------------------------------------------------
 1 | import random
 2 | import numpy as np
 3 | import torch
 4 | 
 5 | def fix_seed(seed):
 6 |     """
 7 |     Fix all the random seeds.
 8 |     """
 9 |     random.seed(seed)
10 |     np.random.seed(seed)
11 |     torch.manual_seed(seed)
12 |     torch.cuda.manual_seed_all(seed)
13 |     torch.backends.cudnn.deterministic = True
14 |     torch.backends.cudnn.enabled = False
15 | 
class AverageMeter(object):
    """
    Compute and store the current value and the average value in a momentum-like way.
    """
    def __init__(self):
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, factor=0.1, n=1): # factor like momentum
        """
        Record `val` and blend it into the running average:
        avg = val * factor + avg * (1 - factor).
        `sum` and `count` accumulate `val * n` and `n`.
        """
        self.val = val
        blended = self.val * factor + self.avg * (1 - factor)
        self.sum += val * n
        self.count += n
        # NOTE(review): count is tested after it has been incremented, so the
        # average is replaced by the raw value only while count is still 0
        # (e.g. n=0); with the default n=1 the average starts from 0.
        self.avg = self.val if self.count == 0 else blended
30 | 


--------------------------------------------------------------------------------
/data/demo/test_label.tsv:
--------------------------------------------------------------------------------
  1 | node_index	label
  2 | 4	3
  3 | 10	0
  4 | 15	3
  5 | 17	3
  6 | 20	5
  7 | 38	4
  8 | 48	3
  9 | 61	0
 10 | 69	6
 11 | 75	4
 12 | 84	4
 13 | 90	2
 14 | 91	4
 15 | 92	6
 16 | 102	1
 17 | 106	6
 18 | 108	6
 19 | 114	5
 20 | 119	5
 21 | 121	1
 22 | 125	5
 23 | 130	5
 24 | 133	1
 25 | 138	1
 26 | 144	6
 27 | 145	6
 28 | 148	4
 29 | 156	2
 30 | 161	0
 31 | 167	3
 32 | 175	6
 33 | 191	5
 34 | 194	4
 35 | 203	4
 36 | 210	4
 37 | 211	0
 38 | 217	4
 39 | 223	2
 40 | 224	4
 41 | 226	3
 42 | 230	3
 43 | 232	3
 44 | 240	3
 45 | 241	5
 46 | 248	4
 47 | 255	4
 48 | 256	3
 49 | 257	0
 50 | 262	6
 51 | 264	5
 52 | 269	3
 53 | 278	3
 54 | 282	5
 55 | 292	0
 56 | 301	2
 57 | 310	3
 58 | 324	0
 59 | 326	2
 60 | 330	4
 61 | 337	2
 62 | 338	3
 63 | 341	2
 64 | 343	3
 65 | 344	5
 66 | 346	2
 67 | 348	4
 68 | 350	1
 69 | 352	3
 70 | 358	2
 71 | 365	0
 72 | 369	4
 73 | 374	3
 74 | 386	3
 75 | 409	1
 76 | 430	4
 77 | 442	4
 78 | 446	0
 79 | 447	6
 80 | 451	5
 81 | 458	5
 82 | 488	4
 83 | 490	5
 84 | 500	3
 85 | 506	6
 86 | 507	3
 87 | 516	5
 88 | 517	5
 89 | 519	1
 90 | 526	2
 91 | 528	5
 92 | 546	3
 93 | 553	3
 94 | 555	3
 95 | 562	2
 96 | 563	1
 97 | 564	2
 98 | 566	3
 99 | 568	3
100 | 569	2
101 | 574	3
102 | 577	4
103 | 579	3
104 | 587	5
105 | 588	4
106 | 591	0
107 | 595	3
108 | 600	6
109 | 601	3
110 | 604	6
111 | 606	0
112 | 610	5
113 | 611	6
114 | 614	0
115 | 615	2
116 | 616	2
117 | 620	3
118 | 626	2
119 | 630	3
120 | 632	2
121 | 638	4
122 | 639	3
123 | 640	3
124 | 649	3
125 | 652	4
126 | 654	4
127 | 656	1
128 | 660	1
129 | 671	5
130 | 675	4
131 | 682	6
132 | 683	1
133 | 684	2
134 | 686	2
135 | 690	3
136 | 700	3
137 | 701	5
138 | 707	0
139 | 711	0
140 | 713	3
141 | 717	0
142 | 720	5
143 | 724	6
144 | 729	3
145 | 742	2
146 | 744	2
147 | 756	3
148 | 767	0
149 | 768	3
150 | 769	0
151 | 770	3
152 | 773	1
153 | 777	0
154 | 779	6
155 | 780	4
156 | 785	3
157 | 788	3
158 | 789	4
159 | 810	4
160 | 812	3
161 | 815	0
162 | 817	2
163 | 820	2
164 | 825	2
165 | 829	1
166 | 832	3
167 | 838	0
168 | 841	3
169 | 844	5
170 | 850	6
171 | 854	4
172 | 859	2
173 | 863	0
174 | 864	1
175 | 870	3
176 | 871	2
177 | 872	4
178 | 881	2
179 | 885	5
180 | 887	1
181 | 888	0
182 | 890	5
183 | 892	3
184 | 903	5
185 | 922	4
186 | 927	3
187 | 930	3
188 | 932	3
189 | 943	4
190 | 947	4
191 | 965	0
192 | 969	2
193 | 970	2
194 | 974	3
195 | 976	2
196 | 979	3
197 | 982	3
198 | 987	3
199 | 991	6
200 | 994	3
201 | 1009	1
202 | 1011	1
203 | 1017	3
204 | 1018	4
205 | 1023	3
206 | 1036	0
207 | 1038	2
208 | 1041	2
209 | 1043	5
210 | 1045	1
211 | 1049	3
212 | 1053	3
213 | 1057	3
214 | 1062	3
215 | 1064	4
216 | 1068	4
217 | 1071	4
218 | 1074	6
219 | 1077	5
220 | 1078	0
221 | 1081	4
222 | 1084	3
223 | 1087	4
224 | 1091	3
225 | 1095	3
226 | 1105	5
227 | 1108	3
228 | 1113	5
229 | 1114	2
230 | 1118	0
231 | 1129	4
232 | 1130	3
233 | 1132	3
234 | 1137	0
235 | 1148	2
236 | 1151	3
237 | 1152	0
238 | 1153	4
239 | 1156	0
240 | 1157	4
241 | 1166	3
242 | 1189	2
243 | 1190	5
244 | 1193	1
245 | 1202	4
246 | 1207	4
247 | 1210	2
248 | 1214	2
249 | 1216	2
250 | 1217	3
251 | 1220	0
252 | 1222	5
253 | 1225	3
254 | 1226	5
255 | 1236	6
256 | 1239	3
257 | 1241	3
258 | 1247	3
259 | 1249	2
260 | 1254	2
261 | 1260	4
262 | 1264	4
263 | 1266	4
264 | 1267	5
265 | 1282	2
266 | 1283	1
267 | 1285	0
268 | 1287	2
269 | 1290	3
270 | 1293	0
271 | 1298	3
272 | 1305	2
273 | 1321	3
274 | 1331	3
275 | 1342	4
276 | 1348	3
277 | 1351	6
278 | 1360	5
279 | 1365	3
280 | 1367	1
281 | 1383	2
282 | 1384	2
283 | 1400	3
284 | 1401	4
285 | 1402	6
286 | 1403	3
287 | 1405	3
288 | 1406	4
289 | 1410	0
290 | 1413	3
291 | 1417	4
292 | 1420	6
293 | 1423	3
294 | 1424	3
295 | 1425	4
296 | 1426	2
297 | 1428	4
298 | 1434	3
299 | 1437	4
300 | 1438	3
301 | 1440	3
302 | 1442	3
303 | 1450	3
304 | 1453	3
305 | 1455	5
306 | 1456	3
307 | 1458	3
308 | 1465	3
309 | 1470	5
310 | 1478	5
311 | 1489	2
312 | 1500	5
313 | 1506	2
314 | 1513	2
315 | 1520	2
316 | 1523	0
317 | 1524	0
318 | 1527	0
319 | 1528	2
320 | 1536	0
321 | 1551	1
322 | 1554	3
323 | 1565	2
324 | 1571	1
325 | 1581	3
326 | 1584	1
327 | 1588	0
328 | 1594	5
329 | 1595	3
330 | 1596	2
331 | 1597	3
332 | 1598	3
333 | 1607	3
334 | 1608	3
335 | 1614	3
336 | 1616	6
337 | 1622	3
338 | 1626	4
339 | 1636	3
340 | 1648	2
341 | 1655	0
342 | 1658	0
343 | 1660	6
344 | 1663	3
345 | 1664	3
346 | 1666	3
347 | 1669	0
348 | 1677	5
349 | 1678	3
350 | 1680	0
351 | 1683	6
352 | 1684	3
353 | 1686	4
354 | 1703	4
355 | 1709	2
356 | 1710	2
357 | 1711	2
358 | 1721	2
359 | 1726	2
360 | 1729	2
361 | 1737	2
362 | 1747	2
363 | 1748	2
364 | 1749	2
365 | 1752	2
366 | 1753	2
367 | 1761	2
368 | 1764	5
369 | 1766	2
370 | 1776	1
371 | 1779	1
372 | 1787	1
373 | 1789	1
374 | 1790	1
375 | 1791	1
376 | 1793	4
377 | 1799	3
378 | 1810	3
379 | 1812	3
380 | 1816	3
381 | 1817	3
382 | 1818	3
383 | 1827	5
384 | 1828	5
385 | 1833	2
386 | 1835	6
387 | 1838	0
388 | 1842	5
389 | 1844	3
390 | 1848	0
391 | 1854	1
392 | 1855	3
393 | 1856	1
394 | 1859	3
395 | 1879	5
396 | 1885	5
397 | 1886	2
398 | 1888	2
399 | 1890	4
400 | 1896	5
401 | 1901	5
402 | 1905	0
403 | 1906	4
404 | 1907	4
405 | 1912	0
406 | 1918	6
407 | 1922	0
408 | 1924	3
409 | 1928	3
410 | 1932	3
411 | 1937	3
412 | 1943	3
413 | 1958	5
414 | 1964	4
415 | 1971	4
416 | 1975	6
417 | 1976	6
418 | 1980	5
419 | 1981	0
420 | 1991	1
421 | 1997	3
422 | 2003	3
423 | 2012	3
424 | 2017	5
425 | 2019	0
426 | 2024	5
427 | 2028	3
428 | 2030	6
429 | 2036	3
430 | 2037	3
431 | 2047	0
432 | 2051	0
433 | 2053	0
434 | 2060	5
435 | 2063	3
436 | 2068	0
437 | 2081	1
438 | 2087	1
439 | 2095	1
440 | 2097	0
441 | 2102	5
442 | 2103	5
443 | 2111	5
444 | 2118	3
445 | 2123	4
446 | 2131	0
447 | 2142	3
448 | 2144	1
449 | 2146	3
450 | 2152	3
451 | 2153	4
452 | 2163	3
453 | 2170	1
454 | 2171	0
455 | 2181	0
456 | 2193	0
457 | 2195	0
458 | 2202	0
459 | 2217	6
460 | 2221	2
461 | 2224	4
462 | 2228	6
463 | 2232	0
464 | 2259	4
465 | 2263	4
466 | 2264	4
467 | 2266	0
468 | 2272	5
469 | 2273	5
470 | 2276	3
471 | 2283	3
472 | 2287	5
473 | 2289	4
474 | 2292	4
475 | 2295	0
476 | 2300	1
477 | 2306	4
478 | 2319	1
479 | 2329	1
480 | 2333	0
481 | 2335	4
482 | 2347	0
483 | 2356	0
484 | 2368	3
485 | 2371	3
486 | 2376	4
487 | 2391	2
488 | 2394	6
489 | 2402	1
490 | 2404	0
491 | 2410	3
492 | 2425	3
493 | 2435	4
494 | 2437	3
495 | 2439	2
496 | 2445	2
497 | 2447	2
498 | 2463	4
499 | 2466	2
500 | 2470	0
501 | 2473	4
502 | 2479	6
503 | 2482	3
504 | 2489	3
505 | 2490	4
506 | 2492	2
507 | 2494	3
508 | 2495	3
509 | 2496	4
510 | 2500	3
511 | 2502	0
512 | 2507	2
513 | 2511	0
514 | 2522	2
515 | 2523	2
516 | 2530	2
517 | 2532	1
518 | 2538	4
519 | 2542	3
520 | 2548	4
521 | 2564	4
522 | 2574	3
523 | 2582	3
524 | 2586	5
525 | 2588	3
526 | 2598	1
527 | 2606	3
528 | 2623	4
529 | 2624	3
530 | 2629	3
531 | 2632	3
532 | 2638	6
533 | 2640	3
534 | 2642	2
535 | 2643	2
536 | 2653	0
537 | 2670	4
538 | 2673	4
539 | 2676	3
540 | 2679	4
541 | 2686	3
542 | 2693	3
543 | 2694	4
544 | 2696	3
545 | 2697	3
546 | 2703	3
547 | 


--------------------------------------------------------------------------------
/data/demo/train.data/test_node_id.txt:
--------------------------------------------------------------------------------
  1 | 4
  2 | 10
  3 | 15
  4 | 17
  5 | 20
  6 | 38
  7 | 48
  8 | 61
  9 | 69
 10 | 75
 11 | 84
 12 | 90
 13 | 91
 14 | 92
 15 | 102
 16 | 106
 17 | 108
 18 | 114
 19 | 119
 20 | 121
 21 | 125
 22 | 130
 23 | 133
 24 | 138
 25 | 144
 26 | 145
 27 | 148
 28 | 156
 29 | 161
 30 | 167
 31 | 175
 32 | 191
 33 | 194
 34 | 203
 35 | 210
 36 | 211
 37 | 217
 38 | 223
 39 | 224
 40 | 226
 41 | 230
 42 | 232
 43 | 240
 44 | 241
 45 | 248
 46 | 255
 47 | 256
 48 | 257
 49 | 262
 50 | 264
 51 | 269
 52 | 278
 53 | 282
 54 | 292
 55 | 301
 56 | 310
 57 | 324
 58 | 326
 59 | 330
 60 | 337
 61 | 338
 62 | 341
 63 | 343
 64 | 344
 65 | 346
 66 | 348
 67 | 350
 68 | 352
 69 | 358
 70 | 365
 71 | 369
 72 | 374
 73 | 386
 74 | 409
 75 | 430
 76 | 442
 77 | 446
 78 | 447
 79 | 451
 80 | 458
 81 | 488
 82 | 490
 83 | 500
 84 | 506
 85 | 507
 86 | 516
 87 | 517
 88 | 519
 89 | 526
 90 | 528
 91 | 546
 92 | 553
 93 | 555
 94 | 562
 95 | 563
 96 | 564
 97 | 566
 98 | 568
 99 | 569
100 | 574
101 | 577
102 | 579
103 | 587
104 | 588
105 | 591
106 | 595
107 | 600
108 | 601
109 | 604
110 | 606
111 | 610
112 | 611
113 | 614
114 | 615
115 | 616
116 | 620
117 | 626
118 | 630
119 | 632
120 | 638
121 | 639
122 | 640
123 | 649
124 | 652
125 | 654
126 | 656
127 | 660
128 | 671
129 | 675
130 | 682
131 | 683
132 | 684
133 | 686
134 | 690
135 | 700
136 | 701
137 | 707
138 | 711
139 | 713
140 | 717
141 | 720
142 | 724
143 | 729
144 | 742
145 | 744
146 | 756
147 | 767
148 | 768
149 | 769
150 | 770
151 | 773
152 | 777
153 | 779
154 | 780
155 | 785
156 | 788
157 | 789
158 | 810
159 | 812
160 | 815
161 | 817
162 | 820
163 | 825
164 | 829
165 | 832
166 | 838
167 | 841
168 | 844
169 | 850
170 | 854
171 | 859
172 | 863
173 | 864
174 | 870
175 | 871
176 | 872
177 | 881
178 | 885
179 | 887
180 | 888
181 | 890
182 | 892
183 | 903
184 | 922
185 | 927
186 | 930
187 | 932
188 | 943
189 | 947
190 | 965
191 | 969
192 | 970
193 | 974
194 | 976
195 | 979
196 | 982
197 | 987
198 | 991
199 | 994
200 | 1009
201 | 1011
202 | 1017
203 | 1018
204 | 1023
205 | 1036
206 | 1038
207 | 1041
208 | 1043
209 | 1045
210 | 1049
211 | 1053
212 | 1057
213 | 1062
214 | 1064
215 | 1068
216 | 1071
217 | 1074
218 | 1077
219 | 1078
220 | 1081
221 | 1084
222 | 1087
223 | 1091
224 | 1095
225 | 1105
226 | 1108
227 | 1113
228 | 1114
229 | 1118
230 | 1129
231 | 1130
232 | 1132
233 | 1137
234 | 1148
235 | 1151
236 | 1152
237 | 1153
238 | 1156
239 | 1157
240 | 1166
241 | 1189
242 | 1190
243 | 1193
244 | 1202
245 | 1207
246 | 1210
247 | 1214
248 | 1216
249 | 1217
250 | 1220
251 | 1222
252 | 1225
253 | 1226
254 | 1236
255 | 1239
256 | 1241
257 | 1247
258 | 1249
259 | 1254
260 | 1260
261 | 1264
262 | 1266
263 | 1267
264 | 1282
265 | 1283
266 | 1285
267 | 1287
268 | 1290
269 | 1293
270 | 1298
271 | 1305
272 | 1321
273 | 1331
274 | 1342
275 | 1348
276 | 1351
277 | 1360
278 | 1365
279 | 1367
280 | 1383
281 | 1384
282 | 1400
283 | 1401
284 | 1402
285 | 1403
286 | 1405
287 | 1406
288 | 1410
289 | 1413
290 | 1417
291 | 1420
292 | 1423
293 | 1424
294 | 1425
295 | 1426
296 | 1428
297 | 1434
298 | 1437
299 | 1438
300 | 1440
301 | 1442
302 | 1450
303 | 1453
304 | 1455
305 | 1456
306 | 1458
307 | 1465
308 | 1470
309 | 1478
310 | 1489
311 | 1500
312 | 1506
313 | 1513
314 | 1520
315 | 1523
316 | 1524
317 | 1527
318 | 1528
319 | 1536
320 | 1551
321 | 1554
322 | 1565
323 | 1571
324 | 1581
325 | 1584
326 | 1588
327 | 1594
328 | 1595
329 | 1596
330 | 1597
331 | 1598
332 | 1607
333 | 1608
334 | 1614
335 | 1616
336 | 1622
337 | 1626
338 | 1636
339 | 1648
340 | 1655
341 | 1658
342 | 1660
343 | 1663
344 | 1664
345 | 1666
346 | 1669
347 | 1677
348 | 1678
349 | 1680
350 | 1683
351 | 1684
352 | 1686
353 | 1703
354 | 1709
355 | 1710
356 | 1711
357 | 1721
358 | 1726
359 | 1729
360 | 1737
361 | 1747
362 | 1748
363 | 1749
364 | 1752
365 | 1753
366 | 1761
367 | 1764
368 | 1766
369 | 1776
370 | 1779
371 | 1787
372 | 1789
373 | 1790
374 | 1791
375 | 1793
376 | 1799
377 | 1810
378 | 1812
379 | 1816
380 | 1817
381 | 1818
382 | 1827
383 | 1828
384 | 1833
385 | 1835
386 | 1838
387 | 1842
388 | 1844
389 | 1848
390 | 1854
391 | 1855
392 | 1856
393 | 1859
394 | 1879
395 | 1885
396 | 1886
397 | 1888
398 | 1890
399 | 1896
400 | 1901
401 | 1905
402 | 1906
403 | 1907
404 | 1912
405 | 1918
406 | 1922
407 | 1924
408 | 1928
409 | 1932
410 | 1937
411 | 1943
412 | 1958
413 | 1964
414 | 1971
415 | 1975
416 | 1976
417 | 1980
418 | 1981
419 | 1991
420 | 1997
421 | 2003
422 | 2012
423 | 2017
424 | 2019
425 | 2024
426 | 2028
427 | 2030
428 | 2036
429 | 2037
430 | 2047
431 | 2051
432 | 2053
433 | 2060
434 | 2063
435 | 2068
436 | 2081
437 | 2087
438 | 2095
439 | 2097
440 | 2102
441 | 2103
442 | 2111
443 | 2118
444 | 2123
445 | 2131
446 | 2142
447 | 2144
448 | 2146
449 | 2152
450 | 2153
451 | 2163
452 | 2170
453 | 2171
454 | 2181
455 | 2193
456 | 2195
457 | 2202
458 | 2217
459 | 2221
460 | 2224
461 | 2228
462 | 2232
463 | 2259
464 | 2263
465 | 2264
466 | 2266
467 | 2272
468 | 2273
469 | 2276
470 | 2283
471 | 2287
472 | 2289
473 | 2292
474 | 2295
475 | 2300
476 | 2306
477 | 2319
478 | 2329
479 | 2333
480 | 2335
481 | 2347
482 | 2356
483 | 2368
484 | 2371
485 | 2376
486 | 2391
487 | 2394
488 | 2402
489 | 2404
490 | 2410
491 | 2425
492 | 2435
493 | 2437
494 | 2439
495 | 2445
496 | 2447
497 | 2463
498 | 2466
499 | 2470
500 | 2473
501 | 2479
502 | 2482
503 | 2489
504 | 2490
505 | 2492
506 | 2494
507 | 2495
508 | 2496
509 | 2500
510 | 2502
511 | 2507
512 | 2511
513 | 2522
514 | 2523
515 | 2530
516 | 2532
517 | 2538
518 | 2542
519 | 2548
520 | 2564
521 | 2574
522 | 2582
523 | 2586
524 | 2588
525 | 2598
526 | 2606
527 | 2623
528 | 2624
529 | 2629
530 | 2632
531 | 2638
532 | 2640
533 | 2642
534 | 2643
535 | 2653
536 | 2670
537 | 2673
538 | 2676
539 | 2679
540 | 2686
541 | 2693
542 | 2694
543 | 2696
544 | 2697
545 | 2703
546 | 


--------------------------------------------------------------------------------
/data/demo/train.data/train_node_id.txt:
--------------------------------------------------------------------------------
   1 | 2
   2 | 5
   3 | 9
   4 | 14
   5 | 22
   6 | 25
   7 | 30
   8 | 33
   9 | 40
  10 | 46
  11 | 49
  12 | 51
  13 | 52
  14 | 53
  15 | 63
  16 | 70
  17 | 73
  18 | 80
  19 | 83
  20 | 85
  21 | 86
  22 | 96
  23 | 98
  24 | 99
  25 | 100
  26 | 110
  27 | 120
  28 | 122
  29 | 128
  30 | 134
  31 | 136
  32 | 142
  33 | 147
  34 | 150
  35 | 158
  36 | 160
  37 | 166
  38 | 168
  39 | 171
  40 | 174
  41 | 177
  42 | 181
  43 | 182
  44 | 189
  45 | 192
  46 | 195
  47 | 196
  48 | 197
  49 | 206
  50 | 207
  51 | 222
  52 | 228
  53 | 236
  54 | 237
  55 | 238
  56 | 242
  57 | 244
  58 | 245
  59 | 260
  60 | 263
  61 | 265
  62 | 266
  63 | 277
  64 | 280
  65 | 284
  66 | 289
  67 | 290
  68 | 294
  69 | 297
  70 | 308
  71 | 309
  72 | 313
  73 | 318
  74 | 319
  75 | 322
  76 | 328
  77 | 333
  78 | 345
  79 | 354
  80 | 355
  81 | 356
  82 | 359
  83 | 370
  84 | 371
  85 | 384
  86 | 387
  87 | 391
  88 | 395
  89 | 398
  90 | 403
  91 | 404
  92 | 405
  93 | 408
  94 | 411
  95 | 412
  96 | 416
  97 | 418
  98 | 419
  99 | 421
 100 | 422
 101 | 423
 102 | 424
 103 | 433
 104 | 437
 105 | 439
 106 | 441
 107 | 448
 108 | 449
 109 | 453
 110 | 456
 111 | 457
 112 | 459
 113 | 460
 114 | 468
 115 | 471
 116 | 478
 117 | 481
 118 | 482
 119 | 483
 120 | 484
 121 | 505
 122 | 508
 123 | 510
 124 | 513
 125 | 514
 126 | 520
 127 | 527
 128 | 531
 129 | 534
 130 | 537
 131 | 540
 132 | 544
 133 | 548
 134 | 557
 135 | 559
 136 | 560
 137 | 580
 138 | 582
 139 | 593
 140 | 596
 141 | 597
 142 | 599
 143 | 613
 144 | 633
 145 | 635
 146 | 647
 147 | 648
 148 | 664
 149 | 665
 150 | 667
 151 | 674
 152 | 678
 153 | 680
 154 | 687
 155 | 689
 156 | 691
 157 | 692
 158 | 693
 159 | 695
 160 | 699
 161 | 703
 162 | 722
 163 | 726
 164 | 730
 165 | 731
 166 | 737
 167 | 749
 168 | 750
 169 | 751
 170 | 752
 171 | 755
 172 | 759
 173 | 786
 174 | 790
 175 | 795
 176 | 799
 177 | 802
 178 | 803
 179 | 805
 180 | 818
 181 | 819
 182 | 827
 183 | 830
 184 | 836
 185 | 842
 186 | 846
 187 | 851
 188 | 853
 189 | 857
 190 | 874
 191 | 876
 192 | 880
 193 | 883
 194 | 886
 195 | 899
 196 | 901
 197 | 910
 198 | 917
 199 | 937
 200 | 942
 201 | 945
 202 | 949
 203 | 951
 204 | 953
 205 | 955
 206 | 961
 207 | 964
 208 | 975
 209 | 977
 210 | 980
 211 | 981
 212 | 984
 213 | 986
 214 | 992
 215 | 993
 216 | 996
 217 | 1002
 218 | 1003
 219 | 1008
 220 | 1012
 221 | 1013
 222 | 1015
 223 | 1016
 224 | 1019
 225 | 1027
 226 | 1028
 227 | 1029
 228 | 1040
 229 | 1042
 230 | 1048
 231 | 1066
 232 | 1067
 233 | 1103
 234 | 1111
 235 | 1115
 236 | 1121
 237 | 1125
 238 | 1128
 239 | 1133
 240 | 1139
 241 | 1164
 242 | 1168
 243 | 1170
 244 | 1172
 245 | 1173
 246 | 1178
 247 | 1188
 248 | 1199
 249 | 1200
 250 | 1201
 251 | 1205
 252 | 1208
 253 | 1212
 254 | 1219
 255 | 1221
 256 | 1223
 257 | 1232
 258 | 1233
 259 | 1234
 260 | 1235
 261 | 1238
 262 | 1240
 263 | 1242
 264 | 1243
 265 | 1245
 266 | 1253
 267 | 1257
 268 | 1271
 269 | 1272
 270 | 1274
 271 | 1279
 272 | 1280
 273 | 1281
 274 | 1286
 275 | 1288
 276 | 1291
 277 | 1301
 278 | 1308
 279 | 1316
 280 | 1318
 281 | 1327
 282 | 1333
 283 | 1336
 284 | 1337
 285 | 1350
 286 | 1354
 287 | 1371
 288 | 1373
 289 | 1374
 290 | 1380
 291 | 1381
 292 | 1387
 293 | 1388
 294 | 1393
 295 | 1407
 296 | 1409
 297 | 1414
 298 | 1418
 299 | 1419
 300 | 1421
 301 | 1422
 302 | 1429
 303 | 1431
 304 | 1443
 305 | 1444
 306 | 1448
 307 | 1451
 308 | 1457
 309 | 1469
 310 | 1475
 311 | 1479
 312 | 1490
 313 | 1495
 314 | 1497
 315 | 1498
 316 | 1499
 317 | 1514
 318 | 1517
 319 | 1530
 320 | 1532
 321 | 1534
 322 | 1537
 323 | 1542
 324 | 1546
 325 | 1549
 326 | 1577
 327 | 1578
 328 | 1579
 329 | 1585
 330 | 1591
 331 | 1605
 332 | 1609
 333 | 1612
 334 | 1613
 335 | 1619
 336 | 1625
 337 | 1628
 338 | 1630
 339 | 1631
 340 | 1633
 341 | 1639
 342 | 1642
 343 | 1644
 344 | 1647
 345 | 1661
 346 | 1674
 347 | 1676
 348 | 1690
 349 | 1693
 350 | 1697
 351 | 1700
 352 | 1717
 353 | 1728
 354 | 1731
 355 | 1741
 356 | 1750
 357 | 1754
 358 | 1757
 359 | 1762
 360 | 1767
 361 | 1771
 362 | 1772
 363 | 1774
 364 | 1777
 365 | 1788
 366 | 1795
 367 | 1796
 368 | 1802
 369 | 1804
 370 | 1806
 371 | 1813
 372 | 1826
 373 | 1829
 374 | 1832
 375 | 1836
 376 | 1847
 377 | 1857
 378 | 1860
 379 | 1865
 380 | 1866
 381 | 1872
 382 | 1877
 383 | 1884
 384 | 1891
 385 | 1894
 386 | 1897
 387 | 1899
 388 | 1902
 389 | 1910
 390 | 1911
 391 | 1913
 392 | 1914
 393 | 1931
 394 | 1934
 395 | 1936
 396 | 1940
 397 | 1945
 398 | 1957
 399 | 1960
 400 | 1962
 401 | 1972
 402 | 1985
 403 | 1993
 404 | 1994
 405 | 1995
 406 | 1996
 407 | 2002
 408 | 2007
 409 | 2008
 410 | 2020
 411 | 2022
 412 | 2023
 413 | 2027
 414 | 2039
 415 | 2041
 416 | 2043
 417 | 2046
 418 | 2048
 419 | 2065
 420 | 2073
 421 | 2076
 422 | 2084
 423 | 2088
 424 | 2091
 425 | 2107
 426 | 2128
 427 | 2143
 428 | 2147
 429 | 2148
 430 | 2150
 431 | 2157
 432 | 2161
 433 | 2165
 434 | 2169
 435 | 2172
 436 | 2176
 437 | 2177
 438 | 2180
 439 | 2184
 440 | 2187
 441 | 2192
 442 | 2197
 443 | 2201
 444 | 2211
 445 | 2214
 446 | 2218
 447 | 2222
 448 | 2227
 449 | 2230
 450 | 2233
 451 | 2238
 452 | 2241
 453 | 2245
 454 | 2246
 455 | 2250
 456 | 2252
 457 | 2254
 458 | 2257
 459 | 2258
 460 | 2260
 461 | 2262
 462 | 2265
 463 | 2268
 464 | 2269
 465 | 2277
 466 | 2279
 467 | 2282
 468 | 2284
 469 | 2286
 470 | 2288
 471 | 2308
 472 | 2316
 473 | 2323
 474 | 2328
 475 | 2336
 476 | 2346
 477 | 2348
 478 | 2352
 479 | 2354
 480 | 2355
 481 | 2358
 482 | 2360
 483 | 2361
 484 | 2363
 485 | 2364
 486 | 2367
 487 | 2380
 488 | 2383
 489 | 2388
 490 | 2389
 491 | 2397
 492 | 2406
 493 | 2412
 494 | 2416
 495 | 2433
 496 | 2448
 497 | 2450
 498 | 2451
 499 | 2452
 500 | 2453
 501 | 2454
 502 | 2462
 503 | 2475
 504 | 2487
 505 | 2499
 506 | 2501
 507 | 2504
 508 | 2508
 509 | 2513
 510 | 2515
 511 | 2529
 512 | 2531
 513 | 2540
 514 | 2544
 515 | 2546
 516 | 2555
 517 | 2567
 518 | 2568
 519 | 2597
 520 | 2601
 521 | 2607
 522 | 2613
 523 | 2614
 524 | 2615
 525 | 2620
 526 | 2626
 527 | 2631
 528 | 2634
 529 | 2637
 530 | 2639
 531 | 2641
 532 | 2645
 533 | 2646
 534 | 2649
 535 | 2650
 536 | 2655
 537 | 2663
 538 | 2666
 539 | 2672
 540 | 2678
 541 | 2690
 542 | 2699
 543 | 2702
 544 | 0
 545 | 1
 546 | 3
 547 | 8
 548 | 12
 549 | 16
 550 | 18
 551 | 19
 552 | 26
 553 | 27
 554 | 32
 555 | 36
 556 | 39
 557 | 41
 558 | 42
 559 | 43
 560 | 44
 561 | 47
 562 | 50
 563 | 54
 564 | 56
 565 | 58
 566 | 65
 567 | 67
 568 | 76
 569 | 77
 570 | 79
 571 | 89
 572 | 105
 573 | 111
 574 | 112
 575 | 118
 576 | 124
 577 | 126
 578 | 127
 579 | 131
 580 | 137
 581 | 143
 582 | 149
 583 | 153
 584 | 159
 585 | 165
 586 | 169
 587 | 172
 588 | 173
 589 | 179
 590 | 193
 591 | 204
 592 | 209
 593 | 219
 594 | 220
 595 | 225
 596 | 229
 597 | 231
 598 | 258
 599 | 268
 600 | 271
 601 | 272
 602 | 274
 603 | 286
 604 | 288
 605 | 291
 606 | 299
 607 | 312
 608 | 314
 609 | 321
 610 | 323
 611 | 334
 612 | 349
 613 | 357
 614 | 363
 615 | 364
 616 | 366
 617 | 367
 618 | 368
 619 | 376
 620 | 377
 621 | 378
 622 | 380
 623 | 381
 624 | 390
 625 | 392
 626 | 393
 627 | 394
 628 | 397
 629 | 401
 630 | 406
 631 | 413
 632 | 431
 633 | 435
 634 | 440
 635 | 445
 636 | 464
 637 | 466
 638 | 467
 639 | 469
 640 | 474
 641 | 479
 642 | 487
 643 | 491
 644 | 496
 645 | 498
 646 | 501
 647 | 504
 648 | 511
 649 | 521
 650 | 524
 651 | 532
 652 | 533
 653 | 538
 654 | 542
 655 | 543
 656 | 545
 657 | 552
 658 | 556
 659 | 583
 660 | 585
 661 | 605
 662 | 623
 663 | 629
 664 | 650
 665 | 653
 666 | 659
 667 | 661
 668 | 663
 669 | 668
 670 | 672
 671 | 673
 672 | 679
 673 | 705
 674 | 709
 675 | 710
 676 | 712
 677 | 716
 678 | 718
 679 | 725
 680 | 727
 681 | 738
 682 | 745
 683 | 747
 684 | 764
 685 | 766
 686 | 772
 687 | 774
 688 | 776
 689 | 783
 690 | 784
 691 | 791
 692 | 792
 693 | 794
 694 | 797
 695 | 801
 696 | 804
 697 | 809
 698 | 813
 699 | 814
 700 | 822
 701 | 824
 702 | 828
 703 | 840
 704 | 843
 705 | 845
 706 | 849
 707 | 869
 708 | 873
 709 | 893
 710 | 895
 711 | 900
 712 | 904
 713 | 909
 714 | 913
 715 | 916
 716 | 919
 717 | 924
 718 | 928
 719 | 934
 720 | 939
 721 | 941
 722 | 956
 723 | 962
 724 | 967
 725 | 971
 726 | 973
 727 | 988
 728 | 989
 729 | 995
 730 | 1014
 731 | 1024
 732 | 1025
 733 | 1030
 734 | 1032
 735 | 1047
 736 | 1050
 737 | 1051
 738 | 1052
 739 | 1073
 740 | 1083
 741 | 1085
 742 | 1094
 743 | 1096
 744 | 1099
 745 | 1100
 746 | 1101
 747 | 1104
 748 | 1106
 749 | 1109
 750 | 1110
 751 | 1119
 752 | 1120
 753 | 1122
 754 | 1124
 755 | 1127
 756 | 1134
 757 | 1138
 758 | 1143
 759 | 1144
 760 | 1147
 761 | 1149
 762 | 1155
 763 | 1158
 764 | 1159
 765 | 1161
 766 | 1162
 767 | 1169
 768 | 1175
 769 | 1176
 770 | 1177
 771 | 1180
 772 | 1181
 773 | 1182
 774 | 1184
 775 | 1196
 776 | 1206
 777 | 1211
 778 | 1215
 779 | 1218
 780 | 1227
 781 | 1237
 782 | 1250
 783 | 1251
 784 | 1255
 785 | 1256
 786 | 1263
 787 | 1270
 788 | 1275
 789 | 1276
 790 | 1277
 791 | 1289
 792 | 1292
 793 | 1294
 794 | 1295
 795 | 1299
 796 | 1306
 797 | 1317
 798 | 1323
 799 | 1325
 800 | 1329
 801 | 1330
 802 | 1332
 803 | 1334
 804 | 1338
 805 | 1346
 806 | 1352
 807 | 1357
 808 | 1358
 809 | 1363
 810 | 1364
 811 | 1366
 812 | 1368
 813 | 1370
 814 | 1372
 815 | 1382
 816 | 1385
 817 | 1386
 818 | 1392
 819 | 1394
 820 | 1398
 821 | 1399
 822 | 1404
 823 | 1412
 824 | 1427
 825 | 1430
 826 | 1439
 827 | 1441
 828 | 1445
 829 | 1449
 830 | 1454
 831 | 1459
 832 | 1467
 833 | 1471
 834 | 1473
 835 | 1474
 836 | 1484
 837 | 1485
 838 | 1492
 839 | 1493
 840 | 1496
 841 | 1503
 842 | 1504
 843 | 1507
 844 | 1512
 845 | 1516
 846 | 1529
 847 | 1535
 848 | 1539
 849 | 1541
 850 | 1547
 851 | 1550
 852 | 1559
 853 | 1561
 854 | 1570
 855 | 1576
 856 | 1580
 857 | 1582
 858 | 1589
 859 | 1604
 860 | 1606
 861 | 1610
 862 | 1611
 863 | 1615
 864 | 1618
 865 | 1621
 866 | 1623
 867 | 1632
 868 | 1637
 869 | 1643
 870 | 1650
 871 | 1651
 872 | 1654
 873 | 1659
 874 | 1667
 875 | 1672
 876 | 1675
 877 | 1681
 878 | 1682
 879 | 1685
 880 | 1687
 881 | 1692
 882 | 1699
 883 | 1701
 884 | 1705
 885 | 1707
 886 | 1708
 887 | 1713
 888 | 1716
 889 | 1722
 890 | 1724
 891 | 1730
 892 | 1735
 893 | 1736
 894 | 1738
 895 | 1739
 896 | 1740
 897 | 1742
 898 | 1745
 899 | 1746
 900 | 1755
 901 | 1756
 902 | 1759
 903 | 1760
 904 | 1763
 905 | 1765
 906 | 1769
 907 | 1780
 908 | 1797
 909 | 1800
 910 | 1801
 911 | 1805
 912 | 1814
 913 | 1821
 914 | 1823
 915 | 1825
 916 | 1830
 917 | 1834
 918 | 1841
 919 | 1852
 920 | 1853
 921 | 1858
 922 | 1862
 923 | 1863
 924 | 1868
 925 | 1870
 926 | 1871
 927 | 1878
 928 | 1881
 929 | 1908
 930 | 1909
 931 | 1916
 932 | 1917
 933 | 1920
 934 | 1921
 935 | 1925
 936 | 1926
 937 | 1929
 938 | 1947
 939 | 1948
 940 | 1949
 941 | 1950
 942 | 1953
 943 | 1963
 944 | 1967
 945 | 1969
 946 | 1970
 947 | 1982
 948 | 1983
 949 | 1988
 950 | 1998
 951 | 2013
 952 | 2032
 953 | 2042
 954 | 2045
 955 | 2049
 956 | 2064
 957 | 2066
 958 | 2067
 959 | 2069
 960 | 2072
 961 | 2079
 962 | 2083
 963 | 2085
 964 | 2086
 965 | 2098
 966 | 2104
 967 | 2108
 968 | 2109
 969 | 2110
 970 | 2114
 971 | 2120
 972 | 2130
 973 | 2133
 974 | 2151
 975 | 2155
 976 | 2159
 977 | 2160
 978 | 2168
 979 | 2174
 980 | 2175
 981 | 2178
 982 | 2186
 983 | 2196
 984 | 2204
 985 | 2210
 986 | 2216
 987 | 2219
 988 | 2226
 989 | 2235
 990 | 2237
 991 | 2240
 992 | 2244
 993 | 2247
 994 | 2251
 995 | 2270
 996 | 2271
 997 | 2275
 998 | 2280
 999 | 2290
1000 | 2293
1001 | 2305
1002 | 2309
1003 | 2312
1004 | 2317
1005 | 2320
1006 | 2324
1007 | 2340
1008 | 2342
1009 | 2357
1010 | 2377
1011 | 2384
1012 | 2390
1013 | 2395
1014 | 2400
1015 | 2401
1016 | 2403
1017 | 2405
1018 | 2407
1019 | 2409
1020 | 2413
1021 | 2418
1022 | 2419
1023 | 2420
1024 | 2428
1025 | 2434
1026 | 2438
1027 | 2444
1028 | 2446
1029 | 2457
1030 | 2471
1031 | 2476
1032 | 2483
1033 | 2485
1034 | 2491
1035 | 2512
1036 | 2521
1037 | 2524
1038 | 2527
1039 | 2535
1040 | 2541
1041 | 2549
1042 | 2550
1043 | 2551
1044 | 2558
1045 | 2561
1046 | 2565
1047 | 2571
1048 | 2575
1049 | 2576
1050 | 2579
1051 | 2580
1052 | 2583
1053 | 2584
1054 | 2590
1055 | 2594
1056 | 2599
1057 | 2604
1058 | 2608
1059 | 2611
1060 | 2612
1061 | 2617
1062 | 2621
1063 | 2622
1064 | 2636
1065 | 2648
1066 | 2654
1067 | 2656
1068 | 2658
1069 | 2661
1070 | 2664
1071 | 2665
1072 | 2667
1073 | 2668
1074 | 2669
1075 | 2671
1076 | 2675
1077 | 2677
1078 | 2680
1079 | 2687
1080 | 2688
1081 | 2691
1082 | 2695
1083 | 2700
1084 | 2704
1085 | 2706
1086 | 7
1087 | 21
1088 | 24
1089 | 28
1090 | 34
1091 | 35
1092 | 37
1093 | 59
1094 | 60
1095 | 62
1096 | 71
1097 | 78
1098 | 81
1099 | 93
1100 | 101
1101 | 103
1102 | 109
1103 | 113
1104 | 117
1105 | 129
1106 | 135
1107 | 140
1108 | 146
1109 | 152
1110 | 155
1111 | 162
1112 | 170
1113 | 180
1114 | 183
1115 | 184
1116 | 190
1117 | 198
1118 | 200
1119 | 205
1120 | 213
1121 | 214
1122 | 215
1123 | 216
1124 | 218
1125 | 221
1126 | 227
1127 | 235
1128 | 246
1129 | 249
1130 | 251
1131 | 252
1132 | 259
1133 | 267
1134 | 270
1135 | 273
1136 | 279
1137 | 281
1138 | 283
1139 | 287
1140 | 296
1141 | 302
1142 | 303
1143 | 305
1144 | 311
1145 | 315
1146 | 316
1147 | 331
1148 | 332
1149 | 335
1150 | 336
1151 | 339
1152 | 340
1153 | 353
1154 | 361
1155 | 382
1156 | 388
1157 | 402
1158 | 415
1159 | 425
1160 | 426
1161 | 436
1162 | 438
1163 | 444
1164 | 450
1165 | 455
1166 | 461
1167 | 470
1168 | 473
1169 | 475
1170 | 477
1171 | 480
1172 | 485
1173 | 486
1174 | 492
1175 | 493
1176 | 494
1177 | 497
1178 | 512
1179 | 522
1180 | 523
1181 | 525
1182 | 529
1183 | 547
1184 | 549
1185 | 551
1186 | 558
1187 | 565
1188 | 570
1189 | 572
1190 | 573
1191 | 575
1192 | 576
1193 | 586
1194 | 594
1195 | 618
1196 | 619
1197 | 624
1198 | 627
1199 | 634
1200 | 636
1201 | 637
1202 | 651
1203 | 655
1204 | 657
1205 | 658
1206 | 662
1207 | 666
1208 | 676
1209 | 677
1210 | 688
1211 | 698
1212 | 702
1213 | 704
1214 | 714
1215 | 719
1216 | 728
1217 | 741
1218 | 743
1219 | 746
1220 | 748
1221 | 753
1222 | 757
1223 | 760
1224 | 762
1225 | 771
1226 | 775
1227 | 782
1228 | 787
1229 | 806
1230 | 807
1231 | 816
1232 | 821
1233 | 823
1234 | 831
1235 | 833
1236 | 835
1237 | 839
1238 | 847
1239 | 848
1240 | 852
1241 | 860
1242 | 861
1243 | 868
1244 | 877
1245 | 879
1246 | 889
1247 | 894
1248 | 896
1249 | 897
1250 | 898
1251 | 905
1252 | 906
1253 | 908
1254 | 915
1255 | 918
1256 | 920
1257 | 921
1258 | 925
1259 | 929
1260 | 931
1261 | 935
1262 | 936
1263 | 944
1264 | 946
1265 | 950
1266 | 952
1267 | 957
1268 | 966
1269 | 972
1270 | 983
1271 | 985
1272 | 990
1273 | 997
1274 | 998
1275 | 999
1276 | 1000
1277 | 1004
1278 | 1005
1279 | 1010
1280 | 1021
1281 | 1022
1282 | 1026
1283 | 1031
1284 | 1033
1285 | 1037
1286 | 1039
1287 | 1044
1288 | 1046
1289 | 1058
1290 | 1059
1291 | 1061
1292 | 1063
1293 | 1069
1294 | 1075
1295 | 1080
1296 | 1086
1297 | 1088
1298 | 1090
1299 | 1092
1300 | 1097
1301 | 1112
1302 | 1117
1303 | 1123
1304 | 1131
1305 | 1140
1306 | 1142
1307 | 1146
1308 | 1154
1309 | 1160
1310 | 1167
1311 | 1171
1312 | 1174
1313 | 1179
1314 | 1185
1315 | 1186
1316 | 1187
1317 | 1192
1318 | 1194
1319 | 1197
1320 | 1209
1321 | 1213
1322 | 1224
1323 | 1228
1324 | 1229
1325 | 1230
1326 | 1246
1327 | 1248
1328 | 1252
1329 | 1258
1330 | 1259
1331 | 1278
1332 | 1296
1333 | 1297
1334 | 1300
1335 | 1303
1336 | 1307
1337 | 1309
1338 | 1311
1339 | 1312
1340 | 1313
1341 | 1314
1342 | 1326
1343 | 1339
1344 | 1355
1345 | 1375
1346 | 1376
1347 | 1377
1348 | 1379
1349 | 1390
1350 | 1397
1351 | 1411
1352 | 1433
1353 | 1435
1354 | 1436
1355 | 1461
1356 | 1462
1357 | 1463
1358 | 1464
1359 | 1468
1360 | 1472
1361 | 1476
1362 | 1480
1363 | 1482
1364 | 1487
1365 | 1491
1366 | 1494
1367 | 1501
1368 | 1502
1369 | 1505
1370 | 1509
1371 | 1510
1372 | 1515
1373 | 1518
1374 | 1519
1375 | 1521
1376 | 1525
1377 | 1526
1378 | 1533
1379 | 1540
1380 | 1544
1381 | 1545
1382 | 1553
1383 | 1555
1384 | 1558
1385 | 1560
1386 | 1562
1387 | 1564
1388 | 1566
1389 | 1569
1390 | 1572
1391 | 1573
1392 | 1575
1393 | 1583
1394 | 1587
1395 | 1592
1396 | 1593
1397 | 1603
1398 | 1617
1399 | 1624
1400 | 1627
1401 | 1629
1402 | 1635
1403 | 1638
1404 | 1645
1405 | 1646
1406 | 1652
1407 | 1656
1408 | 1665
1409 | 1668
1410 | 1670
1411 | 1689
1412 | 1691
1413 | 1715
1414 | 1719
1415 | 1720
1416 | 1723
1417 | 1725
1418 | 1733
1419 | 1743
1420 | 1744
1421 | 1758
1422 | 1768
1423 | 1778
1424 | 1782
1425 | 1783
1426 | 1784
1427 | 1785
1428 | 1792
1429 | 1794
1430 | 1807
1431 | 1809
1432 | 1819
1433 | 1822
1434 | 1831
1435 | 1840
1436 | 1845
1437 | 1846
1438 | 1850
1439 | 1851
1440 | 1861
1441 | 1864
1442 | 1869
1443 | 1875
1444 | 1876
1445 | 1882
1446 | 1883
1447 | 1892
1448 | 1893
1449 | 1898
1450 | 1903
1451 | 1915
1452 | 1939
1453 | 1944
1454 | 1952
1455 | 1954
1456 | 1955
1457 | 1956
1458 | 1965
1459 | 1966
1460 | 1968
1461 | 1973
1462 | 1974
1463 | 1977
1464 | 1986
1465 | 1987
1466 | 1999
1467 | 2000
1468 | 2005
1469 | 2009
1470 | 2011
1471 | 2015
1472 | 2021
1473 | 2033
1474 | 2035
1475 | 2038
1476 | 2050
1477 | 2059
1478 | 2061
1479 | 2071
1480 | 2077
1481 | 2080
1482 | 2082
1483 | 2089
1484 | 2090
1485 | 2093
1486 | 2100
1487 | 2101
1488 | 2106
1489 | 2112
1490 | 2113
1491 | 2115
1492 | 2117
1493 | 2119
1494 | 2121
1495 | 2125
1496 | 2127
1497 | 2129
1498 | 2134
1499 | 2135
1500 | 2136
1501 | 2137
1502 | 2139
1503 | 2162
1504 | 2164
1505 | 2167
1506 | 2173
1507 | 2182
1508 | 2183
1509 | 2188
1510 | 2205
1511 | 2208
1512 | 2212
1513 | 2223
1514 | 2231
1515 | 2234
1516 | 2236
1517 | 2239
1518 | 2243
1519 | 2248
1520 | 2253
1521 | 2255
1522 | 2256
1523 | 2261
1524 | 2274
1525 | 2291
1526 | 2297
1527 | 2298
1528 | 2299
1529 | 2303
1530 | 2307
1531 | 2315
1532 | 2321
1533 | 2330
1534 | 2331
1535 | 2332
1536 | 2337
1537 | 2339
1538 | 2341
1539 | 2344
1540 | 2345
1541 | 2349
1542 | 2350
1543 | 2353
1544 | 2359
1545 | 2365
1546 | 2370
1547 | 2374
1548 | 2375
1549 | 2379
1550 | 2382
1551 | 2386
1552 | 2387
1553 | 2393
1554 | 2408
1555 | 2411
1556 | 2414
1557 | 2417
1558 | 2422
1559 | 2423
1560 | 2429
1561 | 2430
1562 | 2436
1563 | 2443
1564 | 2455
1565 | 2456
1566 | 2459
1567 | 2461
1568 | 2465
1569 | 2469
1570 | 2472
1571 | 2474
1572 | 2478
1573 | 2480
1574 | 2481
1575 | 2484
1576 | 2498
1577 | 2503
1578 | 2505
1579 | 2509
1580 | 2510
1581 | 2516
1582 | 2517
1583 | 2519
1584 | 2520
1585 | 2528
1586 | 2534
1587 | 2545
1588 | 2547
1589 | 2553
1590 | 2556
1591 | 2559
1592 | 2562
1593 | 2563
1594 | 2572
1595 | 2573
1596 | 2577
1597 | 2578
1598 | 2585
1599 | 2591
1600 | 2593
1601 | 2595
1602 | 2596
1603 | 2602
1604 | 2603
1605 | 2605
1606 | 2609
1607 | 2610
1608 | 2616
1609 | 2618
1610 | 2619
1611 | 2625
1612 | 2628
1613 | 2630
1614 | 2633
1615 | 2647
1616 | 2660
1617 | 2662
1618 | 2681
1619 | 2683
1620 | 2685
1621 | 2689
1622 | 2692
1623 | 2698
1624 | 2705
1625 | 6
1626 | 11
1627 | 13
1628 | 23
1629 | 29
1630 | 31
1631 | 45
1632 | 55
1633 | 57
1634 | 64
1635 | 66
1636 | 68
1637 | 72
1638 | 74
1639 | 82
1640 | 87
1641 | 88
1642 | 94
1643 | 95
1644 | 97
1645 | 104
1646 | 107
1647 | 115
1648 | 116
1649 | 123
1650 | 132
1651 | 139
1652 | 141
1653 | 151
1654 | 154
1655 | 157
1656 | 163
1657 | 164
1658 | 176
1659 | 178
1660 | 185
1661 | 186
1662 | 187
1663 | 188
1664 | 199
1665 | 201
1666 | 202
1667 | 208
1668 | 212
1669 | 233
1670 | 234
1671 | 239
1672 | 243
1673 | 247
1674 | 250
1675 | 253
1676 | 254
1677 | 261
1678 | 275
1679 | 276
1680 | 285
1681 | 293
1682 | 295
1683 | 298
1684 | 300
1685 | 304
1686 | 306
1687 | 307
1688 | 317
1689 | 320
1690 | 325
1691 | 327
1692 | 329
1693 | 342
1694 | 347
1695 | 351
1696 | 360
1697 | 362
1698 | 372
1699 | 373
1700 | 375
1701 | 379
1702 | 383
1703 | 385
1704 | 389
1705 | 396
1706 | 399
1707 | 400
1708 | 407
1709 | 410
1710 | 414
1711 | 417
1712 | 420
1713 | 427
1714 | 428
1715 | 429
1716 | 432
1717 | 434
1718 | 443
1719 | 452
1720 | 454
1721 | 462
1722 | 463
1723 | 465
1724 | 472
1725 | 476
1726 | 489
1727 | 495
1728 | 499
1729 | 502
1730 | 503
1731 | 509
1732 | 515
1733 | 518
1734 | 530
1735 | 535
1736 | 536
1737 | 539
1738 | 541
1739 | 550
1740 | 554
1741 | 561
1742 | 567
1743 | 571
1744 | 578
1745 | 581
1746 | 584
1747 | 589
1748 | 590
1749 | 592
1750 | 598
1751 | 602
1752 | 603
1753 | 607
1754 | 608
1755 | 609
1756 | 612
1757 | 617
1758 | 621
1759 | 622
1760 | 625
1761 | 628
1762 | 631
1763 | 641
1764 | 642
1765 | 643
1766 | 644
1767 | 645
1768 | 646
1769 | 669
1770 | 670
1771 | 681
1772 | 685
1773 | 694
1774 | 696
1775 | 697
1776 | 706
1777 | 708
1778 | 715
1779 | 721
1780 | 723
1781 | 732
1782 | 733
1783 | 734
1784 | 735
1785 | 736
1786 | 739
1787 | 740
1788 | 754
1789 | 758
1790 | 761
1791 | 763
1792 | 765
1793 | 778
1794 | 781
1795 | 793
1796 | 796
1797 | 798
1798 | 800
1799 | 808
1800 | 811
1801 | 826
1802 | 834
1803 | 837
1804 | 855
1805 | 856
1806 | 858
1807 | 862
1808 | 865
1809 | 866
1810 | 867
1811 | 875
1812 | 878
1813 | 882
1814 | 884
1815 | 891
1816 | 902
1817 | 907
1818 | 911
1819 | 912
1820 | 914
1821 | 923
1822 | 926
1823 | 933
1824 | 938
1825 | 940
1826 | 948
1827 | 954
1828 | 958
1829 | 959
1830 | 960
1831 | 963
1832 | 968
1833 | 978
1834 | 1001
1835 | 1006
1836 | 1007
1837 | 1020
1838 | 1034
1839 | 1035
1840 | 1054
1841 | 1055
1842 | 1056
1843 | 1060
1844 | 1065
1845 | 1070
1846 | 1072
1847 | 1076
1848 | 1079
1849 | 1082
1850 | 1089
1851 | 1093
1852 | 1098
1853 | 1102
1854 | 1107
1855 | 1116
1856 | 1126
1857 | 1135
1858 | 1136
1859 | 1141
1860 | 1145
1861 | 1150
1862 | 1163
1863 | 1165
1864 | 1183
1865 | 1191
1866 | 1195
1867 | 1198
1868 | 1203
1869 | 1204
1870 | 1231
1871 | 1244
1872 | 1261
1873 | 1262
1874 | 1265
1875 | 1268
1876 | 1269
1877 | 1273
1878 | 1284
1879 | 1302
1880 | 1304
1881 | 1310
1882 | 1315
1883 | 1319
1884 | 1320
1885 | 1322
1886 | 1324
1887 | 1328
1888 | 1335
1889 | 1340
1890 | 1341
1891 | 1343
1892 | 1344
1893 | 1345
1894 | 1347
1895 | 1349
1896 | 1353
1897 | 1356
1898 | 1359
1899 | 1361
1900 | 1362
1901 | 1369
1902 | 1378
1903 | 1389
1904 | 1391
1905 | 1395
1906 | 1396
1907 | 1408
1908 | 1415
1909 | 1416
1910 | 1432
1911 | 1446
1912 | 1447
1913 | 1452
1914 | 1460
1915 | 1466
1916 | 1477
1917 | 1481
1918 | 1483
1919 | 1486
1920 | 1488
1921 | 1508
1922 | 1511
1923 | 1522
1924 | 1531
1925 | 1538
1926 | 1543
1927 | 1548
1928 | 1552
1929 | 1556
1930 | 1557
1931 | 1563
1932 | 1567
1933 | 1568
1934 | 1574
1935 | 1586
1936 | 1590
1937 | 1599
1938 | 1600
1939 | 1601
1940 | 1602
1941 | 1620
1942 | 1634
1943 | 1640
1944 | 1641
1945 | 1649
1946 | 1653
1947 | 1657
1948 | 1662
1949 | 1671
1950 | 1673
1951 | 1679
1952 | 1688
1953 | 1694
1954 | 1695
1955 | 1696
1956 | 1698
1957 | 1702
1958 | 1704
1959 | 1706
1960 | 1712
1961 | 1714
1962 | 1718
1963 | 1727
1964 | 1732
1965 | 1734
1966 | 1751
1967 | 1770
1968 | 1773
1969 | 1775
1970 | 1781
1971 | 1786
1972 | 1798
1973 | 1803
1974 | 1808
1975 | 1811
1976 | 1815
1977 | 1820
1978 | 1824
1979 | 1837
1980 | 1839
1981 | 1843
1982 | 1849
1983 | 1867
1984 | 1873
1985 | 1874
1986 | 1880
1987 | 1887
1988 | 1889
1989 | 1895
1990 | 1900
1991 | 1904
1992 | 1919
1993 | 1923
1994 | 1927
1995 | 1930
1996 | 1933
1997 | 1935
1998 | 1938
1999 | 1941
2000 | 1942
2001 | 1946
2002 | 1951
2003 | 1959
2004 | 1961
2005 | 1978
2006 | 1979
2007 | 1984
2008 | 1989
2009 | 1990
2010 | 1992
2011 | 2001
2012 | 2004
2013 | 2006
2014 | 2010
2015 | 2014
2016 | 2016
2017 | 2018
2018 | 2025
2019 | 2026
2020 | 2029
2021 | 2031
2022 | 2034
2023 | 2040
2024 | 2044
2025 | 2052
2026 | 2054
2027 | 2055
2028 | 2056
2029 | 2057
2030 | 2058
2031 | 2062
2032 | 2070
2033 | 2074
2034 | 2075
2035 | 2078
2036 | 2092
2037 | 2094
2038 | 2096
2039 | 2099
2040 | 2105
2041 | 2116
2042 | 2122
2043 | 2124
2044 | 2126
2045 | 2132
2046 | 2138
2047 | 2140
2048 | 2141
2049 | 2145
2050 | 2149
2051 | 2154
2052 | 2156
2053 | 2158
2054 | 2166
2055 | 2179
2056 | 2185
2057 | 2189
2058 | 2190
2059 | 2191
2060 | 2194
2061 | 2198
2062 | 2199
2063 | 2200
2064 | 2203
2065 | 2206
2066 | 2207
2067 | 2209
2068 | 2213
2069 | 2215
2070 | 2220
2071 | 2225
2072 | 2229
2073 | 2242
2074 | 2249
2075 | 2267
2076 | 2278
2077 | 2281
2078 | 2285
2079 | 2294
2080 | 2296
2081 | 2301
2082 | 2302
2083 | 2304
2084 | 2310
2085 | 2311
2086 | 2313
2087 | 2314
2088 | 2318
2089 | 2322
2090 | 2325
2091 | 2326
2092 | 2327
2093 | 2334
2094 | 2338
2095 | 2343
2096 | 2351
2097 | 2362
2098 | 2366
2099 | 2369
2100 | 2372
2101 | 2373
2102 | 2378
2103 | 2381
2104 | 2385
2105 | 2392
2106 | 2396
2107 | 2398
2108 | 2399
2109 | 2415
2110 | 2421
2111 | 2424
2112 | 2426
2113 | 2427
2114 | 2431
2115 | 2432
2116 | 2440
2117 | 2441
2118 | 2442
2119 | 2449
2120 | 2458
2121 | 2460
2122 | 2464
2123 | 2467
2124 | 2468
2125 | 2477
2126 | 2486
2127 | 2488
2128 | 2493
2129 | 2497
2130 | 2506
2131 | 2514
2132 | 2518
2133 | 2525
2134 | 2526
2135 | 2533
2136 | 2536
2137 | 2537
2138 | 2539
2139 | 2543
2140 | 2552
2141 | 2554
2142 | 2557
2143 | 2560
2144 | 2566
2145 | 2569
2146 | 2570
2147 | 2581
2148 | 2587
2149 | 2589
2150 | 2592
2151 | 2600
2152 | 2627
2153 | 2635
2154 | 2644
2155 | 2651
2156 | 2652
2157 | 2657
2158 | 2659
2159 | 2674
2160 | 2682
2161 | 2684
2162 | 2701
2163 | 2707
2164 | 


--------------------------------------------------------------------------------
/ingestion/__pycache__/common.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JunweiSUN/AutoGL/e1743e4571d88889ce87c14cb3bce63a0a2a505d/ingestion/__pycache__/common.cpython-36.pyc


--------------------------------------------------------------------------------
/ingestion/__pycache__/dataset.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JunweiSUN/AutoGL/e1743e4571d88889ce87c14cb3bce63a0a2a505d/ingestion/__pycache__/dataset.cpython-36.pyc


--------------------------------------------------------------------------------
/ingestion/__pycache__/timing.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JunweiSUN/AutoGL/e1743e4571d88889ce87c14cb3bce63a0a2a505d/ingestion/__pycache__/timing.cpython-36.pyc


--------------------------------------------------------------------------------
/ingestion/common.py:
--------------------------------------------------------------------------------
 1 | # pylint: disable=logging-fstring-interpolation, broad-except
 2 | """common"""
 3 | import logging
 4 | import importlib
 5 | import sys
 6 | 
 7 | 
class ModelApiError(Exception):
    """Raised when the user model does not expose a required method."""
10 | 
11 | 
def get_logger(verbosity_level, name, use_error_log=False):
    """Return a logger named ``name`` that writes formatted records to stdout.

    Records are formatted like::

        2019-04-25 12:52:51,924 INFO score.py: <message>

    Calling this function again with the same ``name`` reconfigures the
    existing logger instead of stacking more handlers on it, so each record
    is emitted once per stream rather than once per call.

    Args:
        verbosity_level: Name of a ``logging`` level constant, e.g. 'INFO'.
        name: Logger name handed to ``logging.getLogger``.
        use_error_log: When true, records at WARNING or above are also
            written to stderr.

    Returns:
        The configured ``logging.Logger`` with propagation disabled.

    Raises:
        AttributeError: If ``verbosity_level`` is not an attribute of the
            ``logging`` module.
    """
    logger = logging.getLogger(name)
    logging_level = getattr(logging, verbosity_level)
    logger.setLevel(logging_level)

    # logging.getLogger returns the same object for the same name, so drop
    # handlers attached earlier; otherwise records would be duplicated.
    for stale_handler in list(logger.handlers):
        logger.removeHandler(stale_handler)

    formatter = logging.Formatter(
        fmt='%(asctime)s %(levelname)s %(filename)s: %(message)s')
    stdout_handler = logging.StreamHandler(sys.stdout)
    stdout_handler.setLevel(logging_level)
    stdout_handler.setFormatter(formatter)
    logger.addHandler(stdout_handler)
    if use_error_log:
        stderr_handler = logging.StreamHandler(sys.stderr)
        stderr_handler.setLevel(logging.WARNING)
        stderr_handler.setFormatter(formatter)
        logger.addHandler(stderr_handler)
    # Keep records away from the root logger's handlers.
    logger.propagate = False
    return logger
32 | 
33 | 
# Logging threshold used for this module's logger.
VERBOSITY_LEVEL = 'INFO'
# Module-level logger, named after this file's path.
LOGGER = get_logger(VERBOSITY_LEVEL, __file__)
# Method names a user model is expected to provide.
METHOD_LIST = ['train_predict']
37 | 
38 | 
def _check_umodel_methed(umodel):
    """Verify that ``umodel`` exposes every method named in METHOD_LIST.

    Args:
        umodel: The user model class (or instance) to inspect.

    Raises:
        ModelApiError: If a required method is missing; the message names
            the missing method.
    """
    for attr in METHOD_LIST:
        if not hasattr(umodel, attr):
            raise ModelApiError(
                f"Your model object doesn't have the method {attr}")
45 | 
46 | 
def import_umodel():
    """Import the ``model`` module and return its validated ``Model`` class."""
    user_module = importlib.import_module('model')
    model_cls = user_module.Model
    # Fail early if the class lacks the expected API.
    _check_umodel_methed(model_cls)
    return model_cls
53 | 
54 | 
def init_usermodel():
    """Instantiate the user's ``Model`` class without arguments."""
    model_cls = import_umodel()
    return model_cls()
58 | 


--------------------------------------------------------------------------------
/ingestion/dataset.py:
--------------------------------------------------------------------------------
  1 | """
  2 |   AutoWSL datasets.
  3 | """
  4 | import copy
  5 | from os.path import join
  6 | from datetime import datetime
  7 | import numpy as np
  8 | import pandas as pd
  9 | import yaml
 10 | from common import get_logger
 11 | 
# Maps a schema type name (from the metadata file) to the dtype handed to
# pandas when the feature table is read.
TYPE_MAP = {
    'cat': str,
    'multi-cat': str,
    'str': str,
    'num': np.float64,
    'timestamp': 'str'
}

# Logging threshold used for this module's logger.
VERBOSITY_LEVEL = 'WARNING'
LOGGER = get_logger(VERBOSITY_LEVEL, __file__)
# Schema type name whose columns are parsed as dates.
TIMESTAMP_TYPE_NAME = 'timestamp'
# File names looked up inside the dataset directory.
TRAIN_FILE = 'train_node_id.txt'
TRAIN_LABEL = 'train_label.tsv'
TEST_FILE = 'test_node_id.txt'
INFO_FILE = 'config.yml'
FEA_TABLE = 'feature.tsv'
EDGE_FILE = 'edge.tsv'

# Field separator passed to pandas for the tabular files.
SEP = '\t'
 31 | 
 32 | 
 33 | def _date_parser(millisecs):
 34 |     if np.isnan(float(millisecs)):
 35 |         return millisecs
 36 | 
 37 |     return datetime.fromtimestamp(float(millisecs))
 38 | 
 39 | 
 40 | class Dataset:
 41 |     """"Dataset"""
 42 |     def __init__(self, dataset_dir):
 43 |         """
 44 |             train_dataset, test_dataset: list of strings
 45 |             train_label: np.array
 46 |         """
 47 |         self.dataset_dir_ = dataset_dir
 48 |         self.metadata_ = self._read_metadata(join(dataset_dir, INFO_FILE))
 49 |         self.edge_data = None
 50 |         self.train_indices = None
 51 |         self.train_label = None
 52 |         self.test_indices = None
 53 |         self.fea_table = None
 54 |         self.get_data()
 55 | 
 56 |     def get_data(self):
 57 |         """get all training data"""
 58 |         data = {
 59 |             'fea_table': self.get_fea_table(),
 60 |             'edge_file': self.get_edge(),
 61 |             'train_indices': self.get_train_indices(),
 62 |             'test_indices': self.get_test_indices(),
 63 |             'train_label': self.get_train_label(),
 64 |         }
 65 |         return data
 66 | 
 67 |     def get_fea_table(self):
 68 |         """get train"""
 69 |         if self.fea_table is None:
 70 |             self.fea_table = self._read_dataset(
 71 |                 join(self.dataset_dir_, FEA_TABLE))
 72 |         return self.fea_table
 73 | 
 74 |     def get_edge(self):
 75 |         """get edge file"""
 76 |         dtype = {
 77 |             'src_id': int,
 78 |             'dst_idx': int,
 79 |             'edge_weight': float
 80 |         }
 81 |         if self.edge_data is None:
 82 |             self.edge_data = pd.read_csv(
 83 |                 join(self.dataset_dir_, EDGE_FILE), dtype=dtype, sep=SEP)
 84 |         return self.edge_data
 85 | 
 86 |     def get_train_label(self):
 87 |         """get train label"""
 88 |         dtype = {
 89 |             'node_index': int,
 90 |             'label': int,
 91 |         }
 92 |         if self.train_label is None:
 93 |             self.train_label = pd.read_csv(
 94 |                 join(self.dataset_dir_, TRAIN_LABEL), dtype=dtype, sep=SEP)
 95 | 
 96 |         return self.train_label
 97 | 
 98 |     def get_test_indices(self):
 99 |         """get test index file"""
100 |         if self.test_indices is None:
101 |             with open(join(self.dataset_dir_, TEST_FILE), 'r') as ftmp:
102 |                 self.test_indices = [int(line.strip()) for line in ftmp]
103 | 
104 |         return self.test_indices
105 | 
106 |     def get_train_indices(self):
107 |         """get train index file"""
108 |         if self.train_indices is None:
109 |             with open(join(self.dataset_dir_, TRAIN_FILE), 'r') as ftmp:
110 |                 self.train_indices = [int(line.strip()) for line in ftmp]
111 | 
112 |         return self.train_indices
113 | 
114 |     def get_metadata(self):
115 |         """get metadata"""
116 |         return copy.deepcopy(self.metadata_)
117 | 
118 |     @staticmethod
119 |     def _read_metadata(metadata_path):
120 |         with open(metadata_path, 'r') as ftmp:
121 |             return yaml.safe_load(ftmp)
122 | 
123 |     def _read_dataset(self, dataset_path):
124 |         schema = self.metadata_['schema']
125 |         if isinstance(schema, dict):
126 |             table_dtype = {key: TYPE_MAP[val] for key, val in schema.items()}
127 |             date_list = [key for key, val in schema.items()
128 |                          if val == TIMESTAMP_TYPE_NAME]
129 |             dataset = pd.read_csv(
130 |                 dataset_path, sep=SEP, dtype=table_dtype,
131 |                 parse_dates=date_list, date_parser=_date_parser)
132 |         else:
133 |             dataset = pd.read_csv(dataset_path, sep=SEP)
134 | 
135 |         return dataset
136 | 


--------------------------------------------------------------------------------
/ingestion/ingestion.py:
--------------------------------------------------------------------------------
  1 | # pylint: disable=logging-fstring-interpolation, broad-except
  2 | """ingestion program for autoWSL"""
  3 | import os
  4 | from os.path import join
  5 | import sys
  6 | from sys import path
  7 | import argparse
  8 | import time
  9 | import pandas as pd
 10 | import yaml
 11 | from filelock import FileLock
 12 | 
 13 | from common import get_logger, init_usermodel
 14 | 
 15 | import timing
 16 | from timing import Timer
 17 | from dataset import Dataset
 18 | 
 19 | 
# Verbosity level of logging.
# Can be: NOTSET, DEBUG, INFO, WARNING, ERROR, CRITICAL
VERBOSITY_LEVEL = 'INFO'
# Module-level logger used by the functions in this script.
LOGGER = get_logger(VERBOSITY_LEVEL, __file__)
 24 | 
 25 | 
 26 | def _here(*args):
 27 |     """Helper function for getting the current directory of this script."""
 28 |     here = os.path.dirname(os.path.realpath(__file__))
 29 |     return os.path.abspath(os.path.join(here, *args))
 30 | 
 31 | 
 32 | def write_start_file(output_dir):
 33 |     """write start file"""
 34 |     start_filepath = os.path.join(output_dir, 'start.txt')
 35 |     lockfile = os.path.join(output_dir, 'start.txt.lock')
 36 |     ingestion_pid = os.getpid()
 37 | 
 38 |     with FileLock(lockfile):
 39 |         with open(start_filepath, 'w') as ftmp:
 40 |             ftmp.write(str(ingestion_pid))
 41 | 
 42 |     LOGGER.info('===== Finished writing "start.txt" file.')
 43 | 
 44 | 
class IngestionError(RuntimeError):
    """Ingestion-specific runtime error (not raised within this script)."""
 47 | 
 48 | 
 49 | def _parse_args():
 50 |     root_dir = _here(os.pardir)
 51 |     default_dataset_dir = join(root_dir, "sample_data")
 52 |     default_output_dir = join(root_dir, "sample_result_submission")
 53 |     default_ingestion_program_dir = join(root_dir, "ingestion_program")
 54 |     default_code_dir = join(root_dir, "code_submission")
 55 |     default_score_dir = join(root_dir, "scoring_output")
 56 |     default_temp_dir = join(root_dir, 'temp_output')
 57 |     parser = argparse.ArgumentParser()
 58 |     parser.add_argument('--dataset_dir', type=str,
 59 |                         default=default_dataset_dir,
 60 |                         help="Directory storing the dataset (containing "
 61 |                              "e.g. adult.data/)")
 62 |     parser.add_argument('--output_dir', type=str,
 63 |                         default=default_output_dir,
 64 |                         help="Directory storing the predictions. It will "
 65 |                              "contain e.g. [start.txt, predictions, end.yaml]"
 66 |                              "when ingestion terminates.")
 67 |     parser.add_argument('--ingestion_program_dir', type=str,
 68 |                         default=default_ingestion_program_dir,
 69 |                         help="Directory storing the ingestion program "
 70 |                              "`ingestion.py` and other necessary packages.")
 71 |     parser.add_argument('--code_dir', type=str,
 72 |                         default=default_code_dir,
 73 |                         help="Directory storing the submission code "
 74 |                              "`model.py` and other necessary packages.")
 75 |     parser.add_argument('--score_dir', type=str,
 76 |                         default=default_score_dir,
 77 |                         help="Directory storing the scoring output "
 78 |                              "e.g. `scores.txt` and `detailed_results.html`.")
 79 |     parser.add_argument('--temp_dir', type=str,
 80 |                         default=default_temp_dir,
 81 |                         help="Directory storing the temporary output."
 82 |                              "e.g. save the participants` model after "
 83 |                              "trainning.")
 84 | 
 85 |     args = parser.parse_args()
 86 |     LOGGER.debug(f'Parsed args are: {args}')
 87 |     LOGGER.debug("-" * 50)
 88 |     if (args.dataset_dir.endswith('run/input') and
 89 |             args.code_dir.endswith('run/program')):
 90 |         LOGGER.debug("Since dataset_dir ends with 'run/input' and code_dir "
 91 |                      "ends with 'run/program', suppose running on "
 92 |                      "CodaLab platform. Modify dataset_dir to 'run/input_data'"
 93 |                      " and code_dir to 'run/submission'. "
 94 |                      "Directory parsing should be more flexible in the code of"
 95 |                      " compute worker: we need explicit directories for "
 96 |                      "dataset_dir and code_dir.")
 97 | 
 98 |         args.dataset_dir = args.dataset_dir.replace(
 99 |             'run/input', 'run/input_data')
100 |         args.code_dir = args.code_dir.replace(
101 |             'run/program', 'run/submission')
102 | 
103 |         # Show directories for debugging
104 |         LOGGER.debug(f"sys.argv = {sys.argv}")
105 |         LOGGER.debug(f"Using dataset_dir: {args.dataset_dir}")
106 |         LOGGER.debug(f"Using output_dir: {args.output_dir}")
107 |         LOGGER.debug(
108 |             f"Using ingestion_program_dir: {args.ingestion_program_dir}")
109 |         LOGGER.debug(f"Using code_dir: {args.code_dir}")
110 |     return args
111 | 
112 | 
113 | def _init_python_path(args):
114 |     path.append(args.ingestion_program_dir)
115 |     path.append(args.code_dir)
116 |     os.makedirs(args.output_dir, exist_ok=True)
117 |     os.makedirs(args.temp_dir, exist_ok=True)
118 | 
119 | 
120 | def _train_predict(umodel, dataset, timer, n_class, schema):
121 |     # Train the model
122 |     data = dataset.get_data()
123 | 
124 |     with timer.time_limit('train_predict'):
125 |         predictions = umodel.train_predict(
126 |             data, timer.get_all_remain()['train_predict'], n_class, schema)
127 | 
128 |     return predictions
129 | 
130 | 
def _finalize(args, timer):
    """Log timing statistics, write ``end.yaml`` and copy results for scoring.

    Args:
        args: Parsed command-line namespace; ``output_dir`` and
            ``score_dir`` are read.
        timer: Object providing ``get_all_stats()`` and
            ``get_overall_duration()``.

    The final copy into ``score_dir`` is best-effort: a failure is logged as
    a warning and does not raise.
    """
    # Local imports: only this function needs them.
    import glob
    import subprocess

    # Finishing ingestion program
    end_time = time.time()

    time_stats = timer.get_all_stats()
    for pname, stats in time_stats.items():
        for stat_name, val in stats.items():
            LOGGER.info(f'the {stat_name} of duration in {pname}: {val} sec')

    overall_time_spent = timer.get_overall_duration()

    # Write overall_time_spent to a end.yaml file
    end_filename = 'end.yaml'
    content = {
        'ingestion_duration': overall_time_spent,
        'time_stats': time_stats,
        'end_time': end_time}

    with open(join(args.output_dir, end_filename), 'w') as ftmp:
        yaml.dump(content, ftmp)

    LOGGER.info(
        f'Wrote the file {end_filename} marking the end of ingestion.')
    LOGGER.info("[+] Done. Ingestion program successfully terminated.")
    # Use fixed-point ':5.2f'; a bare ':5.2' means two significant digits
    # and renders longer durations in exponent form (e.g. '1.2e+02').
    LOGGER.info(f"[+] Overall time spent {overall_time_spent:5.2f} sec")

    # Copy all files in output_dir to score_dir. The command is passed as an
    # argument list (no shell), so paths with spaces or shell metacharacters
    # are handled literally.
    try:
        os.makedirs(args.score_dir, exist_ok=True)
        sources = sorted(glob.glob(join(args.output_dir, '*')))
        returncode = subprocess.run(
            ['cp', '-R', '--'] + sources + [args.score_dir]).returncode
    except OSError as err:
        LOGGER.warning(
            f"Could not copy ingestion output to {args.score_dir}: {err}")
    else:
        if returncode == 0:
            LOGGER.debug(
                "Copied all ingestion output to scoring output directory.")
        else:
            LOGGER.warning(
                f"Copying ingestion output to {args.score_dir} failed "
                f"with exit code {returncode}")

    LOGGER.info("[Ingestion terminated]")
164 | 
165 | 
def _write_predict(output_dir, prediction):
    """Write ``prediction`` to the file 'predictions' inside ``output_dir``.

    The values are saved as a one-column CSV with the header 'label' and no
    index column. ``output_dir`` is created if it does not exist.
    """
    os.makedirs(output_dir, exist_ok=True)
    labels = pd.Series(prediction, name='label')
    LOGGER.debug(f'prediction shape: {labels.shape}')
    target_path = join(output_dir, 'predictions')
    labels.to_csv(target_path, index=False, header=True)
173 | 
174 | 
def _init_timer(time_budgets):
    """Build a Timer with one 'train_predict' process in RESET mode.

    ``time_budgets`` is registered as the budget of that process.
    """
    mode = timing.RESET
    timer = Timer()
    timer.add_process('train_predict', time_budgets, mode)
    LOGGER.debug(
        f"init time budget of train_predict: {time_budgets} "
        f"mode: {mode}")
    return timer
182 | 
183 | 
def main():
    """Entry point of the ingestion program.

    Parses the arguments, marks the start of ingestion, loads the dataset
    and its metadata, runs the user model under the time budget, writes
    the predictions and finalizes the run.
    """
    LOGGER.info('===== Start ingestion program.')
    # Parse directories from input arguments
    LOGGER.info('===== Initialize args.')
    cli_args = _parse_args()
    _init_python_path(cli_args)
    write_start_file(cli_args.output_dir)

    LOGGER.info('===== Load data.')
    dataset = Dataset(cli_args.dataset_dir)
    time_budget = dataset.get_metadata().get("time_budget")
    n_class = dataset.get_metadata().get("n_class")
    schema = dataset.get_metadata().get("schema")
    LOGGER.info(f"Time budget: {time_budget}")

    LOGGER.info("===== import user model")
    user_model = init_usermodel()

    LOGGER.info("===== Begin training user model")
    timer = _init_timer(time_budget)
    predicted = _train_predict(user_model, dataset, timer, n_class, schema)
    _write_predict(cli_args.output_dir, predicted)

    _finalize(cli_args, timer)


if __name__ == "__main__":
    main()
216 | 


--------------------------------------------------------------------------------
/ingestion/metadata:
--------------------------------------------------------------------------------
1 | command: python $ingestion_program/ingestion.py --dataset_dir=$input --output_dir=$predictions --ingestion_program_dir=$ingestion_program --code_dir=$submission_program --score_dir=$output --temp_dir=$tmp
2 | 


--------------------------------------------------------------------------------
/ingestion/timing.py:
--------------------------------------------------------------------------------
  1 | # pylint: disable=logging-fstring-interpolation, broad-except
  2 | """common"""
  3 | import signal
  4 | import math
  5 | import time
  6 | from contextlib import contextmanager
  7 | import numpy as np
  8 | import yaml
  9 | from common import get_logger
 10 | 
 11 | VERBOSITY_LEVEL = 'INFO'
 12 | LOGGER = get_logger(VERBOSITY_LEVEL, __file__)
 13 | 
 14 | CUM = 0
 15 | RESET = 1
 16 | MODES = set([CUM, RESET])
 17 | 
 18 | 
 19 | OP_MAP = {
 20 |     'mean': np.mean,
 21 |     'max': np.max,
 22 |     'std': np.std,
 23 |     'sum': sum,
 24 | }
 25 | 
 26 | 
 27 | class TimeoutException(Exception):
 28 |     """timeoutexception"""
 29 | 
 30 | 
 31 | class Timer:
 32 |     """timer"""
 33 |     def __init__(self):
 34 |         self.total = {}
 35 |         self.history = {}
 36 |         self.modes = {}
 37 | 
 38 |     @classmethod
 39 |     def from_file(cls, save_file):
 40 |         """contruct timer from a save file"""
 41 |         timer = Timer()
 42 |         timer.load(save_file)
 43 |         return timer
 44 | 
 45 |     def add_process(self, pname, time_budget, mode=RESET):
 46 |         """set time_budget
 47 |         mode: CUM/RESET
 48 |         """
 49 |         if pname in self.total:
 50 |             raise ValueError(f"Existing process of timer: {pname}")
 51 |         if mode not in MODES:
 52 |             raise ValueError(f"wrong process mode: {mode}")
 53 | 
 54 |         self.total[pname] = time_budget
 55 |         self.history[pname] = []
 56 |         self.modes[pname] = mode
 57 | 
 58 |     @contextmanager
 59 |     def time_limit(self, pname, verbose=True):
 60 |         """limit time"""
 61 |         def signal_handler(signum, frame):
 62 |             raise TimeoutException(f"{pname}: Timed out!")
 63 |         signal.signal(signal.SIGALRM, signal_handler)
 64 |         time_budget = int(math.ceil(self.get_remain(pname)))
 65 |         signal.alarm(time_budget)
 66 |         start_time = time.time()
 67 | 
 68 |         try:
 69 | 
 70 |             if verbose:
 71 |                 LOGGER.info(f'start {pname} with time budget {time_budget}')
 72 |             yield
 73 |         finally:
 74 |             exec_time = time.time() - start_time
 75 |             signal.alarm(0)
 76 |             self.history[pname].append(exec_time)
 77 | 
 78 |         if verbose:
 79 |             LOGGER.info(f'{pname} success, time spent {exec_time} sec')
 80 | 
 81 |         if self.get_remain(pname) <= 0:
 82 |             raise TimeoutException(f"{pname}: Timed out!")
 83 | 
 84 |     def get_remain(self, pname):
 85 |         """get remaining time of process"""
 86 |         if self.modes[pname] == CUM:
 87 |             remain = self.total[pname] - sum(self.history[pname])
 88 |         else:
 89 |             remain = self.total[pname]
 90 | 
 91 |         return remain
 92 | 
 93 |     def get_all_remain(self):
 94 |         """get remaining time of process"""
 95 |         return {key: self.get_remain(key) for key in self.total.keys()}
 96 | 
 97 |     def get_stats(self, pname):
 98 |         """get stats of timing history"""
 99 |         result = {}
100 |         for stat in ['sum', 'mean', 'max', 'std']:
101 |             history = self.history[pname]
102 |             if history:
103 |                 result[stat] = float(OP_MAP[stat](self.history[pname]))
104 |             else:
105 |                 result[stat] = 0
106 |         return result
107 | 
108 |     def get_overall_duration(self):
109 |         """get overall duration"""
110 |         duration = 0
111 |         for _, value in self.history.items():
112 |             duration += sum(value)
113 |         return duration
114 | 
115 |     def get_all_stats(self):
116 |         """get all stats of timing history"""
117 |         stats = {pname: self.get_stats(pname) for pname in self.total.keys()}
118 |         return stats
119 | 
120 |     def save(self, save_file):
121 |         """save timer"""
122 |         save_content = {
123 |             'total': self.total,
124 |             'history': self.history,
125 |             'modes': self.modes
126 |         }
127 |         with open(save_file, 'w') as ftmp:
128 |             yaml.dump(save_content, ftmp)
129 | 
130 |     def load(self, save_file):
131 |         """load timer"""
132 |         with open(save_file, 'r') as ftmp:
133 |             save_content = yaml.safe_load(ftmp)
134 |         self.total = save_content['total']
135 |         self.history = save_content['history']
136 |         self.modes = save_content['modes']
137 | 


--------------------------------------------------------------------------------
/run_local_test.py:
--------------------------------------------------------------------------------
  1 | """run local test in starting kit"""
  2 | # pylint: disable=logging-fstring-interpolation
  3 | 
  4 | import argparse
  5 | import logging
  6 | import os
  7 | from os.path import join, isdir
  8 | import shutil
  9 | from multiprocessing import Process
 10 | 
# Threshold of the root logger; at 'WARNING' the logging.info() calls made
# in this script are not emitted.
VERBOSITY_LEVEL = 'WARNING'

# Configure the root logger when the module is loaded.
logging.basicConfig(
    level=getattr(logging, VERBOSITY_LEVEL),
    format='%(asctime)s %(levelname)s %(filename)s: %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S'
)
 18 | 
 19 | 
 20 | def _here(*args):
 21 |     here = os.path.dirname(os.path.realpath(__file__))
 22 |     return os.path.join(here, *args)
 23 | 
 24 | 
 25 | def _ingestion_program(starting_kit_dir):
 26 |     return join(starting_kit_dir, 'ingestion', 'ingestion.py')
 27 | 
 28 | 
 29 | def _scoring_program(starting_kit_dir):
 30 |     return join(starting_kit_dir, 'scoring', 'score.py')
 31 | 
 32 | 
 33 | def remove_dir(output_dir):
 34 |     """Remove the directory `output_dir`.
 35 |   This aims to clean existing output of last run of local test.
 36 |   """
 37 |     if isdir(output_dir):
 38 |         logging.info(
 39 |             f"Cleaning existing output directory of last run: {output_dir}")
 40 |         shutil.rmtree(output_dir)
 41 | 
 42 | 
 43 | def _clean(starting_kit_dir):
 44 |     ingestion_output_dir = join(starting_kit_dir, 'sample_result_submission')
 45 |     score_dir = os.path.join(starting_kit_dir, 'scoring_output')
 46 |     remove_dir(ingestion_output_dir)
 47 |     remove_dir(score_dir)
 48 | 
 49 | 
 50 | def run(dataset_dir, code_dir):
 51 |     """run"""
 52 |     # Current directory containing this script
 53 |     starting_kit_dir = _here()
 54 |     path_ingestion = _ingestion_program(starting_kit_dir)
 55 |     path_scoring = _scoring_program(starting_kit_dir)
 56 | 
 57 |     # Run ingestion and scoring at the same time
 58 |     command_ingestion = (
 59 |         'python '
 60 |         #  f'{path_ingestion} --dataset_dir={dataset_dir}/data '
 61 |         f'{path_ingestion} --dataset_dir={dataset_dir}/train.data'
 62 |         f' --code_dir={code_dir}')
 63 | 
 64 |     command_scoring = (
 65 |         #  f'python {path_scoring} --solution_dir={dataset_dir}/solution')
 66 |         f'python {path_scoring} --solution_dir={dataset_dir}')
 67 | 
 68 |     def run_ingestion():
 69 |         os.system(command_ingestion)
 70 | 
 71 |     def run_scoring():
 72 |         os.system(command_scoring)
 73 | 
 74 |     ingestion_process = Process(name='ingestion', target=run_ingestion)
 75 |     scoring_process = Process(name='scoring', target=run_scoring)
 76 |     _clean(starting_kit_dir)
 77 | 
 78 |     ingestion_process.start()
 79 |     scoring_process.start()
 80 | 
 81 | 
 82 | def _parse_args():
 83 |     default_starting_kit_dir = _here()
 84 |     default_dataset_dir = join(default_starting_kit_dir, 'data', 'demo')
 85 |     default_code_dir = join(default_starting_kit_dir, 'code_submission')
 86 | 
 87 |     parser = argparse.ArgumentParser()
 88 |     parser.add_argument('--dataset_dir', type=str,
 89 |                         default=default_dataset_dir,
 90 |                         help="Directory storing the dataset, should contain"
 91 |                              "'data' and 'solution'")
 92 | 
 93 |     parser.add_argument('--code_dir', type=str,
 94 |                         default=default_code_dir,
 95 |                         help="Directory storing the submission code "
 96 |                              "`model.py` and other necessary packages.")
 97 | 
 98 |     args = parser.parse_args()
 99 |     return args
100 | 
101 | 
def main():
    """Parse the command line, log the settings and start the local test."""
    args = _parse_args()
    banner = "#" * 50
    logging.info(banner)
    logging.info("Begin running local test using")
    logging.info(f"code_dir = {args.code_dir}")
    logging.info(f"dataset_dir = {args.dataset_dir}")
    logging.info(banner)
    run(args.dataset_dir, args.code_dir)


if __name__ == '__main__':
    main()
117 | 


--------------------------------------------------------------------------------
/scoring/metadata:
--------------------------------------------------------------------------------
1 | command: python $program/score.py --solution_dir=$hidden --prediction_dir=$predictions --score_dir=$output
2 | description: Compute scores for the competition
3 | 


--------------------------------------------------------------------------------
/scoring/score.py:
--------------------------------------------------------------------------------
  1 | # pylint: disable=logging-fstring-interpolation
  2 | """scoring function for autograph"""
  3 | 
  4 | import argparse
  5 | import datetime
  6 | import os
  7 | from os.path import join
  8 | import logging
  9 | import sys
 10 | import time
 11 | 
 12 | import yaml
 13 | import pandas as pd
 14 | from sklearn.metrics import accuracy_score
 15 | 
 16 | from filelock import FileLock
 17 | 
# Verbosity level of logging.
# Can be: NOTSET, DEBUG, INFO, WARNING, ERROR, CRITICAL
VERBOSITY_LEVEL = 'INFO'
# NOTE(review): WAIT_TIME and MAX_TIME_DIFF are not referenced in the
# visible part of this script -- confirm they are still needed.
WAIT_TIME = 30
MAX_TIME_DIFF = datetime.timedelta(seconds=600)
# Value assigned to the score in main() before a real score is computed.
DEFAULT_SCORE = -1
# Name of the file holding the true labels inside the solution directory.
SOLUTION_FILE = 'test_label.tsv'
 26 | 
 27 | 
 28 | def get_logger(verbosity_level, use_error_log=False):
 29 |     """Set logging format to something like:
 30 |         2019-04-25 12:52:51,924 INFO score.py: <message>
 31 |     """
 32 |     logger = logging.getLogger(__file__)
 33 |     logging_level = getattr(logging, verbosity_level)
 34 |     logger.setLevel(logging_level)
 35 |     formatter = logging.Formatter(
 36 |         fmt='%(asctime)s %(levelname)s %(filename)s: %(message)s')
 37 |     stdout_handler = logging.StreamHandler(sys.stdout)
 38 |     stdout_handler.setLevel(logging_level)
 39 |     stdout_handler.setFormatter(formatter)
 40 |     logger.addHandler(stdout_handler)
 41 |     if use_error_log:
 42 |         stderr_handler = logging.StreamHandler(sys.stderr)
 43 |         stderr_handler.setLevel(logging.WARNING)
 44 |         stderr_handler.setFormatter(formatter)
 45 |         logger.addHandler(stderr_handler)
 46 |     logger.propagate = False
 47 |     return logger
 48 | 
 49 | 
 50 | LOGGER = get_logger(VERBOSITY_LEVEL)
 51 | 
 52 | 
 53 | def _here(*args):
 54 |     """Helper function for getting the current directory of the script."""
 55 |     here_dir = os.path.dirname(os.path.realpath(__file__))
 56 |     return os.path.abspath(join(here_dir, *args))
 57 | 
 58 | 
 59 | def _get_solution(solution_dir):
 60 |     """Get the solution array from solution directory."""
 61 |     solution_file = join(solution_dir, SOLUTION_FILE)
 62 |     solution = pd.read_csv(solution_file, sep='\t')
 63 |     return solution
 64 | 
 65 | 
 66 | def _get_prediction(prediction_dir):
 67 |     pred_file = join(prediction_dir, 'predictions')
 68 |     return pd.read_csv(pred_file)['label']
 69 | 
 70 | 
 71 | def _get_score(solution_dir, prediction_dir):
 72 |     """get score"""
 73 |     LOGGER.info('===== get solution')
 74 |     solution = _get_solution(solution_dir)['label']
 75 |     LOGGER.info('===== read prediction')
 76 |     prediction = _get_prediction(prediction_dir)
 77 |     if solution.shape != prediction.shape:
 78 |         raise ValueError(f"Bad prediction shape: {prediction.shape}. "
 79 |                          f"Expected shape: {solution.shape}")
 80 | 
 81 |     LOGGER.info('===== calculate score')
 82 |     LOGGER.debug(f'solution shape = {solution.shape}')
 83 |     LOGGER.debug(f'prediction shape = {prediction.shape}')
 84 |     score = accuracy_score(solution, prediction)
 85 | 
 86 |     return score
 87 | 
 88 | 
 89 | def _update_score(args, duration):
 90 |     score = _get_score(solution_dir=args.solution_dir,
 91 |                        prediction_dir=args.prediction_dir)
 92 |     # Update learning curve page (detailed_results.html)
 93 |     _write_scores_html(args.score_dir)
 94 |     # Write score
 95 |     LOGGER.info('===== write score')
 96 |     write_score(args.score_dir, score, duration)
 97 |     LOGGER.info(f"accuracy: {score:.4}")
 98 |     return score
 99 | 
100 | 
101 | def _init_scores_html(detailed_results_filepath):
102 |     html_head = ('<html><head> <meta http-equiv="refresh" content="5"> '
103 |                  '</head><body><pre>')
104 |     html_end = '</pre></body></html>'
105 |     with open(detailed_results_filepath, 'a') as html_file:
106 |         html_file.write(html_head)
107 |         html_file.write("Starting training process... <br> Please be patient. "
108 |                         "Learning curves will be generated when first "
109 |                         "predictions are made.")
110 |         html_file.write(html_end)
111 | 
112 | 
def _write_scores_html(score_dir, auto_refresh=True, append=False):
    """Write the (empty) result page detailed_results.html to ``score_dir``.

    With ``auto_refresh`` the page carries a 5 second refresh tag.  With
    ``append`` the page is appended to the file instead of replacing it.
    """
    if auto_refresh:
        page_start = ('<html><head> <meta http-equiv="refresh" content="5"> '
                      '</head><body><pre>')
    else:
        page_start = """<html><body><pre>"""
    page_end = '</pre></body></html>'

    open_mode = 'a' if append else 'w'
    filepath = join(score_dir, 'detailed_results.html')
    with open(filepath, open_mode) as html_file:
        html_file.write(page_start + page_end)
    LOGGER.debug(f"Wrote learning curve page to {filepath}")
130 | 
131 | 
def write_score(score_dir, score, duration):
    """Write score and duration to score_dir/scores.txt"""
    score_filename = join(score_dir, 'scores.txt')
    with open(score_filename, 'w') as ftmp:
        ftmp.writelines([
            f'score: {score}\n',
            f'Duration: {duration}\n',
        ])
    LOGGER.debug(f"Wrote to score_filename={score_filename} with "
                 f"score={score}, duration={duration}")
140 | 
141 | 
class IngestionError(Exception):
    """Raised when the ingestion program did not start or did not finish."""
144 | 
145 | 
class ScoringError(Exception):
    """Error type reserved for failures of the scoring program."""
148 | 
149 | 
def get_ingestion_info(prediction_dir):
    """Return the content of end.yaml found in ``prediction_dir``.

    Raises IngestionError when the file does not exist.
    """
    endfile_path = os.path.join(prediction_dir, 'end.yaml')

    if os.path.isfile(endfile_path):
        LOGGER.info('===== Detected end.yaml file, get ingestion information')
        with open(endfile_path, 'r') as ftmp:
            return yaml.safe_load(ftmp)

    raise IngestionError("[-] No end.yaml exist, ingestion failed")
163 | 
164 | 
def get_ingestion_pid(prediction_dir):
    """Return the pid that the ingestion program wrote to start.txt.

    Args:
        prediction_dir: directory in which start.txt is expected.

    Returns:
        The content of start.txt converted to int.

    Raises:
        IngestionError: if start.txt does not appear within 60 checks that
            are one second apart.
    """
    # Wait 60 seconds for ingestion to start and write 'start.txt',
    # Otherwise, raise an exception.
    wait_time = 60
    startfile = os.path.join(prediction_dir, 'start.txt')
    lockfile = os.path.join(prediction_dir, 'start.txt.lock')

    for i in range(wait_time):
        if os.path.exists(startfile):
            # Read under the lock file that sits next to start.txt.
            with FileLock(lockfile):
                with open(startfile, 'r') as ftmp:
                    ingestion_pid = ftmp.read()
                    LOGGER.info(
                        f'Detected the start of ingestion after {i} seconds.')
                    return int(ingestion_pid)
        else:
            time.sleep(1)
    # Both parts are f-strings and the first ends with a space: the original
    # printed the literal text '{wait_time}' and glued 'of' to 'ingestion'.
    raise IngestionError("[-] Failed: scoring didn't detect the start of "
                         f"ingestion after {wait_time} seconds.")
185 | 
186 | 
def is_process_alive(ingestion_pid):
    """Return True when a process with pid ``ingestion_pid`` exists.

    Signal 0 is sent to the pid, which performs the existence and
    permission checks without delivering a signal.
    """
    try:
        os.kill(ingestion_pid, 0)
    except PermissionError:
        # The process exists but belongs to another user.
        return True
    except OSError:
        return False
    return True
195 | 
196 | 
def _parse_args():
    """Parse the command line of the scoring program.

    Returns:
        The argparse namespace with ``solution_dir``, ``prediction_dir``
        and ``score_dir``; the defaults are directories next to the
        parent folder of this script.
    """
    # Default I/O directories:
    root_dir = _here(os.pardir)
    default_solution_dir = join(root_dir, "sample_data")
    default_prediction_dir = join(root_dir, "sample_result_submission")
    default_score_dir = join(root_dir, "scoring_output")
    parser = argparse.ArgumentParser()
    # The help texts name the files this script actually reads; the original
    # prediction help also lacked a space ('shouldcontain').
    parser.add_argument('--solution_dir', type=str,
                        default=default_solution_dir,
                        help=("Directory storing the solution with true "
                              "labels, e.g. test_label.tsv."))
    parser.add_argument('--prediction_dir', type=str,
                        default=default_prediction_dir,
                        help=("Directory storing the predictions. It should "
                              "contain e.g. [start.txt, predictions, "
                              "end.yaml]."))
    parser.add_argument('--score_dir', type=str,
                        default=default_score_dir,
                        help=("Directory storing the scoring output e.g. "
                              "`scores.txt` and `detailed_results.html`."))
    args = parser.parse_args()
    LOGGER.debug(f"Parsed args are: {args}")
    LOGGER.debug("-" * 50)
    LOGGER.debug(f"Using solution_dir: {args.solution_dir}")
    LOGGER.debug(f"Using prediction_dir: {args.prediction_dir}")
    LOGGER.debug(f"Using score_dir: {args.score_dir}")
    return args
224 | 
225 | 
def _init(args):
    """Create the score directory and the placeholder result page.

    Args:
        args: parsed arguments; only ``score_dir`` is read.
    """
    # makedirs(exist_ok=True) also creates missing parent directories and
    # does not fail when the directory appears between a check and the
    # creation, unlike an isdir() test followed by mkdir().
    os.makedirs(args.score_dir, exist_ok=True)
    detailed_results_filepath = join(
        args.score_dir, 'detailed_results.html')
    # Initialize detailed_results.html
    _init_scores_html(detailed_results_filepath)
233 | 
234 | 
def _finalize(score, scoring_start):
    """Log the final score and how long the scoring program ran.

    Args:
        score: the score to report.
        scoring_start: ``time.time()`` value taken when scoring started.
    """
    duration = time.time() - scoring_start
    # '.2f' keeps fixed-point output; a bare '.2' selects the general format
    # and prints e.g. '1.2e+02' once the duration reaches 100 seconds.
    # float() lets an integer score be formatted with a precision too.
    LOGGER.info(
        "[+] Successfully finished scoring! "
        f"Scoring duration: {duration:.2f} sec. "
        f"The score of your algorithm on the task is: {float(score):.6}.")

    LOGGER.info("[Scoring terminated]")
245 | 
246 | 
def main():
    """Entry point of the scoring program.

    Waits for the ingestion program to start and to exit, then reads its
    end.yaml, computes and writes the score and logs the result.
    """
    scoring_start = time.time()
    LOGGER.info('===== init scoring program')
    args = _parse_args()
    _init(args)
    score = DEFAULT_SCORE

    ingestion_pid = get_ingestion_pid(args.prediction_dir)

    LOGGER.info("===== wait for the exit of ingestion.")
    while True:
        if not is_process_alive(ingestion_pid):
            break
        time.sleep(1)

    # Compute/write score
    info = get_ingestion_info(args.prediction_dir)
    score = _update_score(args, info['ingestion_duration'])

    _finalize(score, scoring_start)


if __name__ == "__main__":
    main()
271 | 


--------------------------------------------------------------------------------