├── .flake8
├── .gitignore
├── CONTRIBUTING.md
├── LICENSE
├── README.md
├── SECURITY.md
├── data
│   └── qm9
│       ├── LICENSE
│       ├── test.jsonl.gz
│       ├── train.jsonl.gz
│       └── valid.jsonl.gz
├── gnns
│   ├── __init__.py
│   ├── ggnn.py
│   ├── gnn_edge_mlp.py
│   ├── gnn_film.py
│   ├── rgat.py
│   ├── rgcn.py
│   ├── rgdcn.py
│   └── rgin.py
├── models
│   ├── __init__.py
│   ├── ggnn_model.py
│   ├── gnn_edge_mlp_model.py
│   ├── gnn_film_model.py
│   ├── rgat_model.py
│   ├── rgcn_model.py
│   ├── rgdcn_model.py
│   ├── rgin_model.py
│   └── sparse_graph_model.py
├── reorg_varmisuse_data.sh
├── requirements.txt
├── run_ppi_benchs.py
├── run_qm9_benchs.py
├── run_varmisuse_benchs.py
├── tasks
│   ├── __init__.py
│   ├── citation_network_task.py
│   ├── default_hypers
│   │   ├── PPI_GGNN.json
│   │   ├── PPI_GNN-Edge-MLP0.json
│   │   ├── PPI_GNN-Edge-MLP1.json
│   │   ├── PPI_GNN-FiLM.json
│   │   ├── PPI_RGAT.json
│   │   ├── PPI_RGCN.json
│   │   ├── PPI_RGIN.json
│   │   ├── QM9_GGNN.json
│   │   ├── QM9_GNN-Edge-MLP0.json
│   │   ├── QM9_GNN-Edge-MLP1.json
│   │   ├── QM9_GNN-FiLM.json
│   │   ├── QM9_RGAT.json
│   │   ├── QM9_RGCN.json
│   │   ├── QM9_RGIN.json
│   │   ├── VarMisuse_GGNN.json
│   │   ├── VarMisuse_GNN-Edge-MLP0.json
│   │   ├── VarMisuse_GNN-Edge-MLP1.json
│   │   ├── VarMisuse_GNN-FiLM.json
│   │   ├── VarMisuse_RGAT.json
│   │   ├── VarMisuse_RGCN.json
│   │   └── VarMisuse_RGIN.json
│   ├── ppi_task.py
│   ├── qm9_task.py
│   ├── sparse_graph_task.py
│   └── varmisuse_task.py
├── test.py
├── train.py
└── utils
    ├── __init__.py
    ├── citation_network_utils.py
    ├── model_utils.py
    ├── utils.py
    └── varmisuse_data_splitter.py
/.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 2 | max-line-length = 150 3 | ignore = 4 | # W605: invalid escape sequence -- triggered by pseudo-LaTeX in comments 5 | W605, -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | 49 | # Translations 50 | *.mo 51 | *.pot 52 | 53 | # Django stuff: 54 | *.log 55 | local_settings.py 56 | 57 | # Flask stuff: 58 | instance/ 59 | .webassets-cache 60 | 61 | # Scrapy stuff: 62 | .scrapy 63 | 64 | # Sphinx documentation 65 | docs/_build/ 66 | 67 | # PyBuilder 68 | target/ 69 | 70 | # Jupyter Notebook 71 | .ipynb_checkpoints 72 | 73 | # pyenv 74 | .python-version 75 | 76 | # celery beat schedule file 77 | celerybeat-schedule 78 | 79 | # SageMath parsed files 80 | *.sage.py 81 | 82 | # dotenv 83 | .env 84 | 85 | # virtualenv 86 | .venv 87 | venv/ 88 | ENV/ 89 | 90 | # Spyder project settings 91 | .spyderproject 92 | .spyproject 93 | 94 | # Rope project settings 95 | .ropeproject 96 | 97 | # mkdocs documentation 98 | /site 99 | 100 | # mypy 101 | .mypy_cache/ 102 | 103 | molecules_*.json 104 | data/* -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | ## CONTRIBUTING 2 | 3 | This project welcomes contributions and suggestions. Most contributions require you to agree to a 4 | Contributor License Agreement (CLA) declaring that you have the right to, and actually do, grant us 5 | the rights to use your contribution. For details, visit https://cla.microsoft.com. 6 | 7 | When you submit a pull request, a CLA-bot will automatically determine whether you need to provide 8 | a CLA and decorate the PR appropriately (e.g., label, comment). Simply follow the instructions 9 | provided by the bot. You will only need to do this once across all repos using our CLA. 10 | 11 | This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). 12 | For more information see the [Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) or 13 | contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with any additional questions or comments. 14 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) Microsoft Corporation. All rights reserved. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # TF Graph Neural Network Samples 2 | This repository is the code release corresponding to an article introducing 3 | graph neural networks (GNNs) with feature-wise linear modulation ([Brockschmidt, 2019](#brockschmidt-2019)). 4 | In the paper, a number of GNN architectures are discussed: 5 | * Gated Graph Neural Networks (GGNN) ([Li et al., 2015](#li-et-al-2015)). 6 | * Relational Graph Convolutional Networks (RGCN) ([Schlichtkrull et al., 2017](#schlichtkrull-et-al-2017)). 7 | * Relational Graph Attention Networks (RGAT) - a generalisation of Graph Attention Networks ([Veličković et al., 2018](#veličković-et-al-2018)) to several edge types. 8 | * Relational Graph Isomorphism Networks (RGIN) - a generalisation of Graph Isomorphism Networks ([Xu et al., 2019](#xu-et-al-2019)) to several edge types. 9 | * Graph Neural Network with Edge MLPs (GNN-Edge-MLP) - a variant of RGCN in which messages on edges are computed using full MLPs, not just a single layer. 10 | * Relational Graph Dynamic Convolution Networks (RGDCN) - a new variant of RGCN in which the weights of convolutional layers are dynamically computed. 11 | * Graph Neural Networks with Feature-wise Linear Modulation (GNN-FiLM) - a new extension of RGCN with FiLM layers. 12 | 13 | The results presented in the paper are based on the implementations of models 14 | and tasks provided in this repository. 15 | 16 | This code was tested in Python 3.6 with TensorFlow 1.13.1. 17 | To install required packages, run `pip install -r requirements.txt`. 18 | 19 | The code is maintained by the [Deep Program Understanding](https://www.microsoft.com/en-us/research/project/program/) project at Microsoft Research, Cambridge, UK. We are [hiring](https://www.microsoft.com/en-us/research/theme/ada/#!opportunities). 20 | 21 | # Running 22 | To train a model, it suffices to run `python train.py MODEL_TYPE TASK`, for 23 | example as follows: 24 | ``` 25 | $ python train.py RGCN PPI 26 | Loading task/model-specific default parameters from tasks/default_hypers/PPI_RGCN.json. 27 | Loading PPI train data from data/ppi. 28 | Loading PPI valid data from data/ppi. 29 | Model has 699257 parameters. 30 | Run PPI_RGCN_2019-06-26-14-33-58_17208 starting. 
31 | Using the following task params: {"add_self_loop_edges": true, "tie_fwd_bkwd_edges": false, "out_layer_dropout_keep_prob": 1.0} 32 | Using the following model params: {"max_nodes_in_batch": 12500, "graph_num_layers": 3, "graph_num_timesteps_per_layer": 1, "graph_layer_input_dropout_keep_prob": 1.0, "graph_dense_between_every_num_gnn_layers": 10000, "graph_model_activation_function": "tanh", "graph_residual_connection_every_num_layers": 10000, "graph_inter_layer_norm": false, "max_epochs": 10000, "patience": 25, "optimizer": "Adam", "learning_rate": 0.001, "learning_rate_decay": 0.98, "momentum": 0.85, "clamp_gradient_norm": 1.0, "random_seed": 0, "hidden_size": 256, "graph_activation_function": "ReLU", "message_aggregation_function": "sum"} 33 | == Epoch 1 34 | Train: loss: 77.42656 || Avg MicroF1: 0.395 || graphs/sec: 15.09 | nodes/sec: 33879 | edges/sec: 1952084 35 | Valid: loss: 68.86771 || Avg MicroF1: 0.370 || graphs/sec: 14.85 | nodes/sec: 48360 | edges/sec: 3098674 36 | (Best epoch so far, target metric decreased to 224302.10938 from inf. Saving to 'trained_models/PPI_RGCN_2019-06-26-14-33-58_17208_best_model.pickle') 37 | [...] 38 | ``` 39 | An overview of options can be obtained by `python train.py --help`. 40 | 41 | Note that task and model parameters can be overridden (every training 42 | run prints their current settings) using the `--task-param-overrides` and 43 | `--model-param-overrides` command line options, which take dictionaries in JSON 44 | form. 45 | So for example, to choose a different number of layers, 46 | `--model-param-overrides '{"graph_num_layers": 4}'` can be used. 47 | 48 | Results of the training run will also be saved in a directory (by default 49 | `trained_models/`, but this can be set using the `--result_dir` flag). 50 | Concretely, the following two files are created: 51 | * `${RESULT_DIR}/${RUN_NAME}.log`: A log of the training run. 52 | * `${RESULT_DIR}/${RUN_NAME}_best_model.pickle`: A dump of the model weights 53 | achieving the best results on the validation set. 54 | 55 | To evaluate a model, use the `test.py` script as follows on one of the 56 | model dumps generated by `train.py`: 57 | ``` 58 | $ python test.py trained_models/PPI_RGCN_2019-06-26-14-33-58_17208_best_model.pickle 59 | Loading model from file trained_models/PPI_RGCN_2019-06-26-14-33-58_17208_best_model.pickle. 60 | Model has 699257 parameters. 61 | == Running Test on data/ppi == 62 | Loading PPI test data from data/ppi. 63 | Loss 11.13117 on 2 graphs 64 | Metrics: Avg MicroF1: 0.954 65 | ``` 66 | `python test.py --help` provides more options, for example to specify a different 67 | test data set. 68 | A run on the default test set can be automatically triggered after training 69 | using the `--run-test` option to `train.py` as well. 70 | 71 | # Experimental Results 72 | Experimental results reported in the accompanying article can be reproduced 73 | using the code in the repository. 74 | More precisely, `python run_ppi_benchs.py ppi_eval_results/` should 75 | produce an ASCII rendering of Table 1 - note, however, that this will take 76 | quite a while. 77 | Similarly, `python run_qm9_benchs.py qm9_eval_results/` should 78 | produce an ASCII rendering of Table 2 - this will take a very long time 79 | (approx. 13 * 4 * 45 * 5 minutes, i.e., around 8 days), and 80 | in practice, we used a different version of this parallelising the runs 81 | across many hosts using Microsoft-internal infrastructure.
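Rather than running a full benchmark sweep, a single model/task combination can also be trained and evaluated directly by combining the options described above. The following invocation is only an illustrative sketch; the override value and the result directory name are arbitrary choices, not required settings:
```
python train.py GNN-FiLM QM9 \
    --model-param-overrides '{"graph_num_layers": 4}' \
    --result_dir qm9_film_results/ \
    --run-test
```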
82 | 83 | Note that the training script loads fitting default hyperparameters for 84 | model/task combinations from `tasks/default_hypers/{TASK}_{MODEL}.json`. 85 | 86 | # Models 87 | Currently, seven model types are implemented: 88 | * `GGNN`: Gated Graph Neural Networks ([Li et al., 2015](#li-et-al-2015)). 89 | * `RGCN`: Relational Graph Convolutional Networks ([Schlichtkrull et al., 2017](#schlichtkrull-et-al-2017)). 90 | * `RGAT`: Relational Graph Attention Networks ([Veličković et al., 2018](#veličković-et-al-2018)). 91 | * `RGIN`: Relational Graph Isomorphism Networks ([Xu et al., 2019](#xu-et-al-2019)). 92 | * `GNN-Edge-MLP`: Graph Neural Network with Edge MLPs - a variant of RGCN in which messages on edges are computed using full MLPs, not just a single layer applied to the source state. 93 | * `RGDCN`: Relational Graph Dynamic Convolution Networks - a new variant of RGCN in which the weights of convolutional layers are dynamically computed. 94 | * `GNN-FiLM`: Graph Neural Networks with Feature-wise Linear Modulation - a new extension of RGCN with FiLM layers. 95 | 96 | # Tasks 97 | New tasks can be added by implementing the `tasks.sparse_graph_task` interface. 98 | This provides hooks to load data, create task-specific output layers and 99 | compute task-specific metrics. 100 | The documentation in `tasks/sparse_graph_task.py` provides a detailed overview 101 | of the interface. Currently, four tasks are implemented, exposing different 102 | aspects. 103 | 104 | ## Citation networks 105 | The `CitationNetwork` task (implemented in `tasks/citation_network_task.py`) 106 | handles the Cora, Pubmed and Citeseer citation network datasets often used 107 | in evaluation of GNNs ([Sen et al., 2008](#sen-et-al-2008)). 108 | The implementation illustrates how to handle the case of transductive graph 109 | learning on a single graph instance by masking out nodes that shouldn't be 110 | considered. 111 | You can call this by running `python train.py MODEL Cora` (or `Pubmed` or 112 | `Citeseer` instead of `Cora`). 113 | 114 | To run experiments on this task, you need to download the data from 115 | https://github.com/kimiyoung/planetoid/raw/master/data. By default, the 116 | code looks for this data in `data/citation-networks`, but this can be changed 117 | by using `--data-path "SOME/OTHER/DIR"`. 118 | 119 | ## PPI 120 | The `PPI` task (implemented in `tasks/ppi_task.py`) handles the protein-protein 121 | interaction task first described by [Zitnik & Leskovec, 2017](#zitnik-leskovec-2017). 122 | The implementation illustrates how to handle the case of inductive graph 123 | learning with node-level predictions. 124 | You can call this by running `python train.py MODEL PPI`. 125 | 126 | To run experiments on this task, you need to download and unpack the [data](https://data.dgl.ai/dataset/ppi.zip) 127 | as follows: 128 | 129 | ``` 130 | curl -LO https://data.dgl.ai/dataset/ppi.zip 131 | unzip ppi.zip -d data/ppi 132 | ``` 133 | 134 | By default, the code looks for this data in `data/ppi`, but this can be changed 135 | by using `--data-path "SOME/OTHER/DIR"`. 136 | 137 | ### Current Results 138 | Running `python run_ppi_benchs.py ppi_results/` should yield results looking 139 | like this (on an NVidia V100): 140 | 141 | | Model | Avg. MicroF1 | Avg. 
Time | 142 | |---------------|-------------------|------------| 143 | | GGNN | 0.990 (+/- 0.001) | 432.6 | 144 | | RGCN | 0.989 (+/- 0.000) | 759.0 | 145 | | RGAT | 0.989 (+/- 0.001) | 782.3 | 146 | | RGIN | 0.991 (+/- 0.001) | 704.8 | 147 | | GNN-Edge-MLP0 | 0.992 (+/- 0.000) | 556.9 | 148 | | GNN-Edge-MLP1 | 0.992 (+/- 0.001) | 479.2 | 149 | | GNN_FiLM | 0.992 (+/- 0.000) | 308.1 | 150 | 151 | ## QM9 152 | The `QM9` task (implemented in `tasks/qm9_task.py`) handles the quantum chemistry 153 | prediction tasks first described by [Ramakrishnan et al., 2014](#ramakrishnan-et-al-2014) 154 | The implementation illustrates how to handle the case of inductive graph 155 | learning with graph-level predictions. 156 | You can call this by running `python train.py MODEL QM9`. 157 | 158 | The data for this task is included in the repository in `data/qm9`, which just 159 | contains a JSON representation of a pre-processed version of the dataset originally 160 | released by [Ramakrishnan et al., 2014](#ramakrishnan-et-al-2014). 161 | 162 | The results shown in Table 2 of the [technical report](#brockschmidt-2019) can 163 | be reproduced by running `python run_qm9_benchs.py qm9_results/`, but this will 164 | take a very long time (several days) and should best be distributed onto different 165 | compute nodes. 166 | 167 | ## VarMisuse 168 | The `VarMisuse` task (implemented in `tasks/varmisuse_task.py`) handles the 169 | variable misuse task first described by [Allamanis et al., 2018](#allamanis-et-al-2018). 170 | Note that we do not fully re-implement the original model here, and so 171 | results are not (quite) comparable with the results reported in the original 172 | paper. 173 | The implementation illustrates how to handle the case of inductive graph 174 | learning with predictions based on node selection. 175 | You can call this by running `python train.py MODEL VarMisuse`. 176 | 177 | To run experiments on this task, you need to download the dataset from 178 | https://aka.ms/iclr18-prog-graphs-dataset. 179 | To make this usable for the data loading code in this repository, you then need 180 | to edit the top lines of the script `reorg_varmisuse_data.sh` (from this repo) 181 | to point to the downloaded zip file and the directory you want to extract the 182 | data to, and then run it. Note that this will take a relatively long time. 183 | By default, the code looks for this data in `data/varmisuse/`, but this can be 184 | changed by using `--data-path "SOME/OTHER/DIR"`. 185 | 186 | ### Current Results 187 | Running `python run_varmisuse_benchs.py varmisuse_results/` should yield results 188 | looking like this (on a single NVidia V100, this will take about 2 weeks): 189 | 190 | | Model | Valid Acc | Test Acc | TestOnly Acc | 191 | |----------------|-------------------|-------------------|-------------------| 192 | | GGNN | 0.821 (+/- 0.009) | 0.857 (+/- 0.005) | 0.793 (+/- 0.012) | 193 | | RGCN | 0.857 (+/- 0.016) | 0.872 (+/- 0.015) | 0.814 (+/- 0.023) | 194 | | RGAT | 0.842 (+/- 0.010) | 0.869 (+/- 0.007) | 0.812 (+/- 0.009) | 195 | | RGIN | 0.842 (+/- 0.010) | 0.871 (+/- 0.001) | 0.811 (+/- 0.009) | 196 | | GNN-Edge-MLP0 | 0.834 (+/- 0.003) | 0.865 (+/- 0.002) | 0.805 (+/- 0.014) | 197 | | GNN-Edge-MLP1 | 0.844 (+/- 0.004) | 0.869 (+/- 0.003) | 0.814 (+/- 0.007) | 198 | | GNN_FiLM | 0.846 (+/- 0.006) | 0.870 (+/- 0.002) | 0.813 (+/- 0.009) | 199 | 200 | 201 | # References 202 | 203 | #### Allamanis et al., 2018 204 | Miltiadis Allamanis, Marc Brockschmidt, and Mahmoud Khademi. 
Learning to 205 | Represent Programs with Graphs. In International Conference on Learning 206 | Representations (ICLR), 2018. (https://arxiv.org/pdf/1711.00740.pdf) 207 | 208 | #### Brockschmidt, 2019 209 | Marc Brockschmidt. GNN-FiLM: Graph Neural Networks with Feature-wise Linear 210 | Modulation. (https://arxiv.org/abs/1906.12192) 211 | 212 | #### Li et al., 2015 213 | Yujia Li, Daniel Tarlow, Marc Brockschmidt, and Richard Zemel. Gated Graph 214 | Sequence Neural Networks. In International Conference on Learning 215 | Representations (ICLR), 2016. (https://arxiv.org/pdf/1511.05493.pdf) 216 | 217 | #### Ramakrishnan et al., 2014 218 | Raghunathan Ramakrishnan, Pavlo O. Dral, Matthias Rupp, and O. Anatole 219 | Von Lilienfeld. Quantum Chemistry Structures and Properties of 134 Kilo 220 | Molecules. Scientific Data, 1, 2014. 221 | (https://www.nature.com/articles/sdata201422/) 222 | 223 | #### Schlichtkrull et al., 2017 224 | Michael Schlichtkrull, Thomas N. Kipf, Peter Bloem, Rianne van den Berg, 225 | Ivan Titov, and Max Welling. Modeling Relational Data with Graph 226 | Convolutional Networks. In Extended Semantic Web Conference (ESWC), 2018. 227 | (https://arxiv.org/pdf/1703.06103.pdf) 228 | 229 | #### Sen et al., 2008 230 | Prithviraj Sen, Galileo Namata, Mustafa Bilgic, Lise Getoor, Brian Galligher, 231 | and Tina Eliassi-Rad. Collective Classification in Network Data. AI magazine, 232 | 29, 2008. (https://www.aaai.org/ojs/index.php/aimagazine/article/view/2157) 233 | 234 | #### Veličković et al. 2018 235 | Petar Veličković, Guillem Cucurull, Arantxa Casanova, Adriana Romero, Pietro 236 | Liò, and Yoshua Bengio. Graph Attention Networks. In International Conference 237 | on Learning Representations (ICLR), 2018. (https://arxiv.org/pdf/1710.10903.pdf) 238 | 239 | #### Xu et al. 2019 240 | Keyulu Xu, Weihua Hu, Jure Leskovec, and Stefanie Jegelka. How Powerful are 241 | Graph Neural Networks? In International Conference on Learning Representations 242 | (ICLR), 2019. (https://arxiv.org/pdf/1810.00826.pdf) 243 | 244 | #### Zitnik & Leskovec, 2017 245 | Marinka Zitnik and Jure Leskovec. Predicting Multicellular Function Through 246 | Multi-layer Tissue Networks. Bioinformatics, 33, 2017. 247 | (https://arxiv.org/abs/1707.04638) 248 | 249 | # Contributing 250 | 251 | This project welcomes contributions and suggestions. Most contributions 252 | require you to agree to a Contributor License Agreement (CLA) declaring 253 | that you have the right to, and actually do, grant us the rights to use 254 | your contribution. For details, visit https://cla.microsoft.com. 255 | 256 | When you submit a pull request, a CLA-bot will automatically determine 257 | whether you need to provide a CLA and decorate the PR appropriately (e.g., 258 | label, comment). Simply follow the instructions provided by the bot. 259 | You will only need to do this once across all repos using our CLA. 260 | 261 | This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). 262 | For more information see the [Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) or 263 | contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with any additional questions or comments. 
264 | -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | ## Security 4 | 5 | Microsoft takes the security of our software products and services seriously, which includes all source code repositories managed through our GitHub organizations, which include [Microsoft](https://github.com/Microsoft), [Azure](https://github.com/Azure), [DotNet](https://github.com/dotnet), [AspNet](https://github.com/aspnet), [Xamarin](https://github.com/xamarin), and [our GitHub organizations](https://opensource.microsoft.com/). 6 | 7 | If you believe you have found a security vulnerability in any Microsoft-owned repository that meets [Microsoft's definition of a security vulnerability](https://aka.ms/opensource/security/definition), please report it to us as described below. 8 | 9 | ## Reporting Security Issues 10 | 11 | **Please do not report security vulnerabilities through public GitHub issues.** 12 | 13 | Instead, please report them to the Microsoft Security Response Center (MSRC) at [https://msrc.microsoft.com/create-report](https://aka.ms/opensource/security/create-report). 14 | 15 | If you prefer to submit without logging in, send email to [secure@microsoft.com](mailto:secure@microsoft.com). If possible, encrypt your message with our PGP key; please download it from the [Microsoft Security Response Center PGP Key page](https://aka.ms/opensource/security/pgpkey). 16 | 17 | You should receive a response within 24 hours. If for some reason you do not, please follow up via email to ensure we received your original message. Additional information can be found at [microsoft.com/msrc](https://aka.ms/opensource/security/msrc). 18 | 19 | Please include the requested information listed below (as much as you can provide) to help us better understand the nature and scope of the possible issue: 20 | 21 | * Type of issue (e.g. buffer overflow, SQL injection, cross-site scripting, etc.) 22 | * Full paths of source file(s) related to the manifestation of the issue 23 | * The location of the affected source code (tag/branch/commit or direct URL) 24 | * Any special configuration required to reproduce the issue 25 | * Step-by-step instructions to reproduce the issue 26 | * Proof-of-concept or exploit code (if possible) 27 | * Impact of the issue, including how an attacker might exploit the issue 28 | 29 | This information will help us triage your report more quickly. 30 | 31 | If you are reporting for a bug bounty, more complete reports can contribute to a higher bounty award. Please visit our [Microsoft Bug Bounty Program](https://aka.ms/opensource/security/bounty) page for more details about our active programs. 32 | 33 | ## Preferred Languages 34 | 35 | We prefer all communications to be in English. 36 | 37 | ## Policy 38 | 39 | Microsoft follows the principle of [Coordinated Vulnerability Disclosure](https://aka.ms/opensource/security/cvd). 40 | 41 | 42 | -------------------------------------------------------------------------------- /data/qm9/LICENSE: -------------------------------------------------------------------------------- 1 | The data in this directory is a lightly processed and split version of the 2 | dataset originally released in the following article: 3 | 4 | Raghunathan Ramakrishnan, Pavlo O. Dral, Matthias Rupp, and O. Anatole Von Lilienfeld. 5 | Quantum chemistry structures and properties of 134 kilo molecules. 6 | Scientific Data, 1, 2014. 
7 | https://www.nature.com/articles/sdata201422/ 8 | 9 | The data is released under the CC0 license: 10 | 11 | CREATIVE COMMONS CORPORATION IS NOT A LAW FIRM AND DOES NOT PROVIDE LEGAL SERVICES. DISTRIBUTION OF THIS DOCUMENT DOES NOT CREATE AN ATTORNEY-CLIENT RELATIONSHIP. CREATIVE COMMONS PROVIDES THIS INFORMATION ON AN "AS-IS" BASIS. CREATIVE COMMONS MAKES NO WARRANTIES REGARDING THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS PROVIDED HEREUNDER, AND DISCLAIMS LIABILITY FOR DAMAGES RESULTING FROM THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS PROVIDED HEREUNDER. 12 | 13 | Statement of Purpose 14 | 15 | The laws of most jurisdictions throughout the world automatically confer exclusive Copyright and Related Rights (defined below) upon the creator and subsequent owner(s) (each and all, an "owner") of an original work of authorship and/or a database (each, a "Work"). 16 | 17 | Certain owners wish to permanently relinquish those rights to a Work for the purpose of contributing to a commons of creative, cultural and scientific works ("Commons") that the public can reliably and without fear of later claims of infringement build upon, modify, incorporate in other works, reuse and redistribute as freely as possible in any form whatsoever and for any purposes, including without limitation commercial purposes. These owners may contribute to the Commons to promote the ideal of a free culture and the further production of creative, cultural and scientific works, or to gain reputation or greater distribution for their Work in part through the use and efforts of others. 18 | 19 | For these and/or other purposes and motivations, and without any expectation of additional consideration or compensation, the person associating CC0 with a Work (the "Affirmer"), to the extent that he or she is an owner of Copyright and Related Rights in the Work, voluntarily elects to apply CC0 to the Work and publicly distribute the Work under its terms, with knowledge of his or her Copyright and Related Rights in the Work and the meaning and intended legal effect of CC0 on those rights. 20 | 21 | 1. Copyright and Related Rights. A Work made available under CC0 may be protected by copyright and related or neighboring rights ("Copyright and Related Rights"). Copyright and Related Rights include, but are not limited to, the following: 22 | 23 | the right to reproduce, adapt, distribute, perform, display, communicate, and translate a Work; 24 | moral rights retained by the original author(s) and/or performer(s); 25 | publicity and privacy rights pertaining to a person's image or likeness depicted in a Work; 26 | rights protecting against unfair competition in regards to a Work, subject to the limitations in paragraph 4(a), below; 27 | rights protecting the extraction, dissemination, use and reuse of data in a Work; 28 | database rights (such as those arising under Directive 96/9/EC of the European Parliament and of the Council of 11 March 1996 on the legal protection of databases, and under any national implementation thereof, including any amended or successor version of such directive); and 29 | other similar, equivalent or corresponding rights throughout the world based on applicable law or treaty, and any national implementations thereof. 30 | 31 | 2. Waiver. 
To the greatest extent permitted by, but not in contravention of, applicable law, Affirmer hereby overtly, fully, permanently, irrevocably and unconditionally waives, abandons, and surrenders all of Affirmer's Copyright and Related Rights and associated claims and causes of action, whether now known or unknown (including existing as well as future claims and causes of action), in the Work (i) in all territories worldwide, (ii) for the maximum duration provided by applicable law or treaty (including future time extensions), (iii) in any current or future medium and for any number of copies, and (iv) for any purpose whatsoever, including without limitation commercial, advertising or promotional purposes (the "Waiver"). Affirmer makes the Waiver for the benefit of each member of the public at large and to the detriment of Affirmer's heirs and successors, fully intending that such Waiver shall not be subject to revocation, rescission, cancellation, termination, or any other legal or equitable action to disrupt the quiet enjoyment of the Work by the public as contemplated by Affirmer's express Statement of Purpose. 32 | 33 | 3. Public License Fallback. Should any part of the Waiver for any reason be judged legally invalid or ineffective under applicable law, then the Waiver shall be preserved to the maximum extent permitted taking into account Affirmer's express Statement of Purpose. In addition, to the extent the Waiver is so judged Affirmer hereby grants to each affected person a royalty-free, non transferable, non sublicensable, non exclusive, irrevocable and unconditional license to exercise Affirmer's Copyright and Related Rights in the Work (i) in all territories worldwide, (ii) for the maximum duration provided by applicable law or treaty (including future time extensions), (iii) in any current or future medium and for any number of copies, and (iv) for any purpose whatsoever, including without limitation commercial, advertising or promotional purposes (the "License"). The License shall be deemed effective as of the date CC0 was applied by Affirmer to the Work. Should any part of the License for any reason be judged legally invalid or ineffective under applicable law, such partial invalidity or ineffectiveness shall not invalidate the remainder of the License, and in such case Affirmer hereby affirms that he or she will not (i) exercise any of his or her remaining Copyright and Related Rights in the Work or (ii) assert any associated claims and causes of action with respect to the Work, in either case contrary to Affirmer's express Statement of Purpose. 34 | 35 | 4. Limitations and Disclaimers. 36 | 37 | No trademark or patent rights held by Affirmer are waived, abandoned, surrendered, licensed or otherwise affected by this document. 38 | Affirmer offers the Work as-is and makes no representations or warranties of any kind concerning the Work, express, implied, statutory or otherwise, including without limitation warranties of title, merchantability, fitness for a particular purpose, non infringement, or the absence of latent or other defects, accuracy, or the present or absence of errors, whether or not discoverable, all to the greatest extent permissible under applicable law. 39 | Affirmer disclaims responsibility for clearing rights of other persons that may apply to the Work or any use thereof, including without limitation any person's Copyright and Related Rights in the Work. 
Further, Affirmer disclaims responsibility for obtaining any necessary consents, permissions or other rights required for any use of the Work. 40 | Affirmer understands and acknowledges that Creative Commons is not a party to this document and has no duty or obligation with respect to this CC0 or use of the Work. 41 | -------------------------------------------------------------------------------- /data/qm9/test.jsonl.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/tf-gnn-samples/ff14b96ad97be3dc0c6829d2ad54a63e10779a94/data/qm9/test.jsonl.gz -------------------------------------------------------------------------------- /data/qm9/train.jsonl.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/tf-gnn-samples/ff14b96ad97be3dc0c6829d2ad54a63e10779a94/data/qm9/train.jsonl.gz -------------------------------------------------------------------------------- /data/qm9/valid.jsonl.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/tf-gnn-samples/ff14b96ad97be3dc0c6829d2ad54a63e10779a94/data/qm9/valid.jsonl.gz -------------------------------------------------------------------------------- /gnns/__init__.py: -------------------------------------------------------------------------------- 1 | from .ggnn import sparse_ggnn_layer 2 | from .gnn_edge_mlp import sparse_gnn_edge_mlp_layer 3 | from .gnn_film import sparse_gnn_film_layer 4 | from .rgat import sparse_rgat_layer 5 | from .rgcn import sparse_rgcn_layer 6 | from .rgdcn import sparse_rgdcn_layer 7 | from .rgin import sparse_rgin_layer 8 | -------------------------------------------------------------------------------- /gnns/ggnn.py: -------------------------------------------------------------------------------- 1 | from typing import List, Optional 2 | 3 | import tensorflow as tf 4 | 5 | from utils import get_gated_unit, get_aggregation_function 6 | 7 | 8 | def sparse_ggnn_layer(node_embeddings: tf.Tensor, 9 | adjacency_lists: List[tf.Tensor], 10 | state_dim: Optional[int], 11 | num_timesteps: int = 1, 12 | gated_unit_type: str = "gru", 13 | activation_function: str = "tanh", 14 | message_aggregation_function: str = "sum" 15 | ) -> tf.Tensor: 16 | """ 17 | Compute new graph states by neural message passing and gated units on the nodes. 18 | For this, we assume existing node states h^t_v and a list of per-edge-type adjacency 19 | matrices A_\ell. 20 | 21 | We compute new states as follows: 22 | h^{t+1}_v := Cell(h^t_v, \sum_\ell 23 | \sum_{(u, v) \in A_\ell} 24 | W_\ell * h^t_u) 25 | The learnable parameters of this are the recurrent Cell and the W_\ell \in R^{D,D}. 26 | 27 | We use the following abbreviations in shape descriptions: 28 | * V: number of nodes 29 | * D: state dimension 30 | * L: number of different edge types 31 | * E: number of edges of a given edge type 32 | 33 | Arguments: 34 | node_embeddings: float32 tensor of shape [V, D], the original representation of 35 | each node in the graph. 36 | adjacency_lists: List of L adjacency lists, represented as int32 tensors of shape 37 | [E, 2]. Concretely, adjacency_lists[l][k,:] == [v, u] means that the k-th edge 38 | of type l connects node v to node u. 39 | state_dim: Optional size of output dimension of the GNN layer. If not set, defaults 40 | to D, the dimensionality of the input. 
If different from the input dimension, 41 | parameter num_timesteps has to be 1. 42 | num_timesteps: Number of repeated applications of this message passing layer. 43 | gated_unit_type: Type of the recurrent unit used (one of RNN, GRU and LSTM). 44 | activation_function: Type of activation function used. 45 | message_aggregation_function: Type of aggregation function used for messages. 46 | 47 | Returns: 48 | float32 tensor of shape [V, state_dim] 49 | """ 50 | num_nodes = tf.shape(node_embeddings, out_type=tf.int32)[0] 51 | if state_dim is None: 52 | state_dim = tf.shape(node_embeddings, out_type=tf.int32)[1] 53 | 54 | # === Prepare things we need across all timesteps: 55 | message_aggregation_fn = get_aggregation_function(message_aggregation_function) 56 | gated_cell = get_gated_unit(state_dim, gated_unit_type, activation_function) 57 | edge_type_to_message_transformation_layers = [] # Layers to compute the message from a source state 58 | edge_type_to_message_targets = [] # List of tensors of message targets 59 | for edge_type_idx, adjacency_list_for_edge_type in enumerate(adjacency_lists): 60 | edge_type_to_message_transformation_layers.append( 61 | tf.keras.layers.Dense(units=state_dim, 62 | use_bias=False, 63 | activation=None, 64 | name="Edge_%i_Weight" % edge_type_idx)) 65 | edge_type_to_message_targets.append(adjacency_list_for_edge_type[:, 1]) 66 | 67 | # Let M be the number of messages (sum of all E): 68 | message_targets = tf.concat(edge_type_to_message_targets, axis=0) # Shape [M] 69 | 70 | cur_node_states = node_embeddings 71 | for _ in range(num_timesteps): 72 | messages = [] # list of tensors of messages of shape [E, D] 73 | message_source_states = [] # list of tensors of edge source states of shape [E, D] 74 | 75 | # Collect incoming messages per edge type 76 | for edge_type_idx, adjacency_list_for_edge_type in enumerate(adjacency_lists): 77 | edge_sources = adjacency_list_for_edge_type[:, 0] 78 | edge_source_states = tf.nn.embedding_lookup(params=cur_node_states, 79 | ids=edge_sources) # Shape [E, D] 80 | all_messages_for_edge_type = \ 81 | edge_type_to_message_transformation_layers[edge_type_idx](edge_source_states) # Shape [E,D] 82 | messages.append(all_messages_for_edge_type) 83 | message_source_states.append(edge_source_states) 84 | 85 | messages = tf.concat(messages, axis=0) # Shape [M, D] 86 | aggregated_messages = \ 87 | message_aggregation_fn(data=messages, 88 | segment_ids=message_targets, 89 | num_segments=num_nodes) # Shape [V, D] 90 | 91 | # pass updated vertex features into RNN cell 92 | new_node_states = gated_cell(aggregated_messages, [cur_node_states])[0] # Shape [V, D] 93 | cur_node_states = new_node_states 94 | 95 | return cur_node_states 96 | -------------------------------------------------------------------------------- /gnns/gnn_edge_mlp.py: -------------------------------------------------------------------------------- 1 | from typing import List, Optional 2 | import tensorflow as tf 3 | 4 | from utils import get_activation, get_aggregation_function, SMALL_NUMBER, MLP 5 | 6 | 7 | def sparse_gnn_edge_mlp_layer( 8 | node_embeddings: tf.Tensor, 9 | adjacency_lists: List[tf.Tensor], 10 | type_to_num_incoming_edges: tf.Tensor, 11 | state_dim: Optional[int], 12 | num_timesteps: int = 1, 13 | activation_function: Optional[str] = "ReLU", 14 | message_aggregation_function: str = "sum", 15 | normalize_by_num_incoming: bool = False, 16 | use_target_state_as_input: bool = True, 17 | num_edge_hidden_layers: int = 1 18 | ) -> tf.Tensor: 19 | """ 20 | Compute new 
graph states by neural message passing using an edge MLP. 21 | For this, we assume existing node states h^t_v and a list of per-edge-type adjacency 22 | matrices A_\ell. 23 | 24 | We compute new states as follows: 25 | h^{t+1}_v := \sum_\ell 26 | \sum_{(u, v) \in A_\ell} 27 | \sigma(1/c_{v,\ell} * MLP(h^t_u || h^t_v)) 28 | c_{v,\ell} is usually 1 (but could also be the number of incoming edges). 29 | The learnable parameters of this are the weights of the per-edge-type message MLPs. 30 | 31 | We use the following abbreviations in shape descriptions: 32 | * V: number of nodes 33 | * D: state dimension 34 | * L: number of different edge types 35 | * E: number of edges of a given edge type 36 | 37 | Arguments: 38 | node_embeddings: float32 tensor of shape [V, D], the original representation of 39 | each node in the graph. 40 | adjacency_lists: List of L adjacency lists, represented as int32 tensors of shape 41 | [E, 2]. Concretely, adjacency_lists[l][k,:] == [v, u] means that the k-th edge 42 | of type l connects node v to node u. 43 | type_to_num_incoming_edges: float32 tensor of shape [L, V] representing the number 44 | of incoming edges of a given type. Concretely, type_to_num_incoming_edges[l, v] 45 | is the number of edges of type l connecting to node v. 46 | state_dim: Optional size of output dimension of the GNN layer. If not set, defaults 47 | to D, the dimensionality of the input. If different from the input dimension, 48 | parameter num_timesteps has to be 1. 49 | num_timesteps: Number of repeated applications of this message passing layer. 50 | activation_function: Type of activation function used. 51 | message_aggregation_function: Type of aggregation function used for messages. 52 | normalize_by_num_incoming: Flag indicating if messages should be scaled by 1/(number 53 | of incoming edges). 54 | use_target_state_as_input: Flag indicating if the edge MLP should consume both 55 | source and target state (True) or only source state (False). 56 | num_edge_hidden_layers: Number of hidden layers of the edge MLP. 57 | message_weights_dropout_ratio: Dropout ratio applied to the weights used 58 | to compute message passing functions. 
59 | 60 | Returns: 61 | float32 tensor of shape [V, state_dim] 62 | """ 63 | num_nodes = tf.shape(node_embeddings, out_type=tf.int32)[0] 64 | if state_dim is None: 65 | state_dim = tf.shape(node_embeddings, out_type=tf.int32)[1] 66 | 67 | # === Prepare things we need across all timesteps: 68 | activation_fn = get_activation(activation_function) 69 | message_aggregation_fn = get_aggregation_function(message_aggregation_function) 70 | edge_type_to_edge_mlp = [] # MLPs to compute the edge messages 71 | edge_type_to_message_targets = [] # List of tensors of message targets 72 | for edge_type_idx, adjacency_list_for_edge_type in enumerate(adjacency_lists): 73 | edge_type_to_edge_mlp.append( 74 | MLP(out_size=state_dim, 75 | hidden_layers=num_edge_hidden_layers, 76 | activation_fun=tf.nn.elu, 77 | name="Edge_%i_MLP" % edge_type_idx)) 78 | edge_type_to_message_targets.append(adjacency_list_for_edge_type[:, 1]) 79 | 80 | # Let M be the number of messages (sum of all E): 81 | message_targets = tf.concat(edge_type_to_message_targets, axis=0) # Shape [M] 82 | 83 | cur_node_states = node_embeddings 84 | for _ in range(num_timesteps): 85 | messages_per_type = [] # list of tensors of messages of shape [E, D] 86 | # Collect incoming messages per edge type 87 | for edge_type_idx, adjacency_list_for_edge_type in enumerate(adjacency_lists): 88 | edge_sources = adjacency_list_for_edge_type[:, 0] 89 | edge_targets = adjacency_list_for_edge_type[:, 1] 90 | edge_source_states = \ 91 | tf.nn.embedding_lookup(params=cur_node_states, 92 | ids=edge_sources) # Shape [E, D] 93 | 94 | edge_mlp_inputs = edge_source_states 95 | if use_target_state_as_input: 96 | edge_target_states = \ 97 | tf.nn.embedding_lookup(params=cur_node_states, 98 | ids=edge_targets) # Shape [E, D] 99 | edge_mlp_inputs = tf.concat([edge_source_states, edge_target_states], 100 | axis=1) # Shape [E, 2*D] 101 | 102 | messages = edge_type_to_edge_mlp[edge_type_idx](edge_mlp_inputs) # Shape [E, D] 103 | 104 | if normalize_by_num_incoming: 105 | per_message_num_incoming_edges = \ 106 | tf.nn.embedding_lookup(params=type_to_num_incoming_edges[edge_type_idx, :], 107 | ids=edge_targets) # Shape [E, H] 108 | messages = tf.expand_dims(1.0 / (per_message_num_incoming_edges + SMALL_NUMBER), axis=-1) * messages 109 | messages_per_type.append(messages) 110 | 111 | all_messages = tf.concat(messages_per_type, axis=0) # Shape [M, D] 112 | all_messages = activation_fn(all_messages) # Shape [M, D] (Apply nonlinearity to Edge-MLP outputs as well) 113 | aggregated_messages = \ 114 | message_aggregation_fn(data=all_messages, 115 | segment_ids=message_targets, 116 | num_segments=num_nodes) # Shape [V, D] 117 | 118 | new_node_states = aggregated_messages 119 | new_node_states = tf.contrib.layers.layer_norm(new_node_states) 120 | cur_node_states = new_node_states 121 | 122 | return cur_node_states 123 | -------------------------------------------------------------------------------- /gnns/gnn_film.py: -------------------------------------------------------------------------------- 1 | from typing import List, Optional 2 | import tensorflow as tf 3 | 4 | 5 | from utils import get_activation, get_aggregation_function, SMALL_NUMBER 6 | 7 | 8 | def sparse_gnn_film_layer(node_embeddings: tf.Tensor, 9 | adjacency_lists: List[tf.Tensor], 10 | type_to_num_incoming_edges: tf.Tensor, 11 | state_dim: Optional[int], 12 | num_timesteps: int = 1, 13 | activation_function: Optional[str] = "ReLU", 14 | message_aggregation_function: str = "sum", 15 | normalize_by_num_incoming: bool = 
False, 16 | ) -> tf.Tensor: 17 | """ 18 | Compute new graph states by neural message passing modulated by the target state. 19 | For this, we assume existing node states h^t_v and a list of per-edge-type adjacency 20 | matrices A_\ell. 21 | 22 | We compute new states as follows: 23 | h^{t+1}_v := \sum_\ell 24 | \sum_{(u, v) \in A_\ell} 25 | \sigma(1/c_{v,\ell} * \alpha_{\ell,v} * (W_\ell * h^t_u) + \beta_{\ell,v}) 26 | \alpha_{\ell,v} := F_{\ell,\alpha} * h^t_v 27 | \beta_{\ell,v} := F_{\ell,\beta} * h^t_v 28 | c_{\v,\ell} is usually 1 (but could also be the number of incoming edges). 29 | The learnable parameters of this are the W_\ell, F_{\ell,\alpha}, F_{\ell,\beta} \in R^{D, D}. 30 | 31 | We use the following abbreviations in shape descriptions: 32 | * V: number of nodes 33 | * D: state dimension 34 | * L: number of different edge types 35 | * E: number of edges of a given edge type 36 | 37 | Arguments: 38 | node_embeddings: float32 tensor of shape [V, D], the original representation of 39 | each node in the graph. 40 | adjacency_lists: List of L adjacency lists, represented as int32 tensors of shape 41 | [E, 2]. Concretely, adjacency_lists[l][k,:] == [v, u] means that the k-th edge 42 | of type l connects node v to node u. 43 | type_to_num_incoming_edges: float32 tensor of shape [L, V] representing the number 44 | of incoming edges of a given type. Concretely, type_to_num_incoming_edges[l, v] 45 | is the number of edge of type l connecting to node v. 46 | state_dim: Optional size of output dimension of the GNN layer. If not set, defaults 47 | to D, the dimensionality of the input. If different from the input dimension, 48 | parameter num_timesteps has to be 1. 49 | num_timesteps: Number of repeated applications of this message passing layer. 50 | activation_function: Type of activation function used. 51 | message_aggregation_function: Type of aggregation function used for messages. 52 | normalize_by_num_incoming: Flag indicating if messages should be scaled by 1/(number 53 | of incoming edges). 
54 | 55 | Returns: 56 | float32 tensor of shape [V, state_dim] 57 | """ 58 | num_nodes = tf.shape(node_embeddings, out_type=tf.int32)[0] 59 | if state_dim is None: 60 | state_dim = tf.shape(node_embeddings, out_type=tf.int32)[1] 61 | 62 | # === Prepare things we need across all timesteps: 63 | activation_fn = get_activation(activation_function) 64 | message_aggregation_fn = get_aggregation_function(message_aggregation_function) 65 | edge_type_to_message_transformation_layers = [] # Layers to compute the message from a source state 66 | edge_type_to_film_computation_layers = [] # Layers to compute the \beta/\gamma weights for FiLM 67 | edge_type_to_message_targets = [] # List of tensors of message targets 68 | for edge_type_idx, adjacency_list_for_edge_type in enumerate(adjacency_lists): 69 | edge_type_to_message_transformation_layers.append( 70 | tf.keras.layers.Dense(units=state_dim, 71 | use_bias=False, 72 | activation=None, # Activation only after FiLM modulation 73 | name="Edge_%i_Weight" % edge_type_idx)) 74 | edge_type_to_film_computation_layers.append( 75 | tf.keras.layers.Dense(units=2 * state_dim, # Computes \gamma, \beta in one go 76 | use_bias=False, 77 | activation=None, 78 | name="Edge_%i_FiLM_Computations" % edge_type_idx)) 79 | edge_type_to_message_targets.append(adjacency_list_for_edge_type[:, 1]) 80 | 81 | # Let M be the number of messages (sum of all E): 82 | message_targets = tf.concat(edge_type_to_message_targets, axis=0) # Shape [M] 83 | 84 | cur_node_states = node_embeddings 85 | for _ in range(num_timesteps): 86 | messages_per_type = [] # list of tensors of messages of shape [E, D] 87 | # Collect incoming messages per edge type 88 | for edge_type_idx, adjacency_list_for_edge_type in enumerate(adjacency_lists): 89 | edge_sources = adjacency_list_for_edge_type[:, 0] 90 | edge_targets = adjacency_list_for_edge_type[:, 1] 91 | edge_source_states = \ 92 | tf.nn.embedding_lookup(params=cur_node_states, 93 | ids=edge_sources) # Shape [E, D] 94 | messages = edge_type_to_message_transformation_layers[edge_type_idx](edge_source_states) # Shape [E, D] 95 | 96 | if normalize_by_num_incoming: 97 | per_message_num_incoming_edges = \ 98 | tf.nn.embedding_lookup(params=type_to_num_incoming_edges[edge_type_idx, :], 99 | ids=edge_targets) # Shape [E, H] 100 | messages = tf.expand_dims(1.0 / (per_message_num_incoming_edges + SMALL_NUMBER), axis=-1) * messages 101 | 102 | film_weights = edge_type_to_film_computation_layers[edge_type_idx](cur_node_states) 103 | per_message_film_weights = \ 104 | tf.nn.embedding_lookup(params=film_weights, ids=edge_targets) 105 | per_message_film_gamma_weights = per_message_film_weights[:, :state_dim] # Shape [E, D] 106 | per_message_film_beta_weights = per_message_film_weights[:, state_dim:] # Shape [E, D] 107 | 108 | modulated_messages = per_message_film_gamma_weights * messages + per_message_film_beta_weights 109 | messages_per_type.append(modulated_messages) 110 | 111 | all_messages = tf.concat(messages_per_type, axis=0) # Shape [M, D] 112 | all_messages = activation_fn(all_messages) # Shape [M, D] 113 | aggregated_messages = \ 114 | message_aggregation_fn(data=all_messages, 115 | segment_ids=message_targets, 116 | num_segments=num_nodes) # Shape [V, D] 117 | new_node_states = aggregated_messages 118 | # new_node_states = activation_fn(new_node_states) 119 | 120 | cur_node_states = tf.contrib.layers.layer_norm(new_node_states) 121 | 122 | return cur_node_states 123 | -------------------------------------------------------------------------------- 
/gnns/rgat.py: -------------------------------------------------------------------------------- 1 | from typing import List, Optional 2 | 3 | import tensorflow as tf 4 | from dpu_utils.tfutils import unsorted_segment_log_softmax 5 | 6 | from utils import get_activation 7 | 8 | 9 | def sparse_rgat_layer(node_embeddings: tf.Tensor, 10 | adjacency_lists: List[tf.Tensor], 11 | state_dim: Optional[int], 12 | num_heads: int = 4, 13 | num_timesteps: int = 1, 14 | activation_function: Optional[str] = "tanh" 15 | ) -> tf.Tensor: 16 | """ 17 | Compute new graph states by neural message passing using attention. This generalises 18 | the original GAT model (Velickovic et al., https://arxiv.org/pdf/1710.10903.pdf) 19 | to multiple edge types by using different weights for different edge types. 20 | For this, we assume existing node states h^t_v and a list of per-edge-type adjacency 21 | matrices A_\ell. 22 | 23 | In the setting for a single attention head, we compute new states as follows: 24 | h^t_{v, \ell} := W_\ell h^t_v 25 | e_{u, \ell, v} := LeakyReLU(\alpha_\ell^T * concat(h^t_{u, \ell}, h^t_{v, \ell})) 26 | a_v := softmax_{\ell, u with (u, v) \in A_\ell}(e_{u, \ell, v}) 27 | h^{t+1}_v := \sigma(\sum_{ell, (u, v) \in A_\ell} 28 | a_v_{u, \ell} * h^_{u, \ell}) 29 | The learnable parameters of this are the W_\ell \in R^{D, D} and \alpha_\ell \in R^{2*D}. 30 | 31 | In practice, we use K attention heads, computing separate, partial new states h^{t+1}_{v,k} 32 | and compute h^{t+1}_v as the concatentation of the partial states. 33 | For this, we reduce the shape of W_\ell to R^{D, D/K} and \alpha_\ell to R^{2*D/K}. 34 | 35 | We use the following abbreviations in shape descriptions: 36 | * V: number of nodes 37 | * D: state dimension 38 | * K: number of attention heads 39 | * L: number of different edge types 40 | * E: number of edges of a given edge type 41 | 42 | Arguments: 43 | node_embeddings: float32 tensor of shape [V, D], the original representation of 44 | each node in the graph. 45 | adjacency_lists: List of L adjacency lists, represented as int32 tensors of shape 46 | [E, 2]. Concretely, adjacency_lists[l][k,:] == [v, u] means that the k-th edge 47 | of type l connects node v to node u. 48 | state_dim: Optional size of output dimension of the GNN layer. If not set, defaults 49 | to D, the dimensionality of the input. If different from the input dimension, 50 | parameter num_timesteps has to be 1. 51 | num_heads: Number of attention heads to use. 52 | num_timesteps: Number of repeated applications of this message passing layer. 53 | activation_function: Type of activation function used. 
54 | 55 | Returns: 56 | float32 tensor of shape [V, state_dim] 57 | """ 58 | num_nodes = tf.shape(node_embeddings, out_type=tf.int32)[0] 59 | if state_dim is None: 60 | state_dim = tf.shape(node_embeddings, out_type=tf.int32)[1] 61 | per_head_dim = state_dim // num_heads 62 | 63 | # === Prepare things we need across all timesteps: 64 | activation_fn = get_activation(activation_function) 65 | edge_type_to_state_transformation_layers = [] # Layers to compute the message from a source state 66 | edge_type_to_attention_parameters = [] # Parameters for the attention mechanism 67 | edge_type_to_message_targets = [] # List of tensors of message targets 68 | for edge_type_idx, adjacency_list_for_edge_type in enumerate(adjacency_lists): 69 | edge_type_to_state_transformation_layers.append( 70 | tf.keras.layers.Dense(units=state_dim, 71 | use_bias=False, 72 | activation=None, 73 | name="Edge_%i_Weight" % edge_type_idx)) 74 | edge_type_to_attention_parameters.append( 75 | tf.get_variable(shape=(2 * state_dim), 76 | name="Edge_%i_Attention_Parameters" % edge_type_idx)) 77 | edge_type_to_message_targets.append(adjacency_list_for_edge_type[:, 1]) 78 | 79 | # Let M be the number of messages (sum of all E): 80 | message_targets = tf.concat(edge_type_to_message_targets, axis=0) # Shape [M] 81 | 82 | cur_node_states = node_embeddings 83 | for _ in range(num_timesteps): 84 | edge_type_to_per_head_messages = [] # type: List[tf.Tensor] # list of lists of tensors of messages of shape [E, K, D/K] 85 | edge_type_to_per_head_attention_coefficients = [] # type: List[tf.Tensor] # list of lists of tensors of shape [E, K] 86 | 87 | # Collect incoming messages per edge type 88 | # Note: 89 | # We compute the state transformations (to make use of the wider, faster matrix multiplication), 90 | # and then split into the individual attention heads via some reshapes: 91 | for edge_type_idx, adjacency_list_for_edge_type in enumerate(adjacency_lists): 92 | edge_sources = adjacency_list_for_edge_type[:, 0] 93 | edge_targets = adjacency_list_for_edge_type[:, 1] 94 | 95 | transformed_states = \ 96 | edge_type_to_state_transformation_layers[edge_type_idx](cur_node_states) # Shape [V, D] 97 | 98 | edge_transformed_source_states = \ 99 | tf.nn.embedding_lookup(params=transformed_states, ids=edge_sources) # Shape [E, D] 100 | edge_transformed_target_states = \ 101 | tf.nn.embedding_lookup(params=transformed_states, ids=edge_targets) # Shape [E, D] 102 | 103 | per_edge_per_head_transformed_source_states = \ 104 | tf.reshape(edge_transformed_source_states, shape=(-1, num_heads, per_head_dim)) 105 | 106 | per_edge_per_head_transformed_states = \ 107 | tf.concat([per_edge_per_head_transformed_source_states, 108 | tf.reshape(edge_transformed_target_states, shape=(-1, num_heads, per_head_dim))], 109 | axis=-1) # Shape [E, K, 2*D/K] 110 | per_head_attention_pars = tf.reshape(edge_type_to_attention_parameters[edge_type_idx], 111 | shape=(num_heads, 2 * per_head_dim)) # Shape [K, 2*D/K] 112 | per_edge_per_head_attention_coefficients = \ 113 | tf.nn.leaky_relu(tf.einsum('vki,ki->vk', 114 | per_edge_per_head_transformed_states, 115 | per_head_attention_pars)) # Shape [E, K] 116 | 117 | edge_type_to_per_head_messages.append(per_edge_per_head_transformed_source_states) 118 | edge_type_to_per_head_attention_coefficients.append(per_edge_per_head_attention_coefficients) 119 | 120 | per_head_messages = tf.concat(edge_type_to_per_head_messages, axis=0) 121 | per_head_attention_coefficients = tf.concat(edge_type_to_per_head_attention_coefficients, 
axis=0) 122 | 123 | head_to_aggregated_messages = [] # list of tensors of shape [V, D/K] 124 | for head_idx in range(num_heads): 125 | # Compute the softmax over all the attention coefficients for all messages going to this state: 126 | attention_coefficients = tf.concat(per_head_attention_coefficients[:, head_idx], axis=0) # Shape [M] 127 | attention_values = \ 128 | tf.exp(unsorted_segment_log_softmax(logits=attention_coefficients, 129 | segment_ids=message_targets, 130 | num_segments=num_nodes)) # Shape [M] 131 | messages = per_head_messages[:, head_idx, :] # Shape [M, D/K] 132 | # Compute weighted sum per target node for this head: 133 | head_to_aggregated_messages.append( 134 | tf.unsorted_segment_sum(data=tf.expand_dims(attention_values, -1) * messages, 135 | segment_ids=message_targets, 136 | num_segments=num_nodes)) 137 | 138 | new_node_states = activation_fn(tf.concat(head_to_aggregated_messages, axis=-1)) 139 | cur_node_states = new_node_states 140 | 141 | return cur_node_states 142 | -------------------------------------------------------------------------------- /gnns/rgcn.py: -------------------------------------------------------------------------------- 1 | from typing import List, Optional 2 | 3 | import tensorflow as tf 4 | 5 | from utils import get_activation, get_aggregation_function, SMALL_NUMBER 6 | 7 | 8 | def sparse_rgcn_layer(node_embeddings: tf.Tensor, 9 | adjacency_lists: List[tf.Tensor], 10 | type_to_num_incoming_edges: tf.Tensor, 11 | state_dim: Optional[int], 12 | num_timesteps: int = 1, 13 | activation_function: Optional[str] = "tanh", 14 | message_aggregation_function: str = "sum", 15 | normalize_by_num_incoming: bool = True, 16 | use_both_source_and_target: bool = False, 17 | ) -> tf.Tensor: 18 | """ 19 | Compute new graph states by neural message passing. 20 | This implements the R-GCN model (Schlichtkrull et al., https://arxiv.org/pdf/1703.06103.pdf) 21 | for the case of few relations / edge types, i.e., we do not use the dimensionality-reduction 22 | tricks from section 2.2 of that paper. 23 | For this, we assume existing node states h^t_v and a list of per-edge-type adjacency 24 | matrices A_\ell. 25 | 26 | We compute new states as follows: 27 | h^{t+1}_v := \sigma(\sum_\ell 28 | \sum_{(u, v) \in A_\ell} 29 | 1/c_{v,\ell} * (W_\ell * h^t_u)) 30 | c_{\v,\ell} is usually the number of \ell edges going into v. 31 | The learnable parameters of this are the W_\ell \in R^{D,D}. 32 | 33 | We use the following abbreviations in shape descriptions: 34 | * V: number of nodes 35 | * D: state dimension 36 | * L: number of different edge types 37 | * E: number of edges of a given edge type 38 | 39 | Arguments: 40 | node_embeddings: float32 tensor of shape [V, D], the original representation of 41 | each node in the graph. 42 | adjacency_lists: List of L adjacency lists, represented as int32 tensors of shape 43 | [E, 2]. Concretely, adjacency_lists[l][k,:] == [v, u] means that the k-th edge 44 | of type l connects node v to node u. 45 | type_to_num_incoming_edges: float32 tensor of shape [L, V] representing the number 46 | of incoming edges of a given type. Concretely, type_to_num_incoming_edges[l, v] 47 | is the number of edge of type l connecting to node v. 48 | state_dim: Optional size of output dimension of the GNN layer. If not set, defaults 49 | to D, the dimensionality of the input. If different from the input dimension, 50 | parameter num_timesteps has to be 1. 51 | num_timesteps: Number of repeated applications of this message passing layer. 
52 | activation_function: Type of activation function used. 53 | message_aggregation_function: Type of aggregation function used for messages. 54 | normalize_by_num_incoming: Flag indicating if messages should be scaled by 1/(number 55 | of incoming edges). 56 | 57 | Returns: 58 | float32 tensor of shape [V, state_dim] 59 | """ 60 | num_nodes = tf.shape(node_embeddings, out_type=tf.int32)[0] 61 | if state_dim is None: 62 | state_dim = tf.shape(node_embeddings, out_type=tf.int32)[1] 63 | 64 | # === Prepare things we need across all timesteps: 65 | activation_fn = get_activation(activation_function) 66 | message_aggregation_fn = get_aggregation_function(message_aggregation_function) 67 | edge_type_to_message_transformation_layers = [] # Layers to compute the message from a source state 68 | edge_type_to_message_targets = [] # List of tensors of message targets 69 | for edge_type_idx, adjacency_list_for_edge_type in enumerate(adjacency_lists): 70 | edge_type_to_message_transformation_layers.append( 71 | tf.keras.layers.Dense(units=state_dim, 72 | use_bias=False, 73 | activation=None, 74 | name="Edge_%i_Weight" % edge_type_idx)) 75 | edge_type_to_message_targets.append(adjacency_list_for_edge_type[:, 1]) 76 | 77 | # Let M be the number of messages (sum of all E): 78 | message_targets = tf.concat(edge_type_to_message_targets, axis=0) # Shape [M] 79 | 80 | cur_node_states = node_embeddings 81 | for _ in range(num_timesteps): 82 | messages_per_type = [] # list of tensors of messages of shape [E, H] 83 | # Collect incoming messages per edge type 84 | for edge_type_idx, adjacency_list_for_edge_type in enumerate(adjacency_lists): 85 | edge_sources = adjacency_list_for_edge_type[:, 0] 86 | edge_targets = adjacency_list_for_edge_type[:, 1] 87 | edge_source_states = \ 88 | tf.nn.embedding_lookup(params=cur_node_states, 89 | ids=edge_sources) # Shape [E, H] 90 | 91 | if use_both_source_and_target: 92 | edge_target_states = \ 93 | tf.nn.embedding_lookup(params=cur_node_states, 94 | ids=edge_targets) # Shape [E, H] 95 | edge_state_pairs = tf.concat([edge_source_states, edge_target_states], axis=-1) # Shape [E, 2H] 96 | messages = edge_type_to_message_transformation_layers[edge_type_idx](edge_state_pairs) # Shape [E, H] 97 | else: 98 | messages = edge_type_to_message_transformation_layers[edge_type_idx](edge_source_states) # Shape [E, H] 99 | 100 | if normalize_by_num_incoming: 101 | num_incoming_to_node_per_message = \ 102 | tf.nn.embedding_lookup(params=type_to_num_incoming_edges[edge_type_idx, :], 103 | ids=edge_targets) # Shape [E, H] 104 | messages = tf.expand_dims(1.0 / (num_incoming_to_node_per_message + SMALL_NUMBER), axis=-1) * messages 105 | 106 | messages_per_type.append(messages) 107 | 108 | cur_messages = tf.concat(messages_per_type, axis=0) # Shape [M, H] 109 | aggregated_messages = \ 110 | message_aggregation_fn(data=cur_messages, 111 | segment_ids=message_targets, 112 | num_segments=num_nodes) # Shape [V, H] 113 | 114 | new_node_states = activation_fn(aggregated_messages) # Shape [V, H] 115 | cur_node_states = new_node_states 116 | 117 | return cur_node_states 118 | -------------------------------------------------------------------------------- /gnns/rgdcn.py: -------------------------------------------------------------------------------- 1 | from typing import List, Optional 2 | 3 | import tensorflow as tf 4 | 5 | from utils import get_activation, get_aggregation_function, SMALL_NUMBER 6 | 7 | 8 | def sparse_rgdcn_layer(node_embeddings: tf.Tensor, 9 | adjacency_lists: List[tf.Tensor], 10 
| type_to_num_incoming_edges: tf.Tensor, 11 | num_channels: int = 8, 12 | channel_dim: int = 16, 13 | num_timesteps: int = 1, 14 | use_full_state_for_channel_weights: bool = False, 15 | tie_channel_weights: bool = False, 16 | activation_function: Optional[str] = "tanh", 17 | message_aggregation_function: str = "sum", 18 | normalize_by_num_incoming: bool = True, 19 | ) -> tf.Tensor: 20 | """ 21 | Compute new graph states by message passing using dynamic convolutions for edge kernels. 22 | For this, we assume existing node states h^t_v and a list of per-edge-type adjacency 23 | matrices A_\ell. 24 | We split each state h^t_v into C "channels" of dimension K, and use h^t_{v,c,:} to refer to 25 | the slice of the node state corresponding to the c-th channel. 26 | 27 | Four variants of the model are implemented: 28 | 29 | (1) Edge kernels computed from full target node state using weights shared across all channels: 30 | [use_full_state_for_channel_weights = True, tie_channel_weights = True] 31 | h^{t+1}_v := \sigma(Concat(\sum_\ell \sum_{(u, v) \in A_\ell} W^t_{\ell,v} * h^t_{u,c,:} 32 | | 1 <= c <= C)) 33 | W^t_{\ell,v} := F_\ell * h^t_{v,:,:} 34 | The learnable parameters of this are the F_\ell \in R^{C*K, K*K}. 35 | 36 | (2) Edge kernels computed from full target node state using separate weights for each channel: 37 | [use_full_state_for_channel_weights = True, tie_channel_weights = False] 38 | h^{t+1}_v := \sigma(Concat(\sum_\ell \sum_{(u, v) \in A_\ell} \sigma(W^t_{\ell,v,c} * h^t_{u,c,:} 39 | | 1 <= c <= C) 40 | W^t_{\ell,v,c} := F_{\ell,c} * h^t_{v,:,:} 41 | The learnable parameters of this are the F_{\ell,c} \in R^{C*K, K*K}. 42 | 43 | (3) Edge kernels computed from corresponding channel of target node using weights shared across all channels: 44 | [use_full_state_for_channel_weights = False, tie_channel_weights = True] 45 | h^{t+1}_v := \sigma(Concat(\sum_\ell \sum_{(u, v) \in A_\ell} \sigma(W^t_{\ell,v,c} * h^t_{u,c,:} 46 | | 1 <= c <= C) 47 | W^t_{\ell,v,c} := F_{\ell} * h^t_{v,c,:} 48 | The learnable parameters of this are the F_\ell \in R^{K, K*K}. 49 | 50 | (4) Edge kernels computed from corresponding channel of target node using separate weights for each channel: 51 | [use_full_state_for_channel_weights = False, tie_channel_weights = False] 52 | h^{t+1}_v := \sigma(Concat(\sum_\ell \sum_{(u, v) \in A_\ell} W^t_{\ell,v,c} * h^t_{u,c,:} 53 | | 1 <= c <= C)) 54 | W^t_{\ell,v,c} := F_{\ell,c} * h^t_{v,c,:} 55 | The learnable parameters of this are the F_{\ell,c} \in R^{K, K*K}. 56 | 57 | We use the following abbreviations in shape descriptions: 58 | * V: number of nodes 59 | * C: number of "channels" 60 | * K: dimension of each "channel" 61 | * D: state dimension, fixed to C * K. 62 | * L: number of different edge types 63 | * E: number of edges of a given edge type 64 | 65 | Args: 66 | node_embeddings: float32 tensor of shape [V, D], the original representation of 67 | each node in the graph. 68 | adjacency_lists: List of L adjacency lists, represented as int32 tensors of shape 69 | [E, 2]. Concretely, adjacency_lists[l][k,:] == [v, u] means that the k-th edge 70 | of type l connects node v to node u. 71 | num_channels: Number of "channels" to split state information into. 72 | channel_dim: Size of each "channel" 73 | num_timesteps: Number of repeated applications of this message passing layer. 74 | use_full_state_for_channel_weights: Flag indicating if the full state is used to 75 | compute the weights for individual channels, or only the corresponding channel. 
76 | tie_channel_weights: Flag indicating if the weights for computing the per-channel 77 | linear layer are shared or not. 78 | activation_function: Type of activation function used. 79 | message_aggregation_function: Type of aggregation function used for messages. 80 | normalize_by_num_incoming: Flag indicating if messages should be scaled by 1/(number 81 | of incoming edges). 82 | 83 | Returns: 84 | float32 tensor of shape [V, D] 85 | """ 86 | num_nodes = tf.shape(node_embeddings, out_type=tf.int32)[0] 87 | 88 | # === Prepare things we need across all timesteps: 89 | activation_fn = get_activation(activation_function) 90 | message_aggregation_fn = get_aggregation_function(message_aggregation_function) 91 | edge_type_to_channel_to_weight_computation_layers = [] # Layers to compute the dynamic computation weights 92 | edge_type_to_message_targets = [] # List of tensors of message targets 93 | 94 | for edge_type_idx, adjacency_list_for_edge_type in enumerate(adjacency_lists): 95 | channel_to_weight_computation_layers = [] 96 | for channel in range(num_channels): 97 | if channel == 0 or not(tie_channel_weights): 98 | channel_to_weight_computation_layers.append( 99 | tf.keras.layers.Dense( 100 | units=channel_dim * channel_dim, 101 | use_bias=False, 102 | kernel_initializer=tf.initializers.truncated_normal(mean=0.0, stddev=1.0 / (channel_dim**2)), 103 | activation=activation_fn, 104 | name="Edge_%i_Channel_%i_Weight_Computation" % (edge_type_idx, channel))) 105 | else: # Case channel > 0 and tie_channel_weights 106 | channel_to_weight_computation_layers.append( 107 | channel_to_weight_computation_layers[-1]) 108 | edge_type_to_channel_to_weight_computation_layers.append(channel_to_weight_computation_layers) 109 | 110 | edge_type_to_message_targets.append(adjacency_list_for_edge_type[:, 1]) 111 | 112 | # Let M be the number of messages (sum of all E): 113 | message_targets = tf.concat(edge_type_to_message_targets, axis=0) # Shape [M] 114 | 115 | cur_node_states = node_embeddings # Shape [V, D] 116 | for _ in range(num_timesteps): 117 | node_states_chunked = tf.reshape(cur_node_states, 118 | shape=(-1, num_channels, channel_dim)) # shape [V, C, K] 119 | 120 | new_node_states_chunked = [] # type: List[tf.Tensor] # C tensors of shape [V, K] 121 | for channel_idx in range(num_channels): 122 | cur_channel_node_states = node_states_chunked[:, channel_idx, :] # shape [V, K] 123 | cur_channel_message_per_type = [] # list of tensors of messages of shape [E, K] 124 | 125 | # Collect incoming messages per edge type 126 | for edge_type_idx, adjacency_list_for_edge_type in enumerate(adjacency_lists): 127 | edge_sources = adjacency_list_for_edge_type[:, 0] 128 | edge_targets = adjacency_list_for_edge_type[:, 1] 129 | edge_source_states = \ 130 | tf.nn.embedding_lookup(params=cur_channel_node_states, 131 | ids=edge_sources) # Shape [E, K] 132 | 133 | if use_full_state_for_channel_weights: 134 | weight_computation_input = cur_node_states 135 | else: 136 | weight_computation_input = cur_channel_node_states 137 | # TODO: In the tie_channel_weights && use_full_state_for_channel_weights case, 138 | # this is the same for each channel: 139 | weight_compute_layer = edge_type_to_channel_to_weight_computation_layers[edge_type_idx][channel_idx] 140 | edge_weights = weight_compute_layer(weight_computation_input) # Shape [V, K*K] 141 | edge_weights = tf.reshape(edge_weights, shape=(-1, channel_dim, channel_dim)) # Shape [V, K, K] 142 | edge_weights_for_targets = \ 143 | tf.nn.embedding_lookup(params=edge_weights, 
ids=edge_targets) # Shape [E, K, K] 144 | 145 |                 # Matrix multiply between edge_source_states[v] and edge_weights_for_targets[v]: 146 |                 messages = tf.einsum('vi,vij->vj', edge_source_states, edge_weights_for_targets) # Shape [E, K] 147 |                 if normalize_by_num_incoming: 148 |                     num_incoming_to_node_per_message = \ 149 |                         tf.nn.embedding_lookup(params=type_to_num_incoming_edges[edge_type_idx, :], 150 |                                                ids=edge_targets) # Shape [E] 151 |                     messages = tf.expand_dims(1.0 / (num_incoming_to_node_per_message + SMALL_NUMBER), axis=-1) * messages 152 | 153 |                 cur_channel_message_per_type.append(messages) 154 | 155 |             cur_channel_messages = tf.concat(cur_channel_message_per_type, axis=0) # Shape [M, K] 156 |             cur_channel_aggregated_incoming_messages = \ 157 |                 message_aggregation_fn(data=cur_channel_messages, 158 |                                        segment_ids=message_targets, 159 |                                        num_segments=num_nodes) # Shape [V, K] 160 |             cur_channel_aggregated_incoming_messages = activation_fn(cur_channel_aggregated_incoming_messages) 161 | 162 |             new_node_states_chunked.append(cur_channel_aggregated_incoming_messages) 163 | 164 |         new_node_states = tf.concat(new_node_states_chunked, axis=1) # Shape [V, C * K] 165 |         cur_node_states = new_node_states 166 | 167 |     return cur_node_states 168 | -------------------------------------------------------------------------------- /gnns/rgin.py: -------------------------------------------------------------------------------- 1 | from typing import List, Optional 2 | import tensorflow as tf 3 | 4 | from utils import get_activation, get_aggregation_function, MLP 5 | 6 | 7 | def sparse_rgin_layer( 8 |         node_embeddings: tf.Tensor, 9 |         adjacency_lists: List[tf.Tensor], 10 |         state_dim: Optional[int], 11 |         num_timesteps: int = 1, 12 |         activation_function: Optional[str] = "ReLU", 13 |         message_aggregation_function: str = "sum", 14 |         use_target_state_as_input: bool = False, 15 |         num_edge_MLP_hidden_layers: Optional[int] = 1, 16 |         num_aggr_MLP_hidden_layers: Optional[int] = None, 17 |         ) -> tf.Tensor: 18 |     """ 19 |     Compute new graph states by neural message passing using MLPs for state updates 20 |     and message computation. 21 |     For this, we assume existing node states h^t_v and a list of per-edge-type adjacency 22 |     matrices A_\ell. 23 | 24 |     We compute new states as follows: 25 |         h^{t+1}_v := \sigma(MLP_{aggr}(\sum_\ell \sum_{(u, v) \in A_\ell} MLP_\ell(h^t_u))) 26 |     The learnable parameters of this are the MLPs MLP_\ell. 27 |     This is derived from Cor. 6 of arXiv:1810.00826, instantiating the functions f, \phi 28 |     with _separate_ MLPs. This is more powerful than the GIN formulation in Eq. (4.1) of 29 |     arXiv:1810.00826, as we want to be able to distinguish graphs of the form 30 |         G_1 = (V={1, 2, 3}, E_1={(1, 2)}, E_2={(3, 2)}) 31 |     and 32 |         G_2 = (V={1, 2, 3}, E_1={(3, 2)}, E_2={(1, 2)}) 33 |     from each other. If we treated all edges the same, 34 |     G_1.E_1 \cup G_1.E_2 == G_2.E_1 \cup G_2.E_2 would imply that the two graphs 35 |     become indistinguishable. 36 |     Hence, we introduce per-edge-type MLPs, which also means that we have to drop 37 |     the optimisation of modelling f \circ \phi by a single MLP used in the original 38 |     GIN formulation. 39 | 40 |     We use the following abbreviations in shape descriptions: 41 |     * V: number of nodes 42 |     * D: state dimension 43 |     * L: number of different edge types 44 |     * E: number of edges of a given edge type 45 | 46 |     Arguments: 47 |         node_embeddings: float32 tensor of shape [V, D], the original representation of 48 |             each node in the graph.
49 | adjacency_lists: List of L adjacency lists, represented as int32 tensors of shape 50 | [E, 2]. Concretely, adjacency_lists[l][k,:] == [v, u] means that the k-th edge 51 | of type l connects node v to node u. 52 | state_dim: Optional size of output dimension of the GNN layer. If not set, defaults 53 | to D, the dimensionality of the input. If different from the input dimension, 54 | parameter num_timesteps has to be 1. 55 | num_timesteps: Number of repeated applications of this message passing layer. 56 | activation_function: Type of activation function used. 57 | message_aggregation_function: Type of aggregation function used for messages. 58 | use_target_state_as_input: Flag indicating if the edge MLP should consume both 59 | source and target state (True) or only source state (False). 60 | num_edge_MLP_hidden_layers: Number of hidden layers of the MLPs used to transform 61 | messages from neighbouring nodes. If None, the raw states are used directly. 62 | num_aggr_MLP_hidden_layers: Number of hidden layers of the MLPs used on the 63 | aggregation of messages from neighbouring nodes. If none, the aggregated messages 64 | are used directly. 65 | 66 | Returns: 67 | float32 tensor of shape [V, state_dim] 68 | """ 69 | num_nodes = tf.shape(node_embeddings, out_type=tf.int32)[0] 70 | if state_dim is None: 71 | state_dim = tf.shape(node_embeddings, out_type=tf.int32)[1] 72 | 73 | # === Prepare things we need across all timesteps: 74 | activation_fn = get_activation(activation_function) 75 | message_aggregation_fn = get_aggregation_function(message_aggregation_function) 76 | 77 | if num_aggr_MLP_hidden_layers is not None: 78 | aggregation_MLP = MLP(out_size=state_dim, 79 | hidden_layers=num_aggr_MLP_hidden_layers, 80 | activation_fun=activation_fn, 81 | name="Aggregation_MLP") # type: Optional[MLP] 82 | else: 83 | aggregation_MLP = None 84 | 85 | if num_edge_MLP_hidden_layers is not None: 86 | edge_type_to_edge_mlp = [] # type: Optional[List[MLP]] # MLPs to compute the edge messages 87 | else: 88 | edge_type_to_edge_mlp = None 89 | edge_type_to_message_targets = [] # List of tensors of message targets 90 | for edge_type_idx, adjacency_list_for_edge_type in enumerate(adjacency_lists): 91 | if edge_type_to_edge_mlp is not None and num_edge_MLP_hidden_layers is not None: 92 | edge_type_to_edge_mlp.append( 93 | MLP(out_size=state_dim, 94 | hidden_layers=num_edge_MLP_hidden_layers, 95 | activation_fun=activation_fn, 96 | name="Edge_%i_MLP" % edge_type_idx)) 97 | edge_type_to_message_targets.append(adjacency_list_for_edge_type[:, 1]) 98 | 99 | # Let M be the number of messages (sum of all E): 100 | message_targets = tf.concat(edge_type_to_message_targets, axis=0) # Shape [M] 101 | 102 | cur_node_states = node_embeddings 103 | for _ in range(num_timesteps): 104 | messages_per_type = [] # list of tensors of messages of shape [E, D] 105 | # Collect incoming messages per edge type 106 | for edge_type_idx, adjacency_list_for_edge_type in enumerate(adjacency_lists): 107 | edge_sources = adjacency_list_for_edge_type[:, 0] 108 | edge_targets = adjacency_list_for_edge_type[:, 1] 109 | edge_source_states = \ 110 | tf.nn.embedding_lookup(params=cur_node_states, 111 | ids=edge_sources) # Shape [E, D] 112 | 113 | edge_mlp_inputs = edge_source_states 114 | if use_target_state_as_input: 115 | edge_target_states = \ 116 | tf.nn.embedding_lookup(params=cur_node_states, 117 | ids=edge_targets) # Shape [E, D] 118 | edge_mlp_inputs = tf.concat([edge_source_states, edge_target_states], 119 | axis=1) # Shape [E, 2*D] 
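            # At this point edge_mlp_inputs holds one row per edge of this type: either just the
            # source state (shape [E, D]) or the concatenated source and target states (shape [E, 2*D]),
            # depending on use_target_state_as_input. The per-edge-type MLP below maps this to an
            # [E, D] message; if no edge MLP is configured, the inputs are used as messages directly.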
120 | 121 | if edge_type_to_edge_mlp is not None: 122 | messages = edge_type_to_edge_mlp[edge_type_idx](edge_mlp_inputs) # Shape [E, D] 123 | else: 124 | messages = edge_mlp_inputs 125 | messages_per_type.append(messages) 126 | 127 | all_messages = tf.concat(messages_per_type, axis=0) # Shape [M, D] 128 | if edge_type_to_edge_mlp is not None: 129 | all_messages = activation_fn(all_messages) # Shape [M, D] (Apply nonlinearity to Edge-MLP outputs as well) 130 | aggregated_messages = \ 131 | message_aggregation_fn(data=all_messages, 132 | segment_ids=message_targets, 133 | num_segments=num_nodes) # Shape [V, D] 134 | 135 | new_node_states = aggregated_messages 136 | if aggregation_MLP is not None: 137 | new_node_states = aggregation_MLP(new_node_states) 138 | new_node_states = activation_fn(new_node_states) # Note that the final MLP layer has no activation, so we do that here explicitly 139 | new_node_states = tf.contrib.layers.layer_norm(new_node_states) 140 | cur_node_states = new_node_states 141 | 142 | return cur_node_states 143 | -------------------------------------------------------------------------------- /models/__init__.py: -------------------------------------------------------------------------------- 1 | from .sparse_graph_model import Sparse_Graph_Model 2 | from .ggnn_model import GGNN_Model 3 | from .gnn_edge_mlp_model import GNN_Edge_MLP_Model 4 | from .gnn_film_model import GNN_FiLM_Model 5 | from .rgat_model import RGAT_Model 6 | from .rgcn_model import RGCN_Model 7 | from .rgdcn_model import RGDCN_Model 8 | from .rgin_model import RGIN_Model 9 | -------------------------------------------------------------------------------- /models/ggnn_model.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, Any, List 2 | 3 | import tensorflow as tf 4 | 5 | from .sparse_graph_model import Sparse_Graph_Model 6 | from tasks import Sparse_Graph_Task 7 | from gnns import sparse_ggnn_layer 8 | 9 | 10 | class GGNN_Model(Sparse_Graph_Model): 11 | @classmethod 12 | def default_params(cls): 13 | params = super().default_params() 14 | params.update({ 15 | 'hidden_size': 128, 16 | 'graph_rnn_cell': 'GRU', # RNN, GRU, or LSTM 17 | 'graph_activation_function': "tanh", 18 | "message_aggregation_function": "sum", 19 | 'graph_layer_input_dropout_keep_prob': 1.0, 20 | 'graph_dense_between_every_num_gnn_layers': 10000, 21 | 'graph_residual_connection_every_num_layers': 10000, 22 | }) 23 | return params 24 | 25 | @staticmethod 26 | def name(params: Dict[str, Any]) -> str: 27 | return "GGNN" 28 | 29 | def __init__(self, params: Dict[str, Any], task: Sparse_Graph_Task, run_id: str, result_dir: str) -> None: 30 | super().__init__(params, task, run_id, result_dir) 31 | 32 | def _apply_gnn_layer(self, 33 | node_representations: tf.Tensor, 34 | adjacency_lists: List[tf.Tensor], 35 | type_to_num_incoming_edges: tf.Tensor, 36 | num_timesteps: int) -> tf.Tensor: 37 | return sparse_ggnn_layer( 38 | node_embeddings=node_representations, 39 | adjacency_lists=adjacency_lists, 40 | state_dim=self.params['hidden_size'], 41 | num_timesteps=num_timesteps, 42 | gated_unit_type=self.params['graph_rnn_cell'], 43 | activation_function=self.params['graph_activation_function'], 44 | message_aggregation_function=self.params['message_aggregation_function'], 45 | ) 46 | -------------------------------------------------------------------------------- /models/gnn_edge_mlp_model.py: -------------------------------------------------------------------------------- 1 | from 
typing import Dict, Any, List 2 | 3 | import tensorflow as tf 4 | 5 | from .sparse_graph_model import Sparse_Graph_Model 6 | from tasks import Sparse_Graph_Task 7 | from gnns import sparse_gnn_edge_mlp_layer 8 | 9 | 10 | class GNN_Edge_MLP_Model(Sparse_Graph_Model): 11 | @classmethod 12 | def default_params(cls): 13 | params = super().default_params() 14 | params.update({ 15 | 'max_nodes_in_batch': 25000, 16 | 'hidden_size': 128, 17 | "graph_activation_function": "gelu", 18 | "message_aggregation_function": "sum", 19 | 'graph_inter_layer_norm': True, 20 | 'use_target_state_as_input': True, 21 | 'num_edge_hidden_layers': 1, 22 | }) 23 | return params 24 | 25 | @staticmethod 26 | def name(params: Dict[str, Any]) -> str: 27 | return "GNN-Edge-MLP%i" % (params['num_edge_hidden_layers']) 28 | 29 | def __init__(self, params: Dict[str, Any], task: Sparse_Graph_Task, run_id: str, result_dir: str) -> None: 30 | super().__init__(params, task, run_id, result_dir) 31 | 32 | def _apply_gnn_layer(self, 33 | node_representations: tf.Tensor, 34 | adjacency_lists: List[tf.Tensor], 35 | type_to_num_incoming_edges: tf.Tensor, 36 | num_timesteps: int, 37 | ) -> tf.Tensor: 38 | return sparse_gnn_edge_mlp_layer( 39 | node_embeddings=node_representations, 40 | adjacency_lists=adjacency_lists, 41 | type_to_num_incoming_edges=type_to_num_incoming_edges, 42 | state_dim=self.params['hidden_size'], 43 | num_timesteps=num_timesteps, 44 | activation_function=self.params['graph_activation_function'], 45 | message_aggregation_function=self.params['message_aggregation_function'], 46 | use_target_state_as_input=self.params['use_target_state_as_input'], 47 | num_edge_hidden_layers=self.params['num_edge_hidden_layers'], 48 | ) 49 | -------------------------------------------------------------------------------- /models/gnn_film_model.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, Any, List 2 | 3 | import tensorflow as tf 4 | 5 | from .sparse_graph_model import Sparse_Graph_Model 6 | from tasks import Sparse_Graph_Task 7 | from gnns import sparse_gnn_film_layer 8 | 9 | 10 | class GNN_FiLM_Model(Sparse_Graph_Model): 11 | @classmethod 12 | def default_params(cls): 13 | params = super().default_params() 14 | params.update({ 15 | "hidden_size": 128, 16 | "graph_activation_function": "ReLU", 17 | "message_aggregation_function": "sum", 18 | "normalize_messages_by_num_incoming": False, 19 | }) 20 | return params 21 | 22 | @staticmethod 23 | def name(params: Dict[str, Any]) -> str: 24 | return "GNN-FiLM" 25 | 26 | def __init__(self, params: Dict[str, Any], task: Sparse_Graph_Task, run_id: str, result_dir: str) -> None: 27 | super().__init__(params, task, run_id, result_dir) 28 | 29 | def _apply_gnn_layer(self, 30 | node_representations: tf.Tensor, 31 | adjacency_lists: List[tf.Tensor], 32 | type_to_num_incoming_edges: tf.Tensor, 33 | num_timesteps: int) -> tf.Tensor: 34 | return sparse_gnn_film_layer( 35 | node_embeddings=node_representations, 36 | adjacency_lists=adjacency_lists, 37 | type_to_num_incoming_edges=type_to_num_incoming_edges, 38 | state_dim=self.params['hidden_size'], 39 | num_timesteps=num_timesteps, 40 | activation_function=self.params['graph_activation_function'], 41 | message_aggregation_function=self.params['message_aggregation_function'], 42 | normalize_by_num_incoming=self.params["normalize_messages_by_num_incoming"], 43 | ) 44 | -------------------------------------------------------------------------------- /models/rgat_model.py: 
-------------------------------------------------------------------------------- 1 | from typing import Dict, Any, List 2 | 3 | import tensorflow as tf 4 | 5 | from .sparse_graph_model import Sparse_Graph_Model 6 | from tasks import Sparse_Graph_Task 7 | from gnns import sparse_rgat_layer 8 | 9 | 10 | class RGAT_Model(Sparse_Graph_Model): 11 | @classmethod 12 | def default_params(cls): 13 | params = super().default_params() 14 | params.update({ 15 | 'hidden_size': 128, 16 | 'num_heads': 4, 17 | 'graph_activation_function': 'tanh', 18 | 'graph_layer_input_dropout_keep_prob': 1.0, 19 | 'graph_dense_between_every_num_gnn_layers': 10000, 20 | 'graph_residual_connection_every_num_layers': 10000, 21 | }) 22 | return params 23 | 24 | @staticmethod 25 | def name(params: Dict[str, Any]) -> str: 26 | return "RGAT" 27 | 28 | def __init__(self, params: Dict[str, Any], task: Sparse_Graph_Task, run_id: str, result_dir: str) -> None: 29 | super().__init__(params, task, run_id, result_dir) 30 | 31 | def _apply_gnn_layer(self, 32 | node_representations: tf.Tensor, 33 | adjacency_lists: List[tf.Tensor], 34 | type_to_num_incoming_edges: tf.Tensor, 35 | num_timesteps: int) -> tf.Tensor: 36 | return sparse_rgat_layer( 37 | node_embeddings=node_representations, 38 | adjacency_lists=adjacency_lists, 39 | state_dim=self.params['hidden_size'], 40 | num_timesteps=num_timesteps, 41 | num_heads=self.params['num_heads'], 42 | activation_function=self.params['graph_activation_function'], 43 | ) 44 | -------------------------------------------------------------------------------- /models/rgcn_model.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, Any, List 2 | 3 | import tensorflow as tf 4 | 5 | from .sparse_graph_model import Sparse_Graph_Model 6 | from tasks import Sparse_Graph_Task 7 | from gnns import sparse_rgcn_layer 8 | 9 | 10 | class RGCN_Model(Sparse_Graph_Model): 11 | @classmethod 12 | def default_params(cls): 13 | params = super().default_params() 14 | params.update({ 15 | 'hidden_size': 128, 16 | "graph_activation_function": "ReLU", 17 | "message_aggregation_function": "sum", 18 | 'graph_layer_input_dropout_keep_prob': 1.0, 19 | 'graph_dense_between_every_num_gnn_layers': 10000, 20 | 'graph_residual_connection_every_num_layers': 10000, 21 | }) 22 | return params 23 | 24 | @staticmethod 25 | def name(params: Dict[str, Any]) -> str: 26 | return "RGCN" 27 | 28 | def __init__(self, params: Dict[str, Any], task: Sparse_Graph_Task, run_id: str, result_dir: str) -> None: 29 | super().__init__(params, task, run_id, result_dir) 30 | 31 | def _apply_gnn_layer(self, 32 | node_representations: tf.Tensor, 33 | adjacency_lists: List[tf.Tensor], 34 | type_to_num_incoming_edges: tf.Tensor, 35 | num_timesteps: int) -> tf.Tensor: 36 | return sparse_rgcn_layer( 37 | node_embeddings=node_representations, 38 | adjacency_lists=adjacency_lists, 39 | type_to_num_incoming_edges=type_to_num_incoming_edges, 40 | state_dim=self.params['hidden_size'], 41 | num_timesteps=num_timesteps, 42 | activation_function=self.params['graph_activation_function'], 43 | message_aggregation_function=self.params['message_aggregation_function'], 44 | ) 45 | -------------------------------------------------------------------------------- /models/rgdcn_model.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, Any, List 2 | 3 | import tensorflow as tf 4 | 5 | from .sparse_graph_model import Sparse_Graph_Model 6 | from tasks import 
Sparse_Graph_Task 7 | from gnns import sparse_rgdcn_layer 8 | 9 | 10 | class RGDCN_Model(Sparse_Graph_Model): 11 | @classmethod 12 | def default_params(cls): 13 | params = super().default_params() 14 | params.update({ 15 | 'max_nodes_in_batch': 25000, 16 | 'hidden_size': 128, 17 | 'num_channels': 8, 18 | "use_full_state_for_channel_weights": False, 19 | "tie_channel_weights": False, 20 | "graph_activation_function": "ReLU", 21 | "message_aggregation_function": "sum", 22 | 'graph_inter_layer_norm': True, 23 | }) 24 | return params 25 | 26 | @staticmethod 27 | def name(params: Dict[str, Any]) -> str: 28 | return "RGDCN" 29 | 30 | def __init__(self, params: Dict[str, Any], task: Sparse_Graph_Task, run_id: str, result_dir: str) -> None: 31 | params['channel_dim'] = params['hidden_size'] // params['num_channels'] 32 | super().__init__(params, task, run_id, result_dir) 33 | 34 | def _apply_gnn_layer(self, 35 | node_representations: tf.Tensor, 36 | adjacency_lists: List[tf.Tensor], 37 | type_to_num_incoming_edges: tf.Tensor, 38 | num_timesteps: int) -> tf.Tensor: 39 | return sparse_rgdcn_layer( 40 | node_embeddings=node_representations, 41 | adjacency_lists=adjacency_lists, 42 | type_to_num_incoming_edges=type_to_num_incoming_edges, 43 | num_channels=self.params['num_channels'], 44 | channel_dim=self.params['channel_dim'], 45 | num_timesteps=num_timesteps, 46 | use_full_state_for_channel_weights=self.params['use_full_state_for_channel_weights'], 47 | tie_channel_weights=self.params['tie_channel_weights'], 48 | activation_function=self.params['graph_activation_function'], 49 | message_aggregation_function=self.params['message_aggregation_function'], 50 | ) 51 | -------------------------------------------------------------------------------- /models/rgin_model.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, Any, List 2 | 3 | import tensorflow as tf 4 | 5 | from .sparse_graph_model import Sparse_Graph_Model 6 | from tasks import Sparse_Graph_Task 7 | from gnns import sparse_rgin_layer 8 | 9 | 10 | class RGIN_Model(Sparse_Graph_Model): 11 | @classmethod 12 | def default_params(cls): 13 | params = super().default_params() 14 | params.update({ 15 | 'hidden_size': 128, 16 | "graph_activation_function": "ReLU", 17 | 'message_aggregation_function': "sum", 18 | 'graph_dense_between_every_num_gnn_layers': 10000, 19 | 'graph_inter_layer_norm': True, 20 | 'use_target_state_as_input': False, 21 | 'graph_num_edge_MLP_hidden_layers': 1, 22 | 'graph_num_aggr_MLP_hidden_layers': None, 23 | }) 24 | return params 25 | 26 | @staticmethod 27 | def name(params: Dict[str, Any]) -> str: 28 | return "RGIN" 29 | 30 | def __init__(self, params: Dict[str, Any], task: Sparse_Graph_Task, run_id: str, result_dir: str) -> None: 31 | super().__init__(params, task, run_id, result_dir) 32 | 33 | def _apply_gnn_layer(self, 34 | node_representations: tf.Tensor, 35 | adjacency_lists: List[tf.Tensor], 36 | type_to_num_incoming_edges: tf.Tensor, 37 | num_timesteps: int, 38 | ) -> tf.Tensor: 39 | return sparse_rgin_layer( 40 | node_embeddings=node_representations, 41 | adjacency_lists=adjacency_lists, 42 | state_dim=self.params['hidden_size'], 43 | num_timesteps=num_timesteps, 44 | activation_function=self.params['graph_activation_function'], 45 | message_aggregation_function=self.params['message_aggregation_function'], 46 | use_target_state_as_input=self.params['use_target_state_as_input'], 47 | num_edge_MLP_hidden_layers=self.params['graph_num_edge_MLP_hidden_layers'], 
48 | num_aggr_MLP_hidden_layers=self.params['graph_num_aggr_MLP_hidden_layers'], 49 | ) 50 | -------------------------------------------------------------------------------- /models/sparse_graph_model.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pickle 3 | import random 4 | import time 5 | from abc import ABC, abstractmethod 6 | from typing import Any, Dict, Optional, Tuple, List, Iterable 7 | 8 | import tensorflow as tf 9 | import numpy as np 10 | from dpu_utils.utils import ThreadedIterator, RichPath 11 | 12 | from tasks import Sparse_Graph_Task, DataFold 13 | from utils import get_activation 14 | 15 | 16 | class Sparse_Graph_Model(ABC): 17 | """ 18 | Abstract superclass of all graph models, defining core model functionality 19 | such as training loops, interaction with tasks, etc. Needs to be extended by 20 | concrete GNN implementations. 21 | """ 22 | @classmethod 23 | def default_params(cls): 24 | return { 25 | 'max_nodes_in_batch': 50000, 26 | 27 | 'graph_num_layers': 8, 28 | 'graph_num_timesteps_per_layer': 1, 29 | 30 | 'graph_layer_input_dropout_keep_prob': 0.8, 31 | 'graph_dense_between_every_num_gnn_layers': 1, 32 | 'graph_model_activation_function': 'tanh', 33 | 'graph_residual_connection_every_num_layers': 2, 34 | 'graph_inter_layer_norm': False, 35 | 36 | 'max_epochs': 10000, 37 | 'patience': 25, 38 | 'optimizer': 'Adam', 39 | 'learning_rate': 0.001, 40 | 'learning_rate_decay': 0.98, 41 | 'lr_for_num_graphs_per_batch': None, # The LR is normalised so that we use it for exactly that number of graphs; no normalisation happens if the value is None 42 | 'momentum': 0.85, 43 | 'clamp_gradient_norm': 1.0, 44 | 'random_seed': 0, 45 | } 46 | 47 | @staticmethod 48 | @abstractmethod 49 | def name(params: Dict[str, Any]) -> str: 50 | raise NotImplementedError() 51 | 52 | def __init__(self, 53 | params: Dict[str, Any], 54 | task: Sparse_Graph_Task, 55 | run_id: str, 56 | result_dir: str) -> None: 57 | self.params = params 58 | self.task = task 59 | self.run_id = run_id 60 | self.result_dir = result_dir 61 | 62 | self.__placeholders = {} # type: Dict[str, tf.Tensor] 63 | self.__ops = {} # type: Dict[str, tf.Tensor] 64 | 65 | # Build the actual model 66 | random.seed(params['random_seed']) 67 | np.random.seed(params['random_seed']) 68 | config = tf.ConfigProto() 69 | config.gpu_options.allow_growth = True 70 | self.graph = tf.Graph() 71 | self.sess = tf.Session(graph=self.graph, config=config) 72 | with self.graph.as_default(): 73 | tf.set_random_seed(self.params['random_seed']) 74 | self.__make_model() 75 | 76 | @property 77 | def log_file(self): 78 | return os.path.join(self.result_dir, "%s.log" % self.run_id) 79 | 80 | @property 81 | def best_model_file(self): 82 | return os.path.join(self.result_dir, "%s_best_model.pickle" % self.run_id) 83 | 84 | # -------------------- Model Saving/Loading -------------------- 85 | def initialize_model(self) -> None: 86 | with self.sess.graph.as_default(): 87 | init_op = tf.group(tf.global_variables_initializer(), 88 | tf.local_variables_initializer()) 89 | self.sess.run(init_op) 90 | 91 | def save_model(self, path: str) -> None: 92 | vars_to_retrieve = {} # type: Dict[str, tf.Tensor] 93 | for variable in self.sess.graph.get_collection(tf.GraphKeys.GLOBAL_VARIABLES): 94 | assert variable.name not in vars_to_retrieve 95 | vars_to_retrieve[variable.name] = variable 96 | weights_to_save = self.sess.run(vars_to_retrieve) 97 | 98 | data_to_save = { 99 | "model_class": self.name(self.params), 
100 | "task_class": self.task.name(), 101 | "model_params": self.params, 102 | "task_params": self.task.params, 103 | "task_metadata": self.task.get_metadata(), 104 | "weights": weights_to_save, 105 | } 106 | with open(path, 'wb') as out_file: 107 | pickle.dump(data_to_save, out_file, pickle.HIGHEST_PROTOCOL) 108 | 109 | def load_weights(self, weights: Dict[str, np.ndarray]) -> None: 110 | with self.graph.as_default(): 111 | variables_to_initialize = [] 112 | with tf.name_scope("restore"): 113 | restore_ops = [] 114 | used_vars = set() 115 | for variable in self.graph.get_collection(tf.GraphKeys.GLOBAL_VARIABLES): 116 | used_vars.add(variable.name) 117 | if variable.name in weights: 118 | restore_ops.append(variable.assign(weights[variable.name])) 119 | else: 120 | print('Freshly initializing %s since no saved value was found.' % variable.name) 121 | variables_to_initialize.append(variable) 122 | for var_name in weights: 123 | if var_name not in used_vars: 124 | print('Saved weights for %s not used by model.' % var_name) 125 | restore_ops.append(tf.variables_initializer(variables_to_initialize)) 126 | self.sess.run(restore_ops) 127 | 128 | # -------------------- Model Construction -------------------- 129 | def __make_model(self): 130 | self.task.make_task_input_model(self.__placeholders, self.__ops) 131 | 132 | with tf.variable_scope("graph_model"): 133 | self.__placeholders['num_graphs'] = \ 134 | tf.placeholder(dtype=tf.int64, shape=[], name='num_graphs') 135 | self.__placeholders['graph_layer_input_dropout_keep_prob'] = \ 136 | tf.placeholder_with_default(1.0, shape=[], name='graph_layer_input_dropout_keep_prob') 137 | 138 | self.__build_graph_propagation_model() 139 | 140 | self.task.make_task_output_model(self.__placeholders, self.__ops) 141 | 142 | tf.summary.scalar('loss', self.__ops['task_metrics']['loss']) 143 | total_num_graphs_variable = \ 144 | tf.get_variable(name='total_num_graphs', 145 | shape=(), 146 | dtype=tf.int64, 147 | initializer=tf.zeros_initializer, 148 | trainable=False) 149 | self.__ops['total_num_graphs'] = \ 150 | tf.assign_add(total_num_graphs_variable, self.__placeholders['num_graphs']) 151 | self.__ops['tf_summaries'] = tf.summary.merge_all() 152 | 153 | # Print some stats: 154 | num_pars = 0 155 | for variable in tf.trainable_variables(): 156 | num_pars += np.prod([dim.value for dim in variable.get_shape()]) 157 | self.log_line("Model has %i parameters." 
% num_pars) 158 | 159 | # Now add the optimizer bits: 160 | self.__make_train_step() 161 | 162 | def __build_graph_propagation_model(self) -> tf.Tensor: 163 | h_dim = self.params['hidden_size'] 164 | activation_fn = get_activation(self.params['graph_model_activation_function']) 165 | if self.task.initial_node_feature_size != self.params['hidden_size']: 166 | self.__ops['projected_node_features'] = \ 167 | tf.keras.layers.Dense(units=h_dim, 168 | use_bias=False, 169 | activation=activation_fn, 170 | )(self.__ops['initial_node_features']) 171 | else: 172 | self.__ops['projected_node_features'] = self.__ops['initial_node_features'] 173 | 174 | cur_node_representations = self.__ops['projected_node_features'] 175 | last_residual_representations = tf.zeros_like(cur_node_representations) 176 | for layer_idx in range(self.params['graph_num_layers']): 177 | with tf.variable_scope('gnn_layer_%i' % layer_idx): 178 | cur_node_representations = \ 179 | tf.nn.dropout(cur_node_representations, rate=1.0 - self.__placeholders['graph_layer_input_dropout_keep_prob']) 180 | if layer_idx % self.params['graph_residual_connection_every_num_layers'] == 0: 181 | t = cur_node_representations 182 | if layer_idx > 0: 183 | cur_node_representations += last_residual_representations 184 | cur_node_representations /= 2 185 | last_residual_representations = t 186 | cur_node_representations = \ 187 | self._apply_gnn_layer( 188 | cur_node_representations, 189 | self.__ops['adjacency_lists'], 190 | self.__ops['type_to_num_incoming_edges'], 191 | self.params['graph_num_timesteps_per_layer']) 192 | if self.params['graph_inter_layer_norm']: 193 | cur_node_representations = tf.contrib.layers.layer_norm(cur_node_representations) 194 | if layer_idx % self.params['graph_dense_between_every_num_gnn_layers'] == 0: 195 | cur_node_representations = \ 196 | tf.keras.layers.Dense(units=h_dim, 197 | use_bias=False, 198 | activation=activation_fn, 199 | name="Dense", 200 | )(cur_node_representations) 201 | 202 | self.__ops['final_node_representations'] = cur_node_representations 203 | 204 | @abstractmethod 205 | def _apply_gnn_layer(self, 206 | node_representations: tf.Tensor, 207 | adjacency_lists: List[tf.Tensor], 208 | type_to_num_incoming_edges: tf.Tensor, 209 | num_timesteps: int) -> tf.Tensor: 210 | """ 211 | Run a GNN layer on a graph. 212 | 213 | Arguments: 214 | node_features: float32 tensor of shape [V, D], where V is the number of nodes. 215 | adjacency_lists: list of L int32 tensors of shape [E, 2], where L is the number 216 | of edge types and E the number of edges of that type. 217 | Hence, adjacency_lists[l][e,:] == [u, v] means that u has an edge of type l 218 | to v. 219 | type_to_num_incoming_edges: int32 tensor of shape [L, V], where L is the number 220 | of edge types. 221 | type_to_num_incoming_edges[l, v] = k indicates that node v has k incoming 222 | edges of type l. 223 | num_timesteps: Number of propagation steps in to run in this GNN layer. 
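        Returns:
            float32 tensor of shape [V, D'] containing the new node representations, where D' is
            the output dimension of the concrete GNN layer (self.params['hidden_size'] in the
            model implementations above).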
224 | """ 225 | raise Exception("Models have to implement _apply_gnn_layer!") 226 | 227 | def __make_train_step(self): 228 | trainable_vars = self.sess.graph.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES) 229 | 230 | learning_rate = self.params['learning_rate'] 231 | 232 | lr_for_num_graphs_per_batch = self.params.get('lr_for_num_graphs_per_batch') 233 | if lr_for_num_graphs_per_batch is not None: 234 | # This ensures that the learning rate _per_ graph in the batch stays the same, 235 | # which can be important for tasks in which the loss is defined per-graph 236 | # (e.g., full graph regression tasks, or one-node-per-graph classification) 237 | lr_norm_factor = (tf.cast(self.__placeholders['num_graphs'], tf.float32) 238 | / tf.constant(lr_for_num_graphs_per_batch, dtype=tf.float32)) 239 | learning_rate *= lr_norm_factor 240 | 241 | optimizer_name = self.params['optimizer'].lower() 242 | if optimizer_name == 'sgd': 243 | optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate) 244 | elif optimizer_name == 'rmsprop': 245 | optimizer = tf.train.RMSPropOptimizer(learning_rate=learning_rate, 246 | decay=self.params['learning_rate_decay'], 247 | momentum=self.params['momentum']) 248 | elif optimizer_name == 'adam': 249 | optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate) 250 | else: 251 | raise Exception('Unknown optimizer "%s".' % (self.params['optimizer'])) 252 | 253 | grads_and_vars = optimizer.compute_gradients(self.__ops['task_metrics']['loss'], var_list=trainable_vars) 254 | clipped_grads = [] 255 | for grad, var in grads_and_vars: 256 | if grad is not None: 257 | clipped_grads.append((tf.clip_by_norm(grad, self.params['clamp_gradient_norm']), var)) 258 | else: 259 | clipped_grads.append((grad, var)) 260 | self.__ops['train_step'] = optimizer.apply_gradients(clipped_grads) 261 | 262 | # -------------------- Training Loop -------------------- 263 | def __run_epoch(self, 264 | epoch_name: str, 265 | data: Iterable[Any], 266 | data_fold: DataFold, 267 | quiet: Optional[bool] = False, 268 | summary_writer: Optional[tf.summary.FileWriter] = None) \ 269 | -> Tuple[float, List[Dict[str, Any]], int, float, float, float]: 270 | batch_iterator = self.task.make_minibatch_iterator( 271 | data, data_fold, self.__placeholders, self.params['max_nodes_in_batch']) 272 | batch_iterator = ThreadedIterator(batch_iterator, max_queue_size=5) 273 | task_metric_results = [] 274 | start_time = time.time() 275 | processed_graphs, processed_nodes, processed_edges = 0, 0, 0 276 | epoch_loss = 0.0 277 | for step, batch_data in enumerate(batch_iterator): 278 | if data_fold == DataFold.TRAIN: 279 | batch_data.feed_dict[self.__placeholders['graph_layer_input_dropout_keep_prob']] = \ 280 | self.params['graph_layer_input_dropout_keep_prob'] 281 | batch_data.feed_dict[self.__placeholders['num_graphs']] = batch_data.num_graphs 282 | # Collect some statistics: 283 | processed_graphs += batch_data.num_graphs 284 | processed_nodes += batch_data.num_nodes 285 | processed_edges += batch_data.num_edges 286 | 287 | fetch_dict = {'task_metrics': self.__ops['task_metrics']} 288 | if summary_writer: 289 | fetch_dict['tf_summaries'] = self.__ops['tf_summaries'] 290 | fetch_dict['total_num_graphs'] = self.__ops['total_num_graphs'] 291 | if data_fold == DataFold.TRAIN: 292 | fetch_dict['train_step'] = self.__ops['train_step'] 293 | fetch_results = self.sess.run(fetch_dict, feed_dict=batch_data.feed_dict) 294 | epoch_loss += fetch_results['task_metrics']['loss'] * batch_data.num_graphs 295 | 
task_metric_results.append(fetch_results['task_metrics']) 296 | 297 | if not quiet: 298 | print("Running %s, batch %i (has %i graphs). Loss so far: %.4f" 299 | % (epoch_name, step, batch_data.num_graphs, epoch_loss / processed_graphs), 300 | end='\r') 301 | if summary_writer: 302 | summary_writer.add_summary(fetch_results['tf_summaries'], fetch_results['total_num_graphs']) 303 | 304 | assert processed_graphs > 0, "Can't run epoch over empty dataset." 305 | 306 | epoch_time = time.time() - start_time 307 | per_graph_loss = epoch_loss / processed_graphs 308 | graphs_per_sec = processed_graphs / epoch_time 309 | nodes_per_sec = processed_nodes / epoch_time 310 | edges_per_sec = processed_edges / epoch_time 311 | return per_graph_loss, task_metric_results, processed_graphs, graphs_per_sec, nodes_per_sec, edges_per_sec 312 | 313 | def log_line(self, msg): 314 | with open(self.log_file, 'a') as log_fh: 315 | log_fh.write(msg + '\n') 316 | print(msg) 317 | 318 | def train(self, quiet: Optional[bool] = False, tf_summary_path: Optional[str] = None): 319 | total_time_start = time.time() 320 | with self.graph.as_default(): 321 | if tf_summary_path is not None: 322 | os.makedirs(tf_summary_path, exist_ok=True) 323 | train_writer = tf.summary.FileWriter(os.path.join(tf_summary_path, "train"), graph=self.graph) 324 | valid_writer = tf.summary.FileWriter(os.path.join(tf_summary_path, "valid")) 325 | else: 326 | train_writer, valid_writer = None, None 327 | 328 | (best_valid_metric, best_val_metric_epoch, best_val_metric_descr) = (float("+inf"), 0, "") 329 | for epoch in range(1, self.params['max_epochs'] + 1): 330 | self.log_line("== Epoch %i" % epoch) 331 | 332 | train_loss, train_task_metrics, train_num_graphs, train_graphs_p_s, train_nodes_p_s, train_edges_p_s = \ 333 | self.__run_epoch("epoch %i (training)" % epoch, 334 | self.task._loaded_data[DataFold.TRAIN], 335 | DataFold.TRAIN, 336 | quiet=quiet, 337 | summary_writer=train_writer) 338 | if not quiet: 339 | print("\r\x1b[K", end='') 340 | self.log_line(" Train: loss: %.5f || %s || graphs/sec: %.2f | nodes/sec: %.0f | edges/sec: %.0f" 341 | % (train_loss, 342 | self.task.pretty_print_epoch_task_metrics(train_task_metrics, train_num_graphs), 343 | train_graphs_p_s, train_nodes_p_s, train_edges_p_s)) 344 | 345 | valid_loss, valid_task_metrics, valid_num_graphs, valid_graphs_p_s, valid_nodes_p_s, valid_edges_p_s = \ 346 | self.__run_epoch("epoch %i (validation)" % epoch, 347 | self.task._loaded_data[DataFold.VALIDATION], 348 | DataFold.VALIDATION, 349 | quiet=quiet, 350 | summary_writer=valid_writer) 351 | if not quiet: 352 | print("\r\x1b[K", end='') 353 | early_stopping_metric = self.task.early_stopping_metric(valid_task_metrics, valid_num_graphs) 354 | valid_metric_descr = \ 355 | self.task.pretty_print_epoch_task_metrics(valid_task_metrics, valid_num_graphs) 356 | self.log_line(" Valid: loss: %.5f || %s || graphs/sec: %.2f | nodes/sec: %.0f | edges/sec: %.0f" 357 | % (valid_loss, valid_metric_descr, valid_graphs_p_s, valid_nodes_p_s, valid_edges_p_s)) 358 | 359 | if early_stopping_metric < best_valid_metric: 360 | self.save_model(self.best_model_file) 361 | self.log_line(" (Best epoch so far, target metric decreased to %.5f from %.5f. 
Saving to '%s')" 362 | % (early_stopping_metric, best_valid_metric, self.best_model_file)) 363 | best_valid_metric = early_stopping_metric 364 | best_val_metric_epoch = epoch 365 | best_val_metric_descr = valid_metric_descr 366 | elif epoch - best_val_metric_epoch >= self.params['patience']: 367 | total_time = time.time() - total_time_start 368 | self.log_line("Stopping training after %i epochs without improvement on validation loss." % self.params['patience']) 369 | self.log_line("Training took %is. Best validation results: %s" 370 | % (total_time, best_val_metric_descr)) 371 | break 372 | 373 | def test(self, path: RichPath, quiet: Optional[bool] = False): 374 | with self.graph.as_default(): 375 | self.log_line("== Running Test on %s ==" % (path,)) 376 | data = self.task._loaded_data.get(DataFold.TEST) 377 | if data is None: 378 | data = self.task.load_eval_data_from_path(path) 379 | test_loss, test_task_metrics, test_num_graphs, _, _, _ = \ 380 | self.__run_epoch("Test", data, DataFold.TEST, quiet=quiet) 381 | if not quiet: 382 | print("\r\x1b[K", end='') 383 | self.log_line("Loss %.5f on %i graphs" % (test_loss, test_num_graphs)) 384 | self.log_line("Metrics: %s" % self.task.pretty_print_epoch_task_metrics(test_task_metrics, test_num_graphs)) 385 | -------------------------------------------------------------------------------- /reorg_varmisuse_data.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Set this to the path of the downloaded dataset: 4 | DOWNLOADED_ZIP="graph-dataset.zip" 5 | # Set this to the path where the data will be extracted to (requires ~15 GB of space): 6 | OUTDIR="reorged-varmisuse-dataset" 7 | 8 | ### The following bits should not require any changes: 9 | CODEDIR=$(dirname $0) 10 | TESTONLY_PROJS="commandline humanizer lean" 11 | 12 | for fold in train valid test testonly; do 13 | mkdir -p "${OUTDIR}/graphs-${fold}-raw" 14 | done 15 | 16 | 7za x "${DOWNLOADED_ZIP}" 17 | 18 | for test_proj in $TESTONLY_PROJS; do 19 | mv graph-dataset/${test_proj}/graphs-test/* "${OUTDIR}/graphs-testonly-raw" 20 | rm -rf graph-dataset/${test_proj} 21 | done 22 | 23 | for fold in train valid test; do 24 | mv graph-dataset/*/graphs-${fold}/* "${OUTDIR}/graphs-${fold}-raw" 25 | done 26 | 27 | for file in "${OUTDIR}"/*/*.gz; do 28 | new_file=$(echo "${file}" | sed -e 's/.gz$/.json.gz/') 29 | mv "${file}" "${new_file}" 30 | done 31 | 32 | for fold in train valid test testonly; do 33 | python3 "$CODEDIR/utils/varmisuse_data_splitter.py" "${OUTDIR}/graphs-${fold}-raw/" "${OUTDIR}/graphs-${fold}/" 34 | rm -rf "${OUTDIR}/graphs-${fold}-raw/" 35 | done 36 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | docopt 2 | numpy 3 | dpu-utils>=0.1.30 4 | tensorflow-gpu>=1.13.1 -------------------------------------------------------------------------------- /run_ppi_benchs.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """ 3 | Usage: 4 | run_ppi_benchs.py [options] LOG_TARGET_DIR 5 | 6 | Options: 7 | -h --help Show this screen. 8 | --num-runs NUM Number of runs to perform for each configuration. [default: 10] 9 | --debug Turn on debugger. 
10 | """ 11 | import os 12 | import subprocess 13 | import re 14 | import numpy as np 15 | 16 | from docopt import docopt 17 | from dpu_utils.utils import run_and_debug 18 | 19 | MODEL_TYPES = ["GGNN", "RGCN", "RGAT", "RGIN", "GNN-Edge-MLP0", "GNN-Edge-MLP1", "GNN_FiLM"] 20 | 21 | TEST_RES_RE = re.compile('^Metrics: Avg MicroF1: (0.\d+)') 22 | TIME_RE = re.compile('^Training took (\d+)s') 23 | 24 | 25 | def run(args): 26 | target_dir = args['LOG_TARGET_DIR'] 27 | os.makedirs(target_dir, exist_ok=True) 28 | print("Starting PPI experiments, will write logfiles for runs into %s." % target_dir) 29 | num_seeds = int(args.get('--num-runs')) 30 | print("| %- 13s | %- 17s | %- 10s |" % ("Model", "Avg. MicroF1", "Avg. Time")) 31 | print("|" + "-" * 15 + "|" + "-" * 19 + "|" + "-" * 12 + "|") 32 | for model in MODEL_TYPES: 33 | model_f1s = [] 34 | model_times = [] 35 | for seed in range(1, 1 + num_seeds): 36 | logfile = os.path.join(target_dir, "%s_seed%i.txt" % (model.lower(), seed)) 37 | with open(logfile, "w") as log_fh: 38 | subprocess.check_call(["python", 39 | "train.py", 40 | "--quiet", 41 | "--run-test", 42 | model, 43 | "PPI", 44 | "--model-param-overrides", 45 | "{\"random_seed\": %i}" % seed, 46 | ], 47 | stdout=log_fh, 48 | stderr=log_fh) 49 | with open(logfile, "r") as log_fh: 50 | for line in log_fh.readlines(): 51 | time_match = TIME_RE.search(line) 52 | res_match = TEST_RES_RE.search(line) 53 | if time_match is not None: 54 | model_times.append(int(time_match.groups()[0])) 55 | elif res_match is not None: 56 | model_f1s.append(float(res_match.groups()[0])) 57 | 58 | print("| %- 13s | %.3f (+/- %.3f) | % 4.1f |" 59 | % (model, 60 | np.mean(model_f1s), 61 | np.std(model_f1s), 62 | np.mean(model_times))) 63 | 64 | 65 | if __name__ == "__main__": 66 | args = docopt(__doc__) 67 | run_and_debug(lambda: run(args), enable_debugging=args['--debug']) 68 | -------------------------------------------------------------------------------- /run_qm9_benchs.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """ 3 | Usage: 4 | run_qm9_benchs.py [options] LOG_TARGET_DIR 5 | 6 | Options: 7 | -h --help Show this screen. 8 | --num-runs NUM Number of runs to perform for each configuration. [default: 5] 9 | --debug Turn on debugger. 10 | """ 11 | import os 12 | import subprocess 13 | import re 14 | import numpy as np 15 | 16 | from docopt import docopt 17 | from dpu_utils.utils import run_and_debug 18 | 19 | MODEL_TYPES = ["GGNN", "RGCN", "RGAT", "RGIN", "GNN-Edge-MLP0", "GNN-Edge-MLP1", "GNN_FiLM"] 20 | TASKS = ["mu", "alpha", "HOMO", "LUMO", "gap", "R2", "ZPVE", "U0", "U", "H", "G", "Cv", "Omega"] 21 | 22 | TEST_RES_RE = re.compile('^Metrics: MAEs: \d+:([0-9.]+) \| Error Ratios: \d+:([0-9.]+)') 23 | TIME_RE = re.compile('^Training took (\d+)s') 24 | 25 | 26 | def run(args): 27 | target_dir = args['LOG_TARGET_DIR'] 28 | os.makedirs(target_dir, exist_ok=True) 29 | print("Starting QM9 experiments, will write logfiles for runs into %s." 
% target_dir) 30 | num_seeds = int(args.get('--num-runs')) 31 | results = {} 32 | for model in MODEL_TYPES: 33 | results[model] = [{"test_errors": [], "times": []} for _ in TASKS] 34 | for task_id in range(len(TASKS)): 35 | for seed in range(1, 1 + num_seeds): 36 | logfile = os.path.join(target_dir, "%s_task%i_seed%i.txt" % (model, task_id, seed)) 37 | with open(logfile, "w") as log_fh: 38 | subprocess.check_call(["python", 39 | "train.py", 40 | "--run-test", 41 | model, 42 | "QM9", 43 | "--model-param-overrides", 44 | "{\"random_seed\": %i}" % seed, 45 | "--task-param-overrides", 46 | "{\"task_ids\": [%i]}" % task_id, 47 | ], 48 | stdout=log_fh, 49 | stderr=log_fh) 50 | with open(logfile, "r") as log_fh: 51 | for line in log_fh.readlines(): 52 | time_match = TIME_RE.search(line) 53 | res_match = TEST_RES_RE.search(line) 54 | if time_match is not None: 55 | results[model][task_id]["times"].append(int(time_match.groups()[0])) 56 | elif res_match is not None: 57 | results[model][task_id]["test_errors"].append(float(res_match.groups()[1])) 58 | 59 | row_fmt_string = "%7s " + "&% 35s " * len(MODEL_TYPES) + "\\\\" 60 | print(row_fmt_string % tuple([""] + MODEL_TYPES)) 61 | for task_id, task in enumerate(TASKS): 62 | model_results = [] 63 | for model in MODEL_TYPES: 64 | err = np.mean(results[model][task_id]["test_errors"]) 65 | std = np.std(results[model][task_id]["test_errors"]) 66 | time_in_min = np.mean(results[model][task_id]["times"]) / 60 67 | model_results.append("%.2f & ($\pm %.2f$; $%.1f$min)" % (err, std, time_in_min)) 68 | print(row_fmt_string % tuple([task] + model_results)) 69 | 70 | 71 | if __name__ == "__main__": 72 | args = docopt(__doc__) 73 | run_and_debug(lambda: run(args), enable_debugging=args['--debug']) 74 | -------------------------------------------------------------------------------- /run_varmisuse_benchs.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """ 3 | Usage: 4 | run_varmisuse_benchs.py [options] LOG_TARGET_DIR 5 | 6 | Options: 7 | -h --help Show this screen. 8 | --num-runs NUM Number of runs to perform for each configuration. [default: 5] 9 | --debug Turn on debugger. 10 | """ 11 | import os 12 | import subprocess 13 | import re 14 | import numpy as np 15 | 16 | from docopt import docopt 17 | from dpu_utils.utils import run_and_debug 18 | 19 | MODEL_TYPES = ["GGNN", "RGCN", "RGAT", "RGIN", "GNN-Edge-MLP0", "GNN-Edge-MLP1", "GNN_FiLM"] 20 | 21 | TEST_RES_RE = re.compile('^Metrics: Accuracy: (0.\d+)') 22 | VALID_RES_RE = re.compile('Best validation results: Accuracy: (0.\d+)') 23 | MODEL_FILE_RE = re.compile('^Loading model from file (.+)\.') 24 | 25 | 26 | def run(args): 27 | target_dir = args['LOG_TARGET_DIR'] 28 | os.makedirs(target_dir, exist_ok=True) 29 | print("Starting VarMisuse experiments, will write logfiles for runs into %s." 
% target_dir) 30 | num_seeds = int(args.get('--num-runs')) 31 | print("| %- 14s | %- 17s | %- 17s | %- 17s |" % ("Model", 32 | "Valid Acc", 33 | "Test Acc", 34 | "TestOnly Acc")) 35 | print("|" + "-" * 16 + "|" + "-" * 19 + "|" + "-" * 19 + "|" + "-" * 19 + "|") 36 | for model in MODEL_TYPES: 37 | valid_accs, test_accs, testonly_accs = [], [], [] 38 | for seed in range(1, 1 + num_seeds): 39 | logfile = os.path.join(target_dir, "%s_seed%i.txt" % (model.lower(), seed)) 40 | test_logfile = os.path.join(target_dir, "%s_seed%i-testonly.txt" % (model.lower(), seed)) 41 | with open(logfile, "w") as log_fh: 42 | subprocess.check_call(["python", 43 | "train.py", 44 | "--quiet", 45 | "--run-test", 46 | model, 47 | "VarMisuse", 48 | "--model-param-overrides", 49 | "{\"random_seed\": %i}" % seed, 50 | ], 51 | stdout=log_fh, 52 | stderr=log_fh) 53 | model_file = None 54 | with open(logfile, "r") as log_fh: 55 | for line in log_fh.readlines(): 56 | valid_res_match = VALID_RES_RE.search(line) 57 | test_res_match = TEST_RES_RE.search(line) 58 | model_file_match = MODEL_FILE_RE.search(line) 59 | if valid_res_match is not None: 60 | valid_accs.append(float(valid_res_match.groups()[0])) 61 | elif test_res_match is not None: 62 | test_accs.append(float(test_res_match.groups()[0])) 63 | elif model_file_match is not None: 64 | model_file = model_file_match.groups()[0] 65 | 66 | # Run TestOnly 67 | assert model_file is not None, "Could not find saved model file" 68 | with open(test_logfile, "w") as log_fh: 69 | subprocess.check_call(["python", 70 | "test.py", 71 | "--quiet", 72 | model_file, 73 | "data/varmisuse/graphs-testonly", 74 | ], 75 | stdout=log_fh, 76 | stderr=log_fh) 77 | with open(test_logfile, "r") as log_fh: 78 | for line in log_fh.readlines(): 79 | test_res_match = TEST_RES_RE.search(line) 80 | if test_res_match is not None: 81 | testonly_accs.append(float(test_res_match.groups()[0])) 82 | 83 | print("| %- 14s | %.3f (+/- %.3f) | %.3f (+/- %.3f) | %.3f (+/- %.3f) |" 84 | % (model, 85 | np.mean(valid_accs), 86 | np.std(valid_accs), 87 | np.mean(test_accs), 88 | np.std(test_accs), 89 | np.mean(testonly_accs), 90 | np.std(testonly_accs), 91 | )) 92 | 93 | 94 | if __name__ == "__main__": 95 | args = docopt(__doc__) 96 | run_and_debug(lambda: run(args), enable_debugging=args['--debug']) 97 | -------------------------------------------------------------------------------- /tasks/__init__.py: -------------------------------------------------------------------------------- 1 | from .sparse_graph_task import Sparse_Graph_Task, DataFold 2 | from .qm9_task import QM9_Task 3 | from .citation_network_task import Citation_Network_Task 4 | from .ppi_task import PPI_Task 5 | from .varmisuse_task import VarMisuse_Task 6 | -------------------------------------------------------------------------------- /tasks/citation_network_task.py: -------------------------------------------------------------------------------- 1 | from collections import namedtuple 2 | from typing import Any, Dict, List, Iterable, Iterator 3 | 4 | import numpy as np 5 | import tensorflow as tf 6 | from dpu_utils.utils import RichPath, LocalPath 7 | 8 | from .sparse_graph_task import Sparse_Graph_Task, DataFold, MinibatchData 9 | from utils.citation_network_utils import load_data, preprocess_features 10 | 11 | 12 | CitationData = namedtuple('CitationData', ['adj_lists', 'num_incoming_edges', 'features', 'labels', 'mask']) 13 | 14 | 15 | class Citation_Network_Task(Sparse_Graph_Task): 16 | @classmethod 17 | def default_params(cls): 18 | params = 
super().default_params() 19 | params.update({ 20 | 'add_self_loop_edges': True, 21 | 'use_graph': True, 22 | 'activation_function': "tanh", 23 | 'out_layer_dropout_keep_prob': 1.0, 24 | }) 25 | return params 26 | 27 | @staticmethod 28 | def name() -> str: 29 | return "CitationNetwork" 30 | 31 | @staticmethod 32 | def default_data_path() -> str: 33 | return "data/citation-networks" 34 | 35 | def __init__(self, params: Dict[str, Any]): 36 | super().__init__(params) 37 | 38 | # Things that will be filled once we load data: 39 | self.__num_edge_types = 2 40 | self.__initial_node_feature_size = 0 41 | self.__num_output_classes = 0 42 | 43 | def get_metadata(self) -> Dict[str, Any]: 44 | metadata = super().get_metadata() 45 | metadata['initial_node_feature_size'] = self.__initial_node_feature_size 46 | metadata['num_output_classes'] = self.__num_output_classes 47 | return metadata 48 | 49 | def restore_from_metadata(self, metadata: Dict[str, Any]) -> None: 50 | super().restore_from_metadata(metadata) 51 | self.__initial_node_feature_size = metadata['initial_node_feature_size'] 52 | self.__num_output_classes = metadata['num_output_classes'] 53 | 54 | @property 55 | def num_edge_types(self) -> int: 56 | return self.__num_edge_types 57 | 58 | @property 59 | def initial_node_feature_size(self) -> int: 60 | return self.__initial_node_feature_size 61 | 62 | # -------------------- Data Loading -------------------- 63 | def load_data(self, path: RichPath) -> None: 64 | train_data, valid_data, _ = self.__load_data(path) 65 | self._loaded_data[DataFold.TRAIN] = train_data 66 | self._loaded_data[DataFold.VALIDATION] = valid_data 67 | 68 | def load_eval_data_from_path(self, path: RichPath) -> Iterable[Any]: 69 | _, _, test_data = self.__load_data(path) 70 | return test_data 71 | 72 | def __load_data(self, data_directory: RichPath): 73 | assert isinstance(data_directory, LocalPath), "CitationNetworkTask can only handle local data" 74 | data_path = data_directory.path 75 | print(" Loading CitationNetwork data from %s." 
% (data_path,)) 76 | (adj_list, features, train_labels, valid_labels, test_labels, train_mask, valid_mask, test_mask) = \ 77 | load_data(data_path, self.params['data_kind']) 78 | self.__initial_node_feature_size = features.shape[1] 79 | self.__num_output_classes = train_labels.shape[1] 80 | features = preprocess_features(features) 81 | 82 | train_data = \ 83 | [self.__preprocess_data(adj_list, features, np.argmax(train_labels, axis=1), train_mask)] 84 | valid_data = \ 85 | [self.__preprocess_data(adj_list, features, np.argmax(valid_labels, axis=1), valid_mask)] 86 | test_data = \ 87 | [self.__preprocess_data(adj_list, features, np.argmax(test_labels, axis=1), test_mask)] 88 | return train_data, valid_data, test_data 89 | 90 | def __preprocess_data(self, adj_list: Dict[int, List[int]], features, labels, mask) -> CitationData: 91 | flat_adj_list = [] 92 | self_loop_adj_list = [] 93 | num_incoming_edges = np.zeros(shape=[len(adj_list)], dtype=np.int32) 94 | for node, neighbours in adj_list.items(): 95 | for neighbour in neighbours: 96 | flat_adj_list.append((node, neighbour)) 97 | flat_adj_list.append((neighbour, node)) 98 | num_incoming_edges[neighbour] += 1 99 | num_incoming_edges[node] += 1 100 | self_loop_adj_list.append((node, node)) 101 | 102 | # Prepend the self-loop information: 103 | num_incoming_edges = np.stack([np.ones_like(num_incoming_edges, dtype=np.int32), 104 | num_incoming_edges]) # Shape [2, V] 105 | return CitationData(adj_lists=[self_loop_adj_list, flat_adj_list], 106 | num_incoming_edges=num_incoming_edges, 107 | features=features, 108 | labels=labels, 109 | mask=mask) 110 | 111 | # -------------------- Model Construction -------------------- 112 | def make_task_output_model(self, 113 | placeholders: Dict[str, tf.Tensor], 114 | model_ops: Dict[str, tf.Tensor], 115 | ) -> None: 116 | placeholders['labels'] = tf.placeholder(tf.int32, [None], name='labels') 117 | placeholders['mask'] = tf.placeholder(tf.float32, [None], name='mask') 118 | placeholders['out_layer_dropout_keep_prob'] =\ 119 | tf.placeholder_with_default(input=tf.constant(1.0, dtype=tf.float32), 120 | shape=[], 121 | name='out_layer_dropout_keep_prob') 122 | 123 | final_node_representations = \ 124 | tf.nn.dropout(model_ops['final_node_representations'], 125 | rate=1.0 - placeholders['out_layer_dropout_keep_prob']) 126 | output_label_logits = \ 127 | tf.keras.layers.Dense(units=self.__num_output_classes, 128 | use_bias=False, 129 | activation=None, 130 | name="OutputDenseLayer", 131 | )(final_node_representations) # Shape [V, Classes] 132 | 133 | num_masked_preds = tf.reduce_sum(placeholders['mask']) 134 | losses = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=output_label_logits, 135 | labels=placeholders['labels']) 136 | total_loss = tf.reduce_sum(losses * placeholders['mask']) 137 | 138 | correct_preds = tf.equal(tf.argmax(output_label_logits, axis=1, output_type=tf.int32), 139 | placeholders['labels']) 140 | num_masked_correct = tf.reduce_sum(tf.cast(correct_preds, tf.float32) * placeholders['mask']) 141 | accuracy = num_masked_correct / num_masked_preds 142 | tf.summary.scalar('accuracy', accuracy) 143 | 144 | model_ops['task_metrics'] = { 145 | 'loss': total_loss / num_masked_preds, 146 | 'total_loss': total_loss, 147 | 'accuracy': accuracy, 148 | } 149 | 150 | # -------------------- Minibatching and training loop -------------------- 151 | def make_minibatch_iterator(self, 152 | data: Iterable[Any], 153 | data_fold: DataFold, 154 | model_placeholders: Dict[str, tf.Tensor], 155 | 
max_nodes_per_batch: int) \ 156 | -> Iterator[MinibatchData]: 157 | data = next(iter(data)) # type: CitationData 158 | if data_fold == DataFold.TRAIN: 159 | out_layer_dropout_keep_prob = self.params['out_layer_dropout_keep_prob'] 160 | else: 161 | out_layer_dropout_keep_prob = 1.0 162 | 163 | feed_dict = { 164 | model_placeholders['initial_node_features']: data.features, 165 | model_placeholders['adjacency_lists'][0]: data.adj_lists[0], 166 | model_placeholders['adjacency_lists'][1]: data.adj_lists[1], 167 | model_placeholders['type_to_num_incoming_edges']: data.num_incoming_edges, 168 | model_placeholders['num_graphs']: 1, 169 | model_placeholders['labels']: data.labels, 170 | model_placeholders['mask']: data.mask, 171 | model_placeholders['out_layer_dropout_keep_prob']: out_layer_dropout_keep_prob, 172 | } 173 | 174 | yield MinibatchData(feed_dict=feed_dict, 175 | num_graphs=1, 176 | num_nodes=data.features.shape[0], 177 | num_edges=sum(len(adj_list) for adj_list in data.adj_lists)) 178 | 179 | def early_stopping_metric(self, task_metric_results: List[Dict[str, np.ndarray]], num_graphs: int) -> float: 180 | # Early stopping based on average loss: 181 | return np.sum([m['total_loss'] for m in task_metric_results]) / num_graphs 182 | 183 | def pretty_print_epoch_task_metrics(self, task_metric_results: List[Dict[str, np.ndarray]], num_graphs: int) -> str: 184 | return "Acc: %.2f%%" % (task_metric_results[0]['accuracy'] * 100,) 185 | -------------------------------------------------------------------------------- /tasks/default_hypers/PPI_GGNN.json: -------------------------------------------------------------------------------- 1 | {"task_params": {}, 2 | "model_params": {"graph_num_layers": 3, 3 | "hidden_size": 320, 4 | "max_nodes_in_batch": 12500, 5 | "graph_layer_input_dropout_keep_prob": 0.9 6 | } 7 | } -------------------------------------------------------------------------------- /tasks/default_hypers/PPI_GNN-Edge-MLP0.json: -------------------------------------------------------------------------------- 1 | {"task_params": {}, 2 | "model_params": {"graph_num_layers": 5, 3 | "hidden_size": 256, 4 | "max_nodes_in_batch": 6000, 5 | "graph_layer_input_dropout_keep_prob": 0.8 6 | } 7 | } -------------------------------------------------------------------------------- /tasks/default_hypers/PPI_GNN-Edge-MLP1.json: -------------------------------------------------------------------------------- 1 | {"task_params": {}, 2 | "model_params": {"graph_num_layers": 4, 3 | "hidden_size": 320, 4 | "max_nodes_in_batch": 6000, 5 | "graph_layer_input_dropout_keep_prob": 0.9 6 | } 7 | } -------------------------------------------------------------------------------- /tasks/default_hypers/PPI_GNN-FiLM.json: -------------------------------------------------------------------------------- 1 | {"task_params": {}, 2 | "model_params": {"graph_num_layers": 4, 3 | "hidden_size": 320, 4 | "max_nodes_in_batch": 6000, 5 | "graph_layer_input_dropout_keep_prob": 0.9 6 | } 7 | } -------------------------------------------------------------------------------- /tasks/default_hypers/PPI_RGAT.json: -------------------------------------------------------------------------------- 1 | {"task_params": {}, 2 | "model_params": {"graph_num_layers": 3, 3 | "hidden_size": 320, 4 | "max_nodes_in_batch": 11000, 5 | "graph_layer_input_dropout_keep_prob": 0.9 6 | } 7 | } -------------------------------------------------------------------------------- /tasks/default_hypers/PPI_RGCN.json: 
-------------------------------------------------------------------------------- 1 | {"task_params": {}, 2 | "model_params": {"graph_num_layers": 4, 3 | "hidden_size": 320, 4 | "max_nodes_in_batch": 12500, 5 | "graph_layer_input_dropout_keep_prob": 0.9 6 | } 7 | } -------------------------------------------------------------------------------- /tasks/default_hypers/PPI_RGIN.json: -------------------------------------------------------------------------------- 1 | { 2 | "task_params": { 3 | "add_self_loop_edges": true 4 | }, 5 | "model_params": { 6 | "patience": 25, 7 | "graph_num_layers": 5, 8 | "hidden_size": 256, 9 | "max_nodes_in_batch": 8000, 10 | "graph_num_edge_MLP_hidden_layers": 1, 11 | "graph_num_aggr_MLP_hidden_layers": null, 12 | "graph_layer_input_dropout_keep_prob": 0.8 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /tasks/default_hypers/QM9_GGNN.json: -------------------------------------------------------------------------------- 1 | { 2 | "task_params": {}, 3 | "model_params": { 4 | "graph_dense_between_every_num_gnn_layers": 32, 5 | "learning_rate": 0.0008471209461829375, 6 | "graph_inter_layer_norm": true, 7 | "graph_activation_function": "relu", 8 | "graph_num_timesteps_per_layer": 1, 9 | "graph_model_activation_function": "tanh", 10 | "momentum": 0.85, 11 | "optimizer": "RMSProp", 12 | "clamp_gradient_norm": 1.0, 13 | "patience": 25, 14 | "max_epochs": 10000, 15 | "graph_rnn_cell": "RNN", 16 | "graph_layer_input_dropout_keep_prob": 1.0, 17 | "graph_num_layers": 6, 18 | "message_aggregation_function": "sum", 19 | "graph_residual_connection_every_num_layers": 2, 20 | "hidden_size": 128, 21 | "max_nodes_in_batch": 50000, 22 | "learning_rate_decay": 0.98 23 | } 24 | } -------------------------------------------------------------------------------- /tasks/default_hypers/QM9_GNN-Edge-MLP0.json: -------------------------------------------------------------------------------- 1 | { 2 | "task_params": {}, 3 | "model_params": { 4 | "max_nodes_in_batch": 50000, 5 | "graph_num_layers": 8, 6 | "graph_num_timesteps_per_layer": 1, 7 | "graph_layer_input_dropout_keep_prob": 0.9, 8 | "graph_dense_between_every_num_gnn_layers": 32, 9 | "graph_model_activation_function": "tanh", 10 | "graph_residual_connection_every_num_layers": 2, 11 | "graph_inter_layer_norm": true, 12 | "max_epochs": 10000, 13 | "patience": 25, 14 | "optimizer": "RMSProp", 15 | "learning_rate": 0.0005072060718321982, 16 | "learning_rate_decay": 0.98, 17 | "lr_for_num_graphs_per_batch": null, 18 | "momentum": 0.85, 19 | "clamp_gradient_norm": 1.0, 20 | "hidden_size": 128, 21 | "graph_activation_function": "relu", 22 | "message_aggregation_function": "sum", 23 | "graph_message_weights_dropout_ratio": 0.0, 24 | "use_target_state_as_input": true, 25 | "num_edge_hidden_layers": 0 26 | } 27 | } -------------------------------------------------------------------------------- /tasks/default_hypers/QM9_GNN-Edge-MLP1.json: -------------------------------------------------------------------------------- 1 | { 2 | "task_params": {}, 3 | "model_params": { 4 | "max_nodes_in_batch": 50000, 5 | "graph_num_layers": 8, 6 | "graph_num_timesteps_per_layer": 1, 7 | "graph_layer_input_dropout_keep_prob": 0.9, 8 | "graph_dense_between_every_num_gnn_layers": 32, 9 | "graph_model_activation_function": "tanh", 10 | "graph_residual_connection_every_num_layers": 2, 11 | "graph_inter_layer_norm": false, 12 | "max_epochs": 10000, 13 | "patience": 25, 14 | "optimizer": "Adam", 15 | 
"learning_rate": 0.0006482335154980316, 16 | "learning_rate_decay": 0.98, 17 | "lr_for_num_graphs_per_batch": null, 18 | "momentum": 0.85, 19 | "clamp_gradient_norm": 1.0, 20 | "hidden_size": 128, 21 | "graph_activation_function": "gelu", 22 | "message_aggregation_function": "sum", 23 | "graph_message_weights_dropout_ratio": 0.0, 24 | "use_target_state_as_input": true, 25 | "num_edge_hidden_layers": 1 26 | } 27 | } -------------------------------------------------------------------------------- /tasks/default_hypers/QM9_GNN-FiLM.json: -------------------------------------------------------------------------------- 1 | { 2 | "task_params": {}, 3 | "model_params": { 4 | "message_aggregation_function": "sum", 5 | "graph_activation_function": "elu", 6 | "momentum": 0.85, 7 | "learning_rate_decay": 0.98, 8 | "patience": 25, 9 | "normalize_messages_by_num_incoming": false, 10 | "max_epochs": 10000, 11 | "graph_num_timesteps_per_layer": 1, 12 | "optimizer": "RMSProp", 13 | "hidden_size": 128, 14 | "graph_num_layers": 8, 15 | "graph_residual_connection_every_num_layers": 2, 16 | "graph_layer_input_dropout_keep_prob": 0.9, 17 | "learning_rate": 0.0006654723503723253, 18 | "graph_inter_layer_norm": true, 19 | "graph_dense_between_every_num_gnn_layers": 32, 20 | "max_nodes_in_batch": 50000, 21 | "graph_model_activation_function": "tanh", 22 | "clamp_gradient_norm": 1.0 23 | } 24 | } -------------------------------------------------------------------------------- /tasks/default_hypers/QM9_RGAT.json: -------------------------------------------------------------------------------- 1 | { 2 | "task_params": {}, 3 | "model_params": { 4 | "graph_model_activation_function": "tanh", 5 | "patience": 25, 6 | "optimizer": "RMSProp", 7 | "graph_activation_function": "elu", 8 | "learning_rate_decay": 0.98, 9 | "max_nodes_in_batch": 50000, 10 | "graph_layer_input_dropout_keep_prob": 0.9, 11 | "graph_inter_layer_norm": false, 12 | "clamp_gradient_norm": 1.0, 13 | "graph_num_layers": 8, 14 | "momentum": 0.85, 15 | "graph_dense_between_every_num_gnn_layers": 32, 16 | "hidden_size": 128, 17 | "graph_residual_connection_every_num_layers": 2, 18 | "num_heads": 8, 19 | "learning_rate": 0.0005800837190772856, 20 | "graph_num_timesteps_per_layer": 1, 21 | "max_epochs": 10000 22 | } 23 | } -------------------------------------------------------------------------------- /tasks/default_hypers/QM9_RGCN.json: -------------------------------------------------------------------------------- 1 | { 2 | "task_params": {}, 3 | "model_params": { 4 | "graph_residual_connection_every_num_layers": 2, 5 | "max_nodes_in_batch": 50000, 6 | "graph_num_layers": 8, 7 | "graph_model_activation_function": "tanh", 8 | "graph_layer_input_dropout_keep_prob": 1.0, 9 | "graph_activation_function": "leaky_relu", 10 | "graph_num_timesteps_per_layer": 1, 11 | "learning_rate_decay": 0.98, 12 | "max_epochs": 10000, 13 | "momentum": 0.85, 14 | "message_aggregation_function": "sum", 15 | "graph_dense_between_every_num_gnn_layers": 32, 16 | "learning_rate": 0.0005720408870458782, 17 | "graph_inter_layer_norm": true, 18 | "hidden_size": 128, 19 | "clamp_gradient_norm": 1.0, 20 | "patience": 25, 21 | "optimizer": "RMSProp" 22 | } 23 | } -------------------------------------------------------------------------------- /tasks/default_hypers/QM9_RGIN.json: -------------------------------------------------------------------------------- 1 | { 2 | "task_params": { 3 | "add_self_loop_edges": true 4 | }, 5 | "model_params": { 6 | "max_nodes_in_batch": 50000, 7 | 
"graph_num_layers": 6, 8 | "graph_num_timesteps_per_layer": 1, 9 | "graph_layer_input_dropout_keep_prob": 0.9, 10 | "graph_dense_between_every_num_gnn_layers": 32, 11 | "graph_model_activation_function": "tanh", 12 | "graph_residual_connection_every_num_layers": 2, 13 | "graph_inter_layer_norm": false, 14 | "max_epochs": 10000, 15 | "patience": 25, 16 | "optimizer": "RMSProp", 17 | "learning_rate": 0.000700776770702023, 18 | "learning_rate_decay": 0.98, 19 | "lr_for_num_graphs_per_batch": null, 20 | "momentum": 0.85, 21 | "clamp_gradient_norm": 1.0, 22 | "hidden_size": 128, 23 | "graph_activation_function": "elu", 24 | "message_aggregation_function": "sum", 25 | "use_target_state_as_input": false, 26 | "graph_num_edge_MLP_hidden_layers": 1, 27 | "graph_num_aggr_MLP_hidden_layers": null 28 | } 29 | } -------------------------------------------------------------------------------- /tasks/default_hypers/VarMisuse_GGNN.json: -------------------------------------------------------------------------------- 1 | { 2 | "task_params": { 3 | "add_self_loop_edges": false 4 | }, 5 | "model_params": { 6 | "max_nodes_in_batch": 100000, 7 | "graph_num_layers": 6, 8 | "graph_num_timesteps_per_layer": 1, 9 | "graph_layer_input_dropout_keep_prob": 0.8, 10 | "graph_message_weights_dropout_ratio": 0.0, 11 | "graph_dense_between_every_num_gnn_layers": 10000, 12 | "graph_model_activation_function": "tanh", 13 | "graph_residual_connection_every_num_layers": 10000, 14 | "graph_inter_layer_norm": false, 15 | "max_epochs": 10000, 16 | "patience": 5, 17 | "optimizer": "Adam", 18 | "learning_rate": 0.00015, 19 | "learning_rate_decay": 0.98, 20 | "momentum": 0.85, 21 | "clamp_gradient_norm": 1.0, 22 | "random_seed": 0, 23 | "hidden_size": 96, 24 | "graph_rnn_cell": "GRU", 25 | "graph_activation_function": "tanh", 26 | "message_aggregation_function": "sum" 27 | } 28 | } -------------------------------------------------------------------------------- /tasks/default_hypers/VarMisuse_GNN-Edge-MLP0.json: -------------------------------------------------------------------------------- 1 | { 2 | "task_params": { 3 | "add_self_loop_edges": false 4 | }, 5 | "model_params": { 6 | "max_nodes_in_batch": 50000, 7 | "graph_num_layers": 8, 8 | "graph_num_timesteps_per_layer": 1, 9 | "graph_layer_input_dropout_keep_prob": 0.9, 10 | "graph_dense_between_every_num_gnn_layers": 1, 11 | "graph_model_activation_function": "tanh", 12 | "graph_residual_connection_every_num_layers": 2, 13 | "graph_inter_layer_norm": true, 14 | "max_epochs": 10000, 15 | "patience": 5, 16 | "optimizer": "Adam", 17 | "learning_rate": 0.00015, 18 | "learning_rate_decay": 0.98, 19 | "lr_for_num_graphs_per_batch": 30, 20 | "momentum": 0.85, 21 | "clamp_gradient_norm": 1.0, 22 | "random_seed": 0, 23 | "hidden_size": 128, 24 | "graph_activation_function": "gelu", 25 | "message_aggregation_function": "sum", 26 | "graph_message_weights_dropout_ratio": 0.0, 27 | "use_target_state_as_input": true, 28 | "num_edge_hidden_layers": 0 29 | } 30 | } -------------------------------------------------------------------------------- /tasks/default_hypers/VarMisuse_GNN-Edge-MLP1.json: -------------------------------------------------------------------------------- 1 | { 2 | "task_params": { 3 | "add_self_loop_edges": false 4 | }, 5 | "model_params": { 6 | "max_nodes_in_batch": 45000, 7 | "graph_num_layers": 10, 8 | "graph_num_timesteps_per_layer": 1, 9 | "graph_layer_input_dropout_keep_prob": 0.9, 10 | "graph_dense_between_every_num_gnn_layers": 1, 11 | 
"graph_model_activation_function": "tanh", 12 | "graph_residual_connection_every_num_layers": 2, 13 | "graph_inter_layer_norm": true, 14 | "max_epochs": 10000, 15 | "patience": 5, 16 | "optimizer": "Adam", 17 | "learning_rate": 0.00015, 18 | "learning_rate_decay": 0.98, 19 | "lr_for_num_graphs_per_batch": 30, 20 | "momentum": 0.85, 21 | "clamp_gradient_norm": 1.0, 22 | "random_seed": 0, 23 | "hidden_size": 128, 24 | "graph_activation_function": "gelu", 25 | "message_aggregation_function": "sum", 26 | "graph_message_weights_dropout_ratio": 0.0, 27 | "use_target_state_as_input": true, 28 | "num_edge_hidden_layers": 1 29 | } 30 | } -------------------------------------------------------------------------------- /tasks/default_hypers/VarMisuse_GNN-FiLM.json: -------------------------------------------------------------------------------- 1 | { 2 | "task_params": { 3 | "add_self_loop_edges": true 4 | }, 5 | "model_params": { 6 | "max_nodes_in_batch": 60000, 7 | "graph_num_layers": 10, 8 | "graph_num_timesteps_per_layer": 1, 9 | "graph_layer_input_dropout_keep_prob": 0.8, 10 | "graph_message_weights_dropout_ratio": 0.0, 11 | "graph_dense_between_every_num_gnn_layers": 1, 12 | "graph_model_activation_function": "tanh", 13 | "graph_residual_connection_every_num_layers": 2, 14 | "graph_inter_layer_norm": false, 15 | "max_epochs": 10000, 16 | "patience": 5, 17 | "optimizer": "Adam", 18 | "learning_rate": 0.00015, 19 | "learning_rate_decay": 0.98, 20 | "momentum": 0.85, 21 | "clamp_gradient_norm": 1.0, 22 | "random_seed": 0, 23 | "hidden_size": 128, 24 | "graph_activation_function": "ReLU", 25 | "message_aggregation_function": "sum", 26 | "normalize_messages_by_num_incoming": false 27 | } 28 | } 29 | -------------------------------------------------------------------------------- /tasks/default_hypers/VarMisuse_RGAT.json: -------------------------------------------------------------------------------- 1 | { 2 | "task_params": { 3 | "add_self_loop_edges": true 4 | }, 5 | "model_params": { 6 | "max_nodes_in_batch": 50000, 7 | "graph_num_layers": 8, 8 | "graph_num_timesteps_per_layer": 1, 9 | "graph_layer_input_dropout_keep_prob": 0.9, 10 | "graph_dense_between_every_num_gnn_layers": 10000, 11 | "graph_model_activation_function": "tanh", 12 | "graph_residual_connection_every_num_layers": 10000, 13 | "graph_inter_layer_norm": false, 14 | "max_epochs": 10000, 15 | "patience": 5, 16 | "optimizer": "Adam", 17 | "learning_rate": 0.00015, 18 | "learning_rate_decay": 0.98, 19 | "lr_for_num_graphs_per_batch": 30, 20 | "momentum": 0.85, 21 | "clamp_gradient_norm": 1.0, 22 | "random_seed": 0, 23 | "hidden_size": 96, 24 | "num_heads": 8, 25 | "graph_activation_function": "tanh" 26 | } 27 | } -------------------------------------------------------------------------------- /tasks/default_hypers/VarMisuse_RGCN.json: -------------------------------------------------------------------------------- 1 | { 2 | "task_params": { 3 | "add_self_loop_edges": true 4 | }, 5 | "model_params": { 6 | "max_nodes_in_batch": 75000, 7 | "graph_num_layers": 10, 8 | "graph_num_timesteps_per_layer": 1, 9 | "graph_layer_input_dropout_keep_prob": 0.9, 10 | "graph_dense_between_every_num_gnn_layers": 10000, 11 | "graph_model_activation_function": "tanh", 12 | "graph_residual_connection_every_num_layers": 10000, 13 | "graph_inter_layer_norm": false, 14 | "max_epochs": 10000, 15 | "patience": 5, 16 | "optimizer": "Adam", 17 | "learning_rate": 0.00015, 18 | "learning_rate_decay": 0.98, 19 | "momentum": 0.85, 20 | "clamp_gradient_norm": 1.0, 21 
| "random_seed": 0, 22 | "hidden_size": 128, 23 | "graph_activation_function": "ReLU", 24 | "message_aggregation_function": "sum" 25 | } 26 | } -------------------------------------------------------------------------------- /tasks/default_hypers/VarMisuse_RGIN.json: -------------------------------------------------------------------------------- 1 | { 2 | "task_params": { 3 | "add_self_loop_edges": true 4 | }, 5 | "model_params": { 6 | "max_nodes_in_batch": 50000, 7 | "graph_num_layers": 6, 8 | "graph_num_timesteps_per_layer": 1, 9 | "graph_layer_input_dropout_keep_prob": 0.8, 10 | "graph_dense_between_every_num_gnn_layers": 1, 11 | "graph_model_activation_function": "tanh", 12 | "graph_residual_connection_every_num_layers": 2, 13 | "graph_inter_layer_norm": true, 14 | "max_epochs": 10000, 15 | "patience": 5, 16 | "optimizer": "Adam", 17 | "learning_rate": 0.00015, 18 | "learning_rate_decay": 0.98, 19 | "lr_for_num_graphs_per_batch": 30, 20 | "momentum": 0.85, 21 | "clamp_gradient_norm": 1.0, 22 | "hidden_size": 128, 23 | "graph_activation_function": "ReLU", 24 | "message_aggregation_function": "sum", 25 | "use_target_state_as_input": false, 26 | "graph_num_edge_MLP_hidden_layers": 1, 27 | "graph_num_aggr_MLP_hidden_layers": null 28 | } 29 | } -------------------------------------------------------------------------------- /tasks/ppi_task.py: -------------------------------------------------------------------------------- 1 | from collections import namedtuple 2 | from typing import Any, Dict, Iterator, List, Iterable 3 | 4 | import tensorflow as tf 5 | import numpy as np 6 | from dpu_utils.utils import RichPath 7 | 8 | from .sparse_graph_task import Sparse_Graph_Task, DataFold, MinibatchData 9 | from utils import micro_f1 10 | 11 | 12 | GraphSample = namedtuple('GraphSample', ['adjacency_lists', 13 | 'type_to_node_to_num_incoming_edges', 14 | 'node_features', 15 | 'node_labels', 16 | ]) 17 | 18 | 19 | class PPI_Task(Sparse_Graph_Task): 20 | @classmethod 21 | def default_params(cls): 22 | params = super().default_params() 23 | params.update({ 24 | 'add_self_loop_edges': True, 25 | 'tie_fwd_bkwd_edges': False, 26 | 'out_layer_dropout_keep_prob': 1.0, 27 | }) 28 | return params 29 | 30 | @staticmethod 31 | def name() -> str: 32 | return "PPI" 33 | 34 | @staticmethod 35 | def default_data_path() -> str: 36 | return "data/ppi" 37 | 38 | def __init__(self, params: Dict[str, Any]): 39 | super().__init__(params) 40 | 41 | # Things that will be filled once we load data: 42 | self.__num_edge_types = 0 43 | self.__initial_node_feature_size = 0 44 | self.__num_labels = 0 45 | 46 | def get_metadata(self) -> Dict[str, Any]: 47 | metadata = super().get_metadata() 48 | metadata['num_edge_types'] = self.__num_edge_types 49 | metadata['initial_node_feature_size'] = self.__initial_node_feature_size 50 | metadata['num_labels'] = self.__num_labels 51 | return metadata 52 | 53 | def restore_from_metadata(self, metadata: Dict[str, Any]) -> None: 54 | super().restore_from_metadata(metadata) 55 | self.__num_edge_types = metadata['num_edge_types'] 56 | self.__initial_node_feature_size = metadata['initial_node_feature_size'] 57 | self.__num_labels = metadata['num_labels'] 58 | 59 | @property 60 | def num_edge_types(self) -> int: 61 | return self.__num_edge_types 62 | 63 | @property 64 | def initial_node_feature_size(self) -> int: 65 | return self.__initial_node_feature_size 66 | 67 | # -------------------- Data Loading -------------------- 68 | def load_data(self, path: RichPath) -> None: 69 | # Data in format as 
downloaded from https://s3.us-east-2.amazonaws.com/dgl.ai/dataset/ppi.zip 70 | self._loaded_data[DataFold.TRAIN] = self.__load_data(path, DataFold.TRAIN) 71 | self._loaded_data[DataFold.VALIDATION] = self.__load_data(path, DataFold.VALIDATION) 72 | 73 | def load_eval_data_from_path(self, path: RichPath) -> Iterable[Any]: 74 | return self.__load_data(path, DataFold.TEST) 75 | 76 | def __load_data(self, data_dir: RichPath, data_fold: DataFold) -> List[GraphSample]: 77 | if data_fold == DataFold.TRAIN: 78 | data_name = "train" 79 | elif data_fold == DataFold.VALIDATION: 80 | data_name = "valid" 81 | elif data_fold == DataFold.TEST: 82 | data_name = "test" 83 | else: 84 | raise ValueError("Unknown data fold '%s'" % str(data_fold)) 85 | print(" Loading PPI %s data from %s." % (data_name, data_dir)) 86 | 87 | graph_json_data = data_dir.join("%s_graph.json" % data_name).read_by_file_suffix() 88 | node_to_features = data_dir.join("%s_feats.npy" % data_name).read_by_file_suffix() 89 | node_to_labels = data_dir.join("%s_labels.npy" % data_name).read_by_file_suffix() 90 | node_to_graph_id = data_dir.join("%s_graph_id.npy" % data_name).read_by_file_suffix() 91 | self.__initial_node_feature_size = node_to_features.shape[-1] 92 | self.__num_labels = node_to_labels.shape[-1] 93 | 94 | # We read in all the data in two steps: 95 | # (1) Read features, labels and insert self-loop edges (edge type 0). 96 | # Implicitly, this gives us the number of nodes per graph. 97 | # (2) Read all edges, and shift them so that each graph starts with node 0. 98 | 99 | fwd_edge_type = 0 100 | self.__num_edge_types = 1 101 | if self.params['add_self_loop_edges']: 102 | self_loop_edge_type = self.__num_edge_types 103 | self.__num_edge_types += 1 104 | if not self.params['tie_fwd_bkwd_edges']: 105 | bkwd_edge_type = self.__num_edge_types 106 | self.__num_edge_types += 1 107 | 108 | graph_id_to_graph_data = {} # type: Dict[int, GraphSample] 109 | graph_id_to_node_offset = {} 110 | num_total_nodes = node_to_features.shape[0] 111 | for node_id in range(num_total_nodes): 112 | graph_id = node_to_graph_id[node_id] 113 | # In case we are entering a new graph, note its ID, so that we can normalise everything to start at 0 114 | if graph_id not in graph_id_to_graph_data: 115 | graph_id_to_graph_data[graph_id] = \ 116 | GraphSample(adjacency_lists=[[] for _ in range(self.__num_edge_types)], 117 | type_to_node_to_num_incoming_edges=[[] for _ in range(self.__num_edge_types)], 118 | node_features=[], 119 | node_labels=[]) 120 | graph_id_to_node_offset[graph_id] = node_id 121 | cur_graph_data = graph_id_to_graph_data[graph_id] 122 | cur_graph_data.node_features.append(node_to_features[node_id]) 123 | cur_graph_data.node_labels.append(node_to_labels[node_id]) 124 | shifted_node_id = node_id - graph_id_to_node_offset[graph_id] 125 | if self.params['add_self_loop_edges']: 126 | cur_graph_data.adjacency_lists[self_loop_edge_type].append((shifted_node_id, shifted_node_id)) 127 | cur_graph_data.type_to_node_to_num_incoming_edges[self_loop_edge_type].append(1) 128 | 129 | # Prepare reading of the edges by setting counters to 0: 130 | for graph_data in graph_id_to_graph_data.values(): 131 | num_graph_nodes = len(graph_data.node_features) 132 | graph_data.type_to_node_to_num_incoming_edges[fwd_edge_type] = np.zeros([num_graph_nodes], np.int32) 133 | if not self.params['tie_fwd_bkwd_edges']: 134 | graph_data.type_to_node_to_num_incoming_edges[bkwd_edge_type] = np.zeros([num_graph_nodes], np.int32) 135 | 136 | for edge_info in 
graph_json_data['links']: 137 | src_node, tgt_node = edge_info['source'], edge_info['target'] 138 | # First, shift node IDs so that each graph starts at node 0: 139 | graph_id = node_to_graph_id[src_node] 140 | graph_node_offset = graph_id_to_node_offset[graph_id] 141 | src_node, tgt_node = src_node - graph_node_offset, tgt_node - graph_node_offset 142 | 143 | cur_graph_data = graph_id_to_graph_data[graph_id] 144 | cur_graph_data.adjacency_lists[fwd_edge_type].append((src_node, tgt_node)) 145 | cur_graph_data.type_to_node_to_num_incoming_edges[fwd_edge_type][tgt_node] += 1 146 | if not self.params['tie_fwd_bkwd_edges']: 147 | cur_graph_data.adjacency_lists[bkwd_edge_type].append((tgt_node, src_node)) 148 | cur_graph_data.type_to_node_to_num_incoming_edges[bkwd_edge_type][src_node] += 1 149 | 150 | final_graphs = [] 151 | for graph_data in graph_id_to_graph_data.values(): 152 | # numpy-ize: 153 | adj_lists = [] 154 | for edge_type_idx in range(self.__num_edge_types): 155 | adj_lists.append(np.array(graph_data.adjacency_lists[edge_type_idx])) 156 | final_graphs.append( 157 | GraphSample(adjacency_lists=adj_lists, 158 | type_to_node_to_num_incoming_edges=np.array(graph_data.type_to_node_to_num_incoming_edges), 159 | node_features=np.array(graph_data.node_features), 160 | node_labels=np.array(graph_data.node_labels))) 161 | 162 | return final_graphs 163 | 164 | # -------------------- Model Construction -------------------- 165 | def make_task_output_model(self, 166 | placeholders: Dict[str, tf.Tensor], 167 | model_ops: Dict[str, tf.Tensor], 168 | ) -> None: 169 | placeholders['graph_nodes_list'] = \ 170 | tf.placeholder(dtype=tf.int32, shape=[None], name='graph_nodes_list') 171 | placeholders['target_labels'] = \ 172 | tf.placeholder(dtype=tf.float32, shape=[None, self.__num_labels], name='target_labels') 173 | placeholders['out_layer_dropout_keep_prob'] = \ 174 | tf.placeholder(dtype=tf.float32, shape=[], name='out_layer_dropout_keep_prob') 175 | 176 | per_node_logits = \ 177 | tf.keras.layers.Dense(units=self.__num_labels, 178 | use_bias=True, 179 | )(model_ops['final_node_representations']) 180 | 181 | losses = tf.nn.sigmoid_cross_entropy_with_logits(logits=per_node_logits, 182 | labels=placeholders['target_labels']) 183 | total_loss = tf.reduce_sum(losses) 184 | 185 | # Compute loss as average per node (to account for changing number of nodes per batch): 186 | num_nodes_in_batch = tf.shape(placeholders['target_labels'])[0] 187 | 188 | f1_score = micro_f1(per_node_logits, placeholders['target_labels']) 189 | tf.summary.scalar("Micro F1", f1_score) 190 | model_ops['task_metrics'] = { 191 | 'loss': total_loss / tf.cast(num_nodes_in_batch, tf.float32), 192 | 'total_loss': total_loss, 193 | 'f1_score': f1_score, 194 | } 195 | 196 | # -------------------- Minibatching and training loop -------------------- 197 | def make_minibatch_iterator(self, 198 | data: Iterable[Any], 199 | data_fold: DataFold, 200 | model_placeholders: Dict[str, tf.Tensor], 201 | max_nodes_per_batch: int) \ 202 | -> Iterator[MinibatchData]: 203 | if data_fold == DataFold.TRAIN: 204 | np.random.shuffle(data) 205 | out_layer_dropout_keep_prob = self.params['out_layer_dropout_keep_prob'] 206 | else: 207 | out_layer_dropout_keep_prob = 1.0 208 | 209 | # Pack until we cannot fit more graphs in the batch 210 | num_graphs = 0 211 | while num_graphs < len(data): 212 | num_graphs_in_batch = 0 213 | batch_node_features = [] # type: List[np.ndarray] 214 | batch_node_labels = [] 215 | batch_adjacency_lists = [[] for _ in 
range(self.num_edge_types)] # type: List[List[np.ndarray]] 216 | batch_type_to_num_incoming_edges = [] 217 | batch_graph_nodes_list = [] 218 | node_offset = 0 219 | 220 | while num_graphs < len(data) and node_offset + len(data[num_graphs].node_features) < max_nodes_per_batch: 221 | cur_graph = data[num_graphs] 222 | num_nodes_in_graph = len(data[num_graphs].node_features) 223 | batch_node_features.extend(cur_graph.node_features) 224 | batch_graph_nodes_list.append(np.full(shape=[num_nodes_in_graph], 225 | fill_value=num_graphs_in_batch, 226 | dtype=np.int32)) 227 | for i in range(self.num_edge_types): 228 | batch_adjacency_lists[i].append(cur_graph.adjacency_lists[i] + node_offset) 229 | batch_type_to_num_incoming_edges.append(cur_graph.type_to_node_to_num_incoming_edges) 230 | batch_node_labels.append(cur_graph.node_labels) 231 | num_graphs += 1 232 | num_graphs_in_batch += 1 233 | node_offset += num_nodes_in_graph 234 | 235 | batch_feed_dict = { 236 | model_placeholders['initial_node_features']: np.array(batch_node_features), 237 | model_placeholders['type_to_num_incoming_edges']: np.concatenate(batch_type_to_num_incoming_edges, axis=1), 238 | model_placeholders['graph_nodes_list']: np.concatenate(batch_graph_nodes_list), 239 | model_placeholders['target_labels']: np.concatenate(batch_node_labels, axis=0), 240 | model_placeholders['out_layer_dropout_keep_prob']: out_layer_dropout_keep_prob, 241 | } 242 | 243 | # Merge adjacency lists: 244 | num_edges = 0 245 | for i in range(self.num_edge_types): 246 | if len(batch_adjacency_lists[i]) > 0: 247 | adj_list = np.concatenate(batch_adjacency_lists[i]) 248 | else: 249 | adj_list = np.zeros((0, 2), dtype=np.int32) 250 | num_edges += adj_list.shape[0] 251 | batch_feed_dict[model_placeholders['adjacency_lists'][i]] = adj_list 252 | 253 | yield MinibatchData(feed_dict=batch_feed_dict, 254 | num_graphs=num_graphs_in_batch, 255 | num_nodes=node_offset, 256 | num_edges=num_edges) 257 | 258 | def early_stopping_metric(self, task_metric_results: List[Dict[str, np.ndarray]], num_graphs: int) -> float: 259 | # Early stopping based on average loss: 260 | return np.sum([m['total_loss'] for m in task_metric_results]) / num_graphs 261 | 262 | def pretty_print_epoch_task_metrics(self, task_metric_results: List[Dict[str, np.ndarray]], num_graphs: int) -> str: 263 | avg_microf1 = np.average([m['f1_score'] for m in task_metric_results]) 264 | return "Avg MicroF1: %.3f" % (avg_microf1,) 265 | -------------------------------------------------------------------------------- /tasks/qm9_task.py: -------------------------------------------------------------------------------- 1 | from collections import namedtuple 2 | from typing import Any, Dict, Tuple, List, Iterable 3 | 4 | import tensorflow as tf 5 | import numpy as np 6 | from dpu_utils.utils import RichPath 7 | 8 | from .sparse_graph_task import Sparse_Graph_Task, DataFold, MinibatchData 9 | from utils import MLP 10 | 11 | 12 | GraphSample = namedtuple('GraphSample', ['adjacency_lists', 13 | 'type_to_node_to_num_incoming_edges', 14 | 'node_features', 15 | 'target_values', 16 | ]) 17 | 18 | 19 | class QM9_Task(Sparse_Graph_Task): 20 | # These magic constants were obtained during dataset generation, as result of normalising 21 | # the values of target properties: 22 | CHEMICAL_ACC_NORMALISING_FACTORS = [0.066513725, 0.012235489, 0.071939046, 23 | 0.033730778, 0.033486113, 0.004278493, 24 | 0.001330901, 0.004165489, 0.004128926, 25 | 0.00409976, 0.004527465, 0.012292586, 26 | 0.037467458] 27 | 28 | @classmethod 29 
| def default_params(cls): 30 | params = super().default_params() 31 | params.update({ 32 | 'task_ids': [0], 33 | 34 | 'add_self_loop_edges': True, 35 | 'tie_fwd_bkwd_edges': True, 36 | 'use_graph': True, 37 | 'activation_function': "tanh", 38 | 'out_layer_dropout_keep_prob': 1.0, 39 | }) 40 | return params 41 | 42 | @staticmethod 43 | def name() -> str: 44 | return "QM9" 45 | 46 | @staticmethod 47 | def default_data_path() -> str: 48 | return "data/qm9" 49 | 50 | def __init__(self, params: Dict[str, Any]): 51 | super().__init__(params) 52 | 53 | # Things that will be filled once we load data: 54 | self.__num_edge_types = 0 55 | self.__annotation_size = 0 56 | 57 | def get_metadata(self) -> Dict[str, Any]: 58 | metadata = super().get_metadata() 59 | metadata['num_edge_types'] = self.__num_edge_types 60 | metadata['annotation_size'] = self.__annotation_size 61 | return metadata 62 | 63 | def restore_from_metadata(self, metadata: Dict[str, Any]) -> None: 64 | super().restore_from_metadata(metadata) 65 | self.__num_edge_types = metadata['num_edge_types'] 66 | self.__annotation_size = metadata['annotation_size'] 67 | 68 | @property 69 | def num_edge_types(self) -> int: 70 | return self.__num_edge_types 71 | 72 | @property 73 | def initial_node_feature_size(self) -> int: 74 | return self.__annotation_size 75 | 76 | # -------------------- Data Loading -------------------- 77 | def load_data(self, path: RichPath) -> None: 78 | self._loaded_data[DataFold.TRAIN] = self.__load_data(path.join("train.jsonl.gz")) 79 | self._loaded_data[DataFold.VALIDATION] = self.__load_data(path.join("valid.jsonl.gz")) 80 | 81 | def load_eval_data_from_path(self, path: RichPath) -> Iterable[Any]: 82 | if path.path == self.default_data_path(): 83 | path = path.join("test.jsonl.gz") 84 | return self.__load_data(path) 85 | 86 | def __load_data(self, data_file: RichPath) -> List[GraphSample]: 87 | print(" Loading QM9 data from %s." 
% (data_file,)) 88 | data = list(data_file.read_by_file_suffix()) # list() needed for .jsonl case, where .read*() is just a generator 89 | 90 | # Get some common data out: 91 | num_fwd_edge_types = 0 92 | for g in data: 93 | num_fwd_edge_types = max(num_fwd_edge_types, max([e[1] for e in g['graph']])) 94 | if self.params['add_self_loop_edges']: 95 | num_fwd_edge_types += 1 96 | self.__num_edge_types = max(self.num_edge_types, 97 | num_fwd_edge_types * (1 if self.params['tie_fwd_bkwd_edges'] else 2)) 98 | self.__annotation_size = max(self.__annotation_size, len(data[0]["node_features"][0])) 99 | return self.__process_raw_graphs(data) 100 | 101 | def __process_raw_graphs(self, raw_data: Iterable[Any]) -> List[GraphSample]: 102 | processed_graphs = [] 103 | for d in raw_data: 104 | (type_to_adjacency_list, type_to_num_incoming_edges) = \ 105 | self.__graph_to_adjacency_lists(d['graph'], num_nodes=len(d["node_features"])) 106 | processed_graphs.append( 107 | GraphSample(adjacency_lists=type_to_adjacency_list, 108 | type_to_node_to_num_incoming_edges=type_to_num_incoming_edges, 109 | node_features=d["node_features"], 110 | target_values=[d["targets"][task_id][0] for task_id in self.params['task_ids']], 111 | )) 112 | return processed_graphs 113 | 114 | def __graph_to_adjacency_lists(self, graph: Iterable[Tuple[int, int, int]], num_nodes: int) \ 115 | -> Tuple[List[np.ndarray], np.ndarray]: 116 | type_to_adj_list = [[] for _ in range(self.num_edge_types)] # type: List[List[Tuple[int, int]]] 117 | type_to_num_incoming_edges = np.zeros(shape=(self.num_edge_types, num_nodes,)) 118 | for src, e, dest in graph: 119 | if self.params['add_self_loop_edges']: 120 | fwd_edge_type = e # 0 will be the self-loop type 121 | else: 122 | fwd_edge_type = e - 1 # Make edges start from 0 123 | type_to_adj_list[fwd_edge_type].append((src, dest)) 124 | type_to_num_incoming_edges[fwd_edge_type, dest] += 1 125 | if self.params['tie_fwd_bkwd_edges']: 126 | type_to_adj_list[fwd_edge_type].append((dest, src)) 127 | type_to_num_incoming_edges[fwd_edge_type, src] += 1 128 | 129 | if self.params['add_self_loop_edges']: 130 | # Add self-loop edges (idx 0, which isn't used in the data): 131 | for node in range(num_nodes): 132 | type_to_num_incoming_edges[0, node] = 1 133 | type_to_adj_list[0].append((node, node)) 134 | 135 | type_to_adj_list = [np.array(sorted(adj_list), dtype=np.int32) if len(adj_list) > 0 else np.zeros(shape=(0, 2), dtype=np.int32) 136 | for adj_list in type_to_adj_list] 137 | 138 | # Add backward edges as an additional edge type that goes backwards: 139 | if not (self.params['tie_fwd_bkwd_edges']): 140 | type_to_adj_list = type_to_adj_list[:self.num_edge_types // 2] # We allocated too much earlier... 
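# For every remaining forward edge type, build a mirrored backward edge type by reversing each (src, dst) pair and counting its incoming edges: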
141 | for (edge_type, adj_list) in enumerate(type_to_adj_list): 142 | bwd_edge_type = self.num_edge_types // 2 + edge_type 143 | type_to_adj_list.append(np.array(sorted((y, x) for (x, y) in adj_list), dtype=np.int32)) 144 | for (x, y) in adj_list: 145 | type_to_num_incoming_edges[bwd_edge_type][y] += 1 146 | 147 | return type_to_adj_list, type_to_num_incoming_edges 148 | 149 | # -------------------- Model Construction -------------------- 150 | def make_task_output_model(self, 151 | placeholders: Dict[str, tf.Tensor], 152 | model_ops: Dict[str, tf.Tensor], 153 | ) -> None: 154 | placeholders['graph_nodes_list'] = \ 155 | tf.placeholder(dtype=tf.int32, shape=[None], name='graph_nodes_list') 156 | placeholders['target_values'] = \ 157 | tf.placeholder(dtype=tf.float32, shape=[len(self.params['task_ids']), None], name='target_values') 158 | placeholders['out_layer_dropout_keep_prob'] = \ 159 | tf.placeholder(dtype=tf.float32, shape=[], name='out_layer_dropout_keep_prob') 160 | 161 | task_metrics = {} 162 | losses = [] 163 | for (internal_id, task_id) in enumerate(self.params['task_ids']): 164 | with tf.variable_scope("out_layer_task%i" % task_id): 165 | regression_gate = \ 166 | MLP(out_size=1, 167 | hidden_layers=[], 168 | use_biases=True, 169 | dropout_rate=1.0 - placeholders['out_layer_dropout_keep_prob'], 170 | name="regression_gate") 171 | regression_transform = \ 172 | MLP(out_size=1, 173 | hidden_layers=[], 174 | use_biases=True, 175 | dropout_rate=1.0 - placeholders['out_layer_dropout_keep_prob'], 176 | name="regression") 177 | 178 | per_node_outputs = regression_transform(model_ops['final_node_representations']) 179 | gate_input = tf.concat([model_ops['final_node_representations'], 180 | model_ops['initial_node_features']], 181 | axis=-1) 182 | per_node_gated_outputs = tf.nn.sigmoid(regression_gate(gate_input)) * per_node_outputs 183 | 184 | # Sum up all nodes per-graph 185 | per_graph_outputs = tf.unsorted_segment_sum(data=per_node_gated_outputs, 186 | segment_ids=placeholders['graph_nodes_list'], 187 | num_segments=placeholders['num_graphs']) 188 | per_graph_outputs = tf.squeeze(per_graph_outputs) # [g] 189 | 190 | per_graph_errors = per_graph_outputs - placeholders['target_values'][internal_id, :] 191 | task_metrics['abs_err_task%i' % task_id] = tf.reduce_sum(tf.abs(per_graph_errors)) 192 | tf.summary.scalar('mae_task%i' % task_id, 193 | task_metrics['abs_err_task%i' % task_id] / tf.cast(placeholders['num_graphs'], tf.float32)) 194 | losses.append(tf.reduce_mean(0.5 * tf.square(per_graph_errors))) 195 | model_ops['task_metrics'] = task_metrics 196 | model_ops['task_metrics']['loss'] = tf.reduce_sum(losses) 197 | model_ops['task_metrics']['total_loss'] = model_ops['task_metrics']['loss'] * tf.cast(placeholders['num_graphs'], tf.float32) 198 | 199 | # -------------------- Minibatching and training loop -------------------- 200 | def make_minibatch_iterator(self, 201 | data: Iterable[Any], 202 | data_fold: DataFold, 203 | model_placeholders: Dict[str, tf.Tensor], 204 | max_nodes_per_batch: int) \ 205 | -> Iterable[MinibatchData]: 206 | if data_fold == DataFold.TRAIN: 207 | np.random.shuffle(data) 208 | out_layer_dropout_keep_prob = self.params['out_layer_dropout_keep_prob'] 209 | else: 210 | out_layer_dropout_keep_prob = 1.0 211 | 212 | # Pack until we cannot fit more graphs in the batch 213 | num_graphs = 0 214 | while num_graphs < len(data): 215 | num_graphs_in_batch = 0 216 | batch_node_features = [] # type: List[np.ndarray] 217 | batch_target_task_values = [] 218 | 
batch_adjacency_lists = [[] for _ in range(self.num_edge_types)] # type: List[List[np.ndarray]] 219 | batch_type_to_num_incoming_edges = [] 220 | batch_graph_nodes_list = [] 221 | node_offset = 0 222 | 223 | while num_graphs < len(data) and node_offset + len(data[num_graphs].node_features) < max_nodes_per_batch: 224 | cur_graph = data[num_graphs] 225 | num_nodes_in_graph = len(cur_graph.node_features) 226 | batch_node_features.extend(cur_graph.node_features) 227 | batch_graph_nodes_list.append(np.full(shape=[num_nodes_in_graph], 228 | fill_value=num_graphs_in_batch, 229 | dtype=np.int32)) 230 | for i in range(self.num_edge_types): 231 | batch_adjacency_lists[i].append(cur_graph.adjacency_lists[i] + node_offset) 232 | 233 | # Turn counters for incoming edges into np array: 234 | batch_type_to_num_incoming_edges.append(cur_graph.type_to_node_to_num_incoming_edges) 235 | batch_target_task_values.append(cur_graph.target_values) 236 | num_graphs += 1 237 | num_graphs_in_batch += 1 238 | node_offset += num_nodes_in_graph 239 | 240 | batch_feed_dict = { 241 | model_placeholders['initial_node_features']: np.array(batch_node_features), 242 | model_placeholders['type_to_num_incoming_edges']: np.concatenate(batch_type_to_num_incoming_edges, axis=1), 243 | model_placeholders['graph_nodes_list']: np.concatenate(batch_graph_nodes_list), 244 | model_placeholders['target_values']: np.transpose(batch_target_task_values, axes=[1, 0]), 245 | model_placeholders['out_layer_dropout_keep_prob']: out_layer_dropout_keep_prob, 246 | } 247 | 248 | # Merge adjacency lists: 249 | num_edges = 0 250 | for i in range(self.num_edge_types): 251 | if len(batch_adjacency_lists[i]) > 0: 252 | adj_list = np.concatenate(batch_adjacency_lists[i]) 253 | else: 254 | adj_list = np.zeros((0, 2), dtype=np.int32) 255 | num_edges += adj_list.shape[0] 256 | batch_feed_dict[model_placeholders['adjacency_lists'][i]] = adj_list 257 | 258 | yield MinibatchData(feed_dict=batch_feed_dict, 259 | num_graphs=num_graphs_in_batch, 260 | num_nodes=node_offset, 261 | num_edges=num_edges) 262 | 263 | def early_stopping_metric(self, task_metric_results: List[Dict[str, np.ndarray]], num_graphs: int) -> float: 264 | # Early stopping based on average loss: 265 | return np.sum([m['total_loss'] for m in task_metric_results]) / num_graphs 266 | 267 | def pretty_print_epoch_task_metrics(self, task_metric_results: List[Dict[str, np.ndarray]], num_graphs: int) -> str: 268 | maes = {} 269 | for task_id in self.params['task_ids']: 270 | maes['mae_task%i' % task_id] = 0. 
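# Sum the per-batch absolute errors and divide by the total number of graphs in the epoch to obtain one MAE per task: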
271 | fnum_graphs = float(num_graphs) 272 | for batch_task_metric_results in task_metric_results: 273 | for task_id in self.params['task_ids']: 274 | maes['mae_task%i' % task_id] += batch_task_metric_results['abs_err_task%i' % task_id] / fnum_graphs 275 | 276 | maes_str = " ".join("%i:%.5f" % (task_id, maes['mae_task%i' % task_id]) 277 | for task_id in self.params['task_ids']) 278 | # The following translates back from MAE on the property values normalised to the [0,1] range to the original scale: 279 | err_str = " ".join("%i:%.5f" % (task_id, maes['mae_task%i' % task_id] / self.CHEMICAL_ACC_NORMALISING_FACTORS[task_id]) 280 | for task_id in self.params['task_ids']) 281 | 282 | return "MAEs: %s | Error Ratios: %s" % (maes_str, err_str) 283 | -------------------------------------------------------------------------------- /tasks/sparse_graph_task.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | from enum import Enum 3 | from typing import Any, Dict, Iterable, List, NamedTuple, Iterator, Optional 4 | 5 | import tensorflow as tf 6 | import numpy as np 7 | from dpu_utils.utils import RichPath 8 | 9 | 10 | class DataFold(Enum): 11 | TRAIN = 0 12 | VALIDATION = 1 13 | TEST = 2 14 | 15 | 16 | class MinibatchData(NamedTuple): 17 | feed_dict: Dict[str, tf.Tensor] 18 | num_graphs: int 19 | num_nodes: int 20 | num_edges: int 21 | 22 | 23 | class Sparse_Graph_Task(ABC): 24 | """ 25 | Abstract superclass of all graph tasks, defining the interface used by the 26 | remainder of the code to interact with a task. 27 | """ 28 | @classmethod 29 | def default_params(cls): 30 | return {} 31 | 32 | @staticmethod 33 | @abstractmethod 34 | def default_data_path() -> str: 35 | raise NotImplementedError() 36 | 37 | @staticmethod 38 | @abstractmethod 39 | def name() -> str: 40 | raise NotImplementedError() 41 | 42 | def __init__(self, params: Dict[str, Any]): 43 | self.params = params 44 | self._loaded_data = {} # type: Dict[DataFold, Any] 45 | 46 | def get_metadata(self) -> Dict[str, Any]: 47 | """ 48 | Returns: 49 | Dictionary with all metadata that defines this task, for example parameters 50 | or vocabularies. 51 | """ 52 | return {"params": self.params} 53 | 54 | def restore_from_metadata(self, metadata: Dict[str, Any]) -> None: 55 | """ 56 | Set up task to match passed metadata, e.g., by using the passed vocabulary. 57 | The input can be expected to be an output of get_metadata from another run. 58 | """ 59 | self.params = metadata["params"] 60 | 61 | @property 62 | @abstractmethod 63 | def num_edge_types(self) -> int: 64 | """ 65 | Returns: 66 | Number of edge types used in the dataset. 67 | """ 68 | raise NotImplementedError() 69 | 70 | @property 71 | @abstractmethod 72 | def initial_node_feature_size(self) -> int: 73 | """ 74 | Return: 75 | Size of the initial node representation. 76 | """ 77 | raise NotImplementedError() 78 | 79 | @property 80 | def has_test_data(self) -> bool: 81 | return DataFold.TEST in self._loaded_data 82 | 83 | @abstractmethod 84 | def load_data(self, path: Optional[RichPath]) -> None: 85 | """ 86 | Load data required to train on this task into memory. 87 | 88 | Arguments: 89 | path: Optional path to load from, if not specified, will use task-specific 90 | default under "./data/". 91 | """ 92 | raise NotImplementedError() 93 | 94 | def load_eval_data_from_path(self, path: RichPath) -> Iterable[Any]: 95 | """ 96 | Load data from a given path for evaluation purposes. 
97 | 98 | Arguments: 99 | path: Depending on the task a file or directory containing data to load. 100 | 101 | Returns: 102 | An iterator over graph samples, suitable for being passed into 103 | task.make_minibatch_iterator(). 104 | """ 105 | raise NotImplementedError() 106 | 107 | def make_task_input_model(self, 108 | placeholders: Dict[str, tf.Tensor], 109 | model_ops: Dict[str, tf.Tensor], 110 | ) -> None: 111 | """ 112 | Create a task-specific input model. The default implementation 113 | simply creates placeholders to feed the input in, but more advanced 114 | variants could include sub-networks determining node features, 115 | for example. 116 | 117 | This method cannot assume the placeholders or model_ops dictionaries 118 | to be pre-populated, and needs to add at least the following 119 | entries to model_ops: 120 | * 'initial_node_features': float32 tensor of shape [V, D], where V 121 | is the number of nodes and D is the initial hidden dimension 122 | (needs to match the value of task.initial_node_feature_size). 123 | * 'adjacency_lists': list of L int32 tensors of shape [E, 2], where 124 | L is the number of edge types and E the number of edges of that 125 | type. 126 | Hence, adjacency_lists[l][e,:] == [u, v] means that u has an edge 127 | of type l to v. 128 | * 'type_to_num_incoming_edges': int32 tensor of shape [L, V], where 129 | L is the number of edge types and V the number of nodes. 130 | type_to_num_incoming_edges[l, v] = k indicates that node v has k 131 | incoming edges of type l. 132 | 133 | Arguments: 134 | placeholders: Dictionary of placeholders used by the model, to 135 | be extended with task-specific placeholders. 136 | model_ops: Dictionary of named operations in the model, to 137 | be extended with task-specific operations. 138 | """ 139 | placeholders['initial_node_features'] = \ 140 | tf.placeholder(dtype=tf.float32, shape=[None, self.initial_node_feature_size], name='initial_node_features') 141 | placeholders['adjacency_lists'] = \ 142 | [tf.placeholder(dtype=tf.int32, shape=[None, 2], name='adjacency_e%s' % e) 143 | for e in range(self.num_edge_types)] 144 | placeholders['type_to_num_incoming_edges'] = \ 145 | tf.placeholder(dtype=tf.float32, shape=[self.num_edge_types, None], name='type_to_num_incoming_edges') 146 | 147 | model_ops['initial_node_features'] = placeholders['initial_node_features'] 148 | model_ops['adjacency_lists'] = placeholders['adjacency_lists'] 149 | model_ops['type_to_num_incoming_edges'] = placeholders['type_to_num_incoming_edges'] 150 | 151 | @abstractmethod 152 | def make_task_output_model(self, 153 | placeholders: Dict[str, tf.Tensor], 154 | model_ops: Dict[str, tf.Tensor], 155 | ) -> None: 156 | """ 157 | Create task-specific output model. For this, additional placeholders 158 | can be created, but will need to be filled in the 159 | make_minibatch_iterator implementation. 160 | 161 | This method may assume existence of the placeholders and ops created in 162 | make_task_input_model and of the following: 163 | model_ops['final_node_representations']: a float32 tensor of shape 164 | [V, D], which holds the final node representations after the 165 | GNN layers. 166 | placeholders['num_graphs']: a int32 scalar holding the number of 167 | graphs in this batch. 168 | Order of nodes is preserved across all tensors. 169 | 170 | This method has to define model_ops['task_metrics'] to a dictionary, 171 | from which model_ops['task_metrics']['loss'] will be used for 172 | optimization. 
Other entries may hold additional metrics (accuracy, 173 | MAE, ...). 174 | 175 | Arguments: 176 | placeholders: Dictionary of placeholders used by the model, 177 | pre-populated by the generic graph model values, and to 178 | be extended with task-specific placeholders. 179 | model_ops: Dictionary of named operations in the model, 180 | pre-populated by the generic graph model values, and to 181 | be extended with task-specific operations. 182 | """ 183 | raise NotImplementedError() 184 | 185 | @abstractmethod 186 | def make_minibatch_iterator(self, 187 | data: Iterable[Any], 188 | data_fold: DataFold, 189 | model_placeholders: Dict[str, tf.Tensor], 190 | max_nodes_per_batch: int, 191 | ) -> Iterator[MinibatchData]: 192 | """ 193 | Create minibatches for a sparse graph model, usually by flattening 194 | many smaller graphs into one large graph of disconnected components. 195 | This should produce one epoch's worth of minibatches. 196 | 197 | Arguments: 198 | data: Data to iterate over, created by either load_data or 199 | load_eval_data_from_path. 200 | data_fold: Fold of the loaded data to iterate over. 201 | model_placeholders: The placeholders of the model that need to be 202 | filled with data, including the placeholders introduced by the 203 | task in make_task_input_model and make_task_output_model. 204 | max_nodes_per_batch: Maximal number of nodes that can be packed 205 | into one batch. 206 | 207 | Returns: 208 | Iterator over MinibatchData values, which provide feed dicts 209 | as well as some batch statistics. 210 | """ 211 | raise NotImplementedError() 212 | 213 | @abstractmethod 214 | def early_stopping_metric(self, 215 | task_metric_results: List[Dict[str, np.ndarray]], 216 | num_graphs: int, 217 | ) -> float: 218 | """ 219 | Given the results of the task's metric for all minibatches of an 220 | epoch, produce a metric that should go down (e.g., loss). This is used 221 | for early stopping of training. 222 | 223 | Arguments: 224 | task_metric_results: List of the values of model_ops['task_metrics'] 225 | (defined in make_task_output_model) for each of the minibatches produced 226 | by make_minibatch_iterator. 227 | num_graphs: Number of graphs processed in this epoch. 228 | 229 | Returns: 230 | Numeric value, where a lower value indicates more desirable results. 231 | """ 232 | raise NotImplementedError() 233 | 234 | @abstractmethod 235 | def pretty_print_epoch_task_metrics(self, 236 | task_metric_results: List[Dict[str, np.ndarray]], 237 | num_graphs: int, 238 | ) -> str: 239 | """ 240 | Given the results of the task's metric for all minibatches of an 241 | epoch, produce a human-readable result for the epoch (e.g., average 242 | accuracy). 243 | 244 | Arguments: 245 | task_metric_results: List of the values of model_ops['task_metrics'] 246 | (defined in make_task_output_model) for each of the minibatches produced 247 | by make_minibatch_iterator. 248 | num_graphs: Number of graphs processed in this epoch. 249 | 250 | Returns: 251 | String representation of the task-specific metrics for this epoch, 252 | e.g., mean absolute error for a regression task. 253 | """ 254 | raise NotImplementedError() 255 | -------------------------------------------------------------------------------- /test.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """ 3 | Usage: 4 | test.py [options] STORED_MODEL_PATH [DATA_PATH] 5 | 6 | STORED_MODEL_PATH is the path of a model snapshot created by train.py.
7 | DATA_PATH is the location of the data to test on. 8 | 9 | Options: 10 | -h --help Show this screen. 11 | --result-dir DIR Directory to store logfiles and trained models. [default: trained_models] 12 | --azure-info PATH Azure authentication information file (JSON). [default: azure_auth.json] 13 | --quiet Show less output. 14 | --debug Turn on debugger. 15 | """ 16 | import json 17 | from typing import Optional 18 | 19 | from docopt import docopt 20 | from dpu_utils.utils import run_and_debug, RichPath 21 | 22 | from utils.model_utils import restore 23 | 24 | 25 | def test(model_path: str, test_data_path: Optional[RichPath], result_dir: str, quiet: bool = False, run_id: Optional[str] = None): 26 | model = restore(model_path, result_dir, run_id) 27 | model.params['max_nodes_in_batch'] = 2 * model.params['max_nodes_in_batch'] # We can process larger batches if we don't do training 28 | test_data_path = test_data_path or RichPath.create(model.task.default_data_path()) 29 | model.log_line(" Using the following task params: %s" % json.dumps(model.task.params)) 30 | model.log_line(" Using the following model params: %s" % json.dumps(model.params)) 31 | model.test(test_data_path) 32 | 33 | 34 | def run(args): 35 | azure_info_path = args.get('--azure-info', None) 36 | model_path = args['STORED_MODEL_PATH'] 37 | test_data_path = args.get('DATA_PATH') 38 | if test_data_path is not None: 39 | test_data_path = RichPath.create(test_data_path, azure_info_path) 40 | result_dir = args.get('--result-dir', 'trained_models') 41 | test(model_path, test_data_path, result_dir, quiet=args.get('--quiet')) 42 | 43 | 44 | if __name__ == "__main__": 45 | args = docopt(__doc__) 46 | run_and_debug(lambda: run(args), enable_debugging=args['--debug']) 47 | -------------------------------------------------------------------------------- /train.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """ 3 | Usage: 4 | train.py [options] MODEL_NAME TASK_NAME 5 | 6 | MODEL_NAME has to be one of the supported models, which currently are 7 | GGNN, GNN-Edge-MLP, GNN-FiLM, RGAT, RGCN, RGDCN, RGIN 8 | 9 | Options: 10 | -h --help Show this screen. 11 | --data-path PATH Path to load data from, has task-specific defaults under data/. 12 | --result-dir DIR Directory to store logfiles and trained models. [default: trained_models] 13 | --run-test Indicate if the task's test should be run. 14 | --model-param-overrides PARAMS Parameter settings overriding model defaults (in JSON format). 15 | --task-param-overrides PARAMS Parameter settings overriding task defaults (in JSON format). 16 | --quiet Show less output. 17 | --tensorboard DIR Dump tensorboard event files to DIR. 18 | --azure-info= Azure authentication information file (JSON). [default: azure_auth.json] 19 | --debug Turn on debugger.
20 | """ 21 | import json 22 | import os 23 | import sys 24 | import time 25 | 26 | from docopt import docopt 27 | from dpu_utils.utils import run_and_debug, RichPath, git_tag_run 28 | 29 | from utils.model_utils import name_to_model_class, name_to_task_class 30 | from test import test 31 | 32 | 33 | def run(args): 34 | azure_info_path = args.get('--azure-info', None) 35 | model_cls, additional_model_params = name_to_model_class(args['MODEL_NAME']) 36 | task_cls, additional_task_params = name_to_task_class(args['TASK_NAME']) 37 | 38 | # Collect parameters, first from the class defaults, then potential task defaults, and finally from the CLI: 39 | task_params = task_cls.default_params() 40 | task_params.update(additional_task_params) 41 | model_params = model_cls.default_params() 42 | model_params.update(additional_model_params) 43 | 44 | # Load potential task-specific defaults: 45 | task_model_default_hypers_file = \ 46 | os.path.join(os.path.dirname(__file__), 47 | "tasks", 48 | "default_hypers", 49 | "%s_%s.json" % (task_cls.name(), model_cls.name(model_params))) 50 | if os.path.exists(task_model_default_hypers_file): 51 | print("Loading task/model-specific default parameters from %s." % task_model_default_hypers_file) 52 | with open(task_model_default_hypers_file, "rt") as f: 53 | default_task_model_hypers = json.load(f) 54 | task_params.update(default_task_model_hypers['task_params']) 55 | model_params.update(default_task_model_hypers['model_params']) 56 | 57 | # Load overrides from command line: 58 | task_params.update(json.loads(args.get('--task-param-overrides') or '{}')) 59 | model_params.update(json.loads(args.get('--model-param-overrides') or '{}')) 60 | 61 | # Finally, upgrade every parameter that is a path to a RichPath: 62 | task_params_orig = dict(task_params) 63 | for (param_name, param_value) in task_params.items(): 64 | if param_name.endswith("_path"): 65 | task_params[param_name] = RichPath.create(param_value, azure_info_path) 66 | 67 | # Now prepare to actually run by setting up directories, creating object instances and running: 68 | result_dir = args.get('--result-dir', 'trained_models') 69 | os.makedirs(result_dir, exist_ok=True) 70 | task = task_cls(task_params) 71 | data_path = args.get('--data-path') or task.default_data_path() 72 | data_path = RichPath.create(data_path, azure_info_path) 73 | task.load_data(data_path) 74 | 75 | random_seeds = model_params['random_seed'] 76 | if not isinstance(random_seeds, list): 77 | random_seeds = [random_seeds] 78 | 79 | for random_seed in random_seeds: 80 | model_params['random_seed'] = random_seed 81 | run_id = "_".join([task_cls.name(), model_cls.name(model_params), time.strftime("%Y-%m-%d-%H-%M-%S"), str(os.getpid())]) 82 | 83 | model = model_cls(model_params, task, run_id, result_dir) 84 | model.log_line("Run %s starting."
% run_id) 85 | model.log_line(" Using the following task params: %s" % json.dumps(task_params_orig)) 86 | model.log_line(" Using the following model params: %s" % json.dumps(model_params)) 87 | 88 | if sys.stdin.isatty(): 89 | try: 90 | git_sha = git_tag_run(run_id) 91 | model.log_line(" git tagged as %s" % git_sha) 92 | except: 93 | print(" Tried tagging run in git, but failed.") 94 | pass 95 | 96 | model.initialize_model() 97 | model.train(quiet=args.get('--quiet'), tf_summary_path=args.get('--tensorboard')) 98 | 99 | if args.get('--run-test'): 100 | test(model.best_model_file, data_path, result_dir, quiet=args.get('--quiet'), run_id=run_id) 101 | 102 | 103 | if __name__ == "__main__": 104 | args = docopt(__doc__) 105 | run_and_debug(lambda: run(args), enable_debugging=args['--debug']) 106 | -------------------------------------------------------------------------------- /utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .utils import SMALL_NUMBER, BIG_NUMBER, get_gated_unit, get_aggregation_function, get_activation, MLP, micro_f1 2 | -------------------------------------------------------------------------------- /utils/citation_network_utils.py: -------------------------------------------------------------------------------- 1 | # This is largely copied from https://raw.githubusercontent.com/tkipf/gcn/master/gcn/utils.py 2 | # It is Copyright (c) 2016 Thomas Kipf, under the MIT license (see LICENSE for a copy) 3 | 4 | import numpy as np 5 | import pickle as pkl 6 | import scipy.sparse as sp 7 | import sys 8 | 9 | 10 | def parse_index_file(filename): 11 | """Parse index file.""" 12 | index = [] 13 | for line in open(filename): 14 | index.append(int(line.strip())) 15 | return index 16 | 17 | 18 | def sample_mask(idx, l): 19 | """Create mask.""" 20 | mask = np.zeros(l) 21 | mask[idx] = 1 22 | return np.array(mask, dtype=np.bool) 23 | 24 | 25 | def load_data(directory: str, dataset_str: str): 26 | """ 27 | Loads input data from gcn/data directory 28 | 29 | ind.dataset_str.x => the feature vectors of the training instances as scipy.sparse.csr.csr_matrix object; 30 | ind.dataset_str.tx => the feature vectors of the test instances as scipy.sparse.csr.csr_matrix object; 31 | ind.dataset_str.allx => the feature vectors of both labeled and unlabeled training instances 32 | (a superset of ind.dataset_str.x) as scipy.sparse.csr.csr_matrix object; 33 | ind.dataset_str.y => the one-hot labels of the labeled training instances as numpy.ndarray object; 34 | ind.dataset_str.ty => the one-hot labels of the test instances as numpy.ndarray object; 35 | ind.dataset_str.ally => the labels for instances in ind.dataset_str.allx as numpy.ndarray object; 36 | ind.dataset_str.graph => a dict in the format {index: [index_of_neighbor_nodes]} as collections.defaultdict 37 | object; 38 | ind.dataset_str.test.index => the indices of test instances in graph, for the inductive setting as list object. 39 | 40 | All objects above must be saved using python pickle module. 41 | 42 | :param dataset_str: Dataset name 43 | :return: All data input files loaded (as well the training/test data). 
44 | """ 45 | names = ['x', 'y', 'tx', 'ty', 'allx', 'ally', 'graph'] 46 | objects = [] 47 | for i in range(len(names)): 48 | with open("{}/ind.{}.{}".format(directory, dataset_str, names[i]), 'rb') as f: 49 | if sys.version_info > (3, 0): 50 | objects.append(pkl.load(f, encoding='latin1')) 51 | else: 52 | objects.append(pkl.load(f)) 53 | 54 | x, y, tx, ty, allx, ally, graph = tuple(objects) 55 | test_idx_reorder = parse_index_file("{}/ind.{}.test.index".format(directory, dataset_str)) 56 | test_idx_range = np.sort(test_idx_reorder) 57 | 58 | if dataset_str == 'citeseer': 59 | # Fix citeseer dataset (there are some isolated nodes in the graph) 60 | # Find isolated nodes, add them as zero-vecs into the right position 61 | test_idx_range_full = range(min(test_idx_reorder), max(test_idx_reorder)+1) 62 | tx_extended = sp.lil_matrix((len(test_idx_range_full), x.shape[1])) 63 | tx_extended[test_idx_range-min(test_idx_range), :] = tx 64 | tx = tx_extended 65 | ty_extended = np.zeros((len(test_idx_range_full), y.shape[1])) 66 | ty_extended[test_idx_range-min(test_idx_range), :] = ty 67 | ty = ty_extended 68 | 69 | features = sp.vstack((allx, tx)).tolil() 70 | features[test_idx_reorder, :] = features[test_idx_range, :] 71 | 72 | labels = np.vstack((ally, ty)) 73 | labels[test_idx_reorder, :] = labels[test_idx_range, :] 74 | 75 | idx_test = test_idx_range.tolist() 76 | idx_train = range(len(y)) 77 | idx_val = range(len(y), len(y)+500) 78 | 79 | train_mask = sample_mask(idx_train, labels.shape[0]) 80 | val_mask = sample_mask(idx_val, labels.shape[0]) 81 | test_mask = sample_mask(idx_test, labels.shape[0]) 82 | 83 | y_train = np.zeros(labels.shape) 84 | y_val = np.zeros(labels.shape) 85 | y_test = np.zeros(labels.shape) 86 | y_train[train_mask, :] = labels[train_mask, :] 87 | y_val[val_mask, :] = labels[val_mask, :] 88 | y_test[test_mask, :] = labels[test_mask, :] 89 | 90 | return graph, features, y_train, y_val, y_test, train_mask, val_mask, test_mask 91 | 92 | 93 | def sparse_to_tuple(sparse_mx): 94 | """Convert sparse matrix to tuple representation.""" 95 | def to_tuple(mx): 96 | if not sp.isspmatrix_coo(mx): 97 | mx = mx.tocoo() 98 | coords = np.vstack((mx.row, mx.col)).transpose() 99 | values = mx.data 100 | shape = mx.shape 101 | # All of these will need to be sorted: 102 | sort_indices = np.lexsort(np.rot90(coords)) 103 | return coords[sort_indices], values[sort_indices], shape 104 | 105 | if isinstance(sparse_mx, list): 106 | for i in range(len(sparse_mx)): 107 | sparse_mx[i] = to_tuple(sparse_mx[i]) 108 | else: 109 | sparse_mx = to_tuple(sparse_mx) 110 | 111 | return sparse_mx 112 | 113 | 114 | def preprocess_features(features): 115 | """Row-normalize feature matrix and convert to tuple representation""" 116 | rowsum = np.array(features.sum(1)) 117 | r_inv = np.power(rowsum, -1).flatten() 118 | r_inv[np.isinf(r_inv)] = 0. 119 | r_mat_inv = sp.diags(r_inv) 120 | features = r_mat_inv.dot(features) 121 | return features.toarray() # densify -- these are tiny and we don't care 122 | 123 | 124 | def normalize_adj(adj): 125 | """Symmetrically normalize adjacency matrix.""" 126 | adj = sp.coo_matrix(adj) 127 | rowsum = np.array(adj.sum(1)) 128 | d_inv_sqrt = np.power(rowsum, -0.5).flatten() 129 | d_inv_sqrt[np.isinf(d_inv_sqrt)] = 0. 
130 | d_mat_inv_sqrt = sp.diags(d_inv_sqrt) 131 | return adj.dot(d_mat_inv_sqrt).transpose().dot(d_mat_inv_sqrt).tocoo() 132 | 133 | 134 | def preprocess_adj(adj): 135 | """Preprocessing of adjacency matrix for simple GCN model and conversion to tuple representation.""" 136 | adj_normalized = normalize_adj(adj + sp.eye(adj.shape[0])) 137 | return sparse_to_tuple(adj_normalized) 138 | 139 | 140 | -------------------------------------------------------------------------------- /utils/model_utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | from typing import Tuple, Type, Dict, Any 4 | 5 | import pickle 6 | 7 | from models import (Sparse_Graph_Model, GGNN_Model, GNN_FiLM_Model, GNN_Edge_MLP_Model, 8 | RGAT_Model, RGCN_Model, RGDCN_Model, RGIN_Model) 9 | from tasks import Sparse_Graph_Task, QM9_Task, Citation_Network_Task, PPI_Task, VarMisuse_Task 10 | 11 | 12 | def name_to_task_class(name: str) -> Tuple[Type[Sparse_Graph_Task], Dict[str, Any]]: 13 | name = name.lower() 14 | if name == "qm9": 15 | return QM9_Task, {} 16 | if name == "cora": 17 | return Citation_Network_Task, {"data_kind": "cora"} 18 | if name == "citeseer": 19 | return Citation_Network_Task, {"data_kind": "citeseer"} 20 | if name == "pubmed": 21 | return Citation_Network_Task, {"data_kind": "pubmed"} 22 | if name == "citationnetwork": 23 | return Citation_Network_Task, {} 24 | if name == "ppi": 25 | return PPI_Task, {} 26 | if name == "varmisuse": 27 | return VarMisuse_Task, {} 28 | 29 | raise ValueError("Unknown task type '%s'" % name) 30 | 31 | 32 | def name_to_model_class(name: str) -> Tuple[Type[Sparse_Graph_Model], Dict[str, Any]]: 33 | name = name.lower() 34 | if name in ["ggnn", "ggnn_model"]: 35 | return GGNN_Model, {} 36 | if name in ["gnn_edge_mlp", "gnn-edge-mlp", "gnn_edge_mlp_model"]: 37 | return GNN_Edge_MLP_Model, {} 38 | if name in ["gnn_edge_mlp0", "gnn-edge-mlp0", "gnn_edge_mlp0_model"]: 39 | return GNN_Edge_MLP_Model, {'num_edge_hidden_layers': 0} 40 | if name in ["gnn_edge_mlp1", "gnn-edge-mlp1", "gnn_edge_mlp1_model"]: 41 | return GNN_Edge_MLP_Model, {'num_edge_hidden_layers': 1} 42 | if name in ["gnn_edge_mlp", "gnn-edge-mlp"]: 43 | return GNN_Edge_MLP_Model, {} 44 | if name in ["gnn_film", "gnn-film", "gnn_film_model"]: 45 | return GNN_FiLM_Model, {} 46 | if name in ["rgat", "rgat_model"]: 47 | return RGAT_Model, {} 48 | if name in ["rgcn", "rgcn_model"]: 49 | return RGCN_Model, {} 50 | if name in ["rgdcn", "rgdcn_model"]: 51 | return RGDCN_Model, {} 52 | if name in ["rgin", "rgin_model"]: 53 | return RGIN_Model, {} 54 | 55 | raise ValueError("Unknown model type '%s'" % name) 56 | 57 | 58 | def restore(saved_model_path: str, result_dir: str, run_id: str = None) -> Sparse_Graph_Model: 59 | print("Loading model from file %s." 
% saved_model_path) 60 | with open(saved_model_path, 'rb') as in_file: 61 | data_to_load = pickle.load(in_file) 62 | 63 | model_cls, _ = name_to_model_class(data_to_load['model_class']) 64 | task_cls, additional_task_params = name_to_task_class(data_to_load['task_class']) 65 | 66 | if run_id is None: 67 | run_id = "_".join([task_cls.name(), model_cls.name(data_to_load['model_params']), time.strftime("%Y-%m-%d-%H-%M-%S"), str(os.getpid())]) 68 | 69 | task = task_cls(data_to_load['task_params']) 70 | task.restore_from_metadata(data_to_load['task_metadata']) 71 | 72 | model = model_cls(data_to_load['model_params'], task, run_id, result_dir) 73 | model.load_weights(data_to_load['weights']) 74 | 75 | model.log_line("Loaded model from snapshot %s." % saved_model_path) 76 | 77 | return model 78 | -------------------------------------------------------------------------------- /utils/utils.py: -------------------------------------------------------------------------------- 1 | from typing import Optional, Callable, Union, List 2 | 3 | import tensorflow as tf 4 | 5 | 6 | BIG_NUMBER = 1e7 7 | SMALL_NUMBER = 1e-7 8 | 9 | 10 | def get_gated_unit(units: int, gated_unit: str, activation_function: str): 11 | activation_fn = get_activation(activation_function) 12 | gated_unit_name = gated_unit.lower() 13 | if gated_unit_name == 'rnn': 14 | return tf.keras.layers.SimpleRNNCell(units, activation=activation_fn) 15 | if gated_unit_name == 'gru': 16 | return tf.keras.layers.GRUCell(units, activation=activation_fn) 17 | if gated_unit_name == 'lstm': 18 | return tf.keras.layers.LSTMCell(units, activation=activation_fn) 19 | else: 20 | raise Exception("Unknown RNN cell type '%s'." % gated_unit) 21 | 22 | 23 | def get_aggregation_function(aggregation_fun: Optional[str]): 24 | if aggregation_fun in ['sum', 'unsorted_segment_sum']: 25 | return tf.unsorted_segment_sum 26 | if aggregation_fun in ['max', 'unsorted_segment_max']: 27 | return tf.unsorted_segment_max 28 | if aggregation_fun in ['mean', 'unsorted_segment_mean']: 29 | return tf.unsorted_segment_mean 30 | if aggregation_fun in ['sqrt_n', 'unsorted_segment_sqrt_n']: 31 | return tf.unsorted_segment_sqrt_n 32 | else: 33 | raise ValueError("Unknown aggregation function '%s'!" % aggregation_fun) 34 | 35 | 36 | def get_activation(activation_fun: Optional[str]): 37 | if activation_fun is None: 38 | return None 39 | activation_fun = activation_fun.lower() 40 | if activation_fun == 'linear': 41 | return None 42 | if activation_fun == 'tanh': 43 | return tf.tanh 44 | if activation_fun == 'relu': 45 | return tf.nn.relu 46 | if activation_fun == 'leaky_relu': 47 | return tf.nn.leaky_relu 48 | if activation_fun == 'elu': 49 | return tf.nn.elu 50 | if activation_fun == 'selu': 51 | return tf.nn.selu 52 | if activation_fun == 'gelu': 53 | def gelu(input_tensor): 54 | cdf = 0.5 * (1.0 + tf.erf(input_tensor / tf.sqrt(2.0))) 55 | return input_tensor * cdf 56 | return gelu 57 | else: 58 | raise ValueError("Unknown activation function '%s'!" % activation_fun) 59 | 60 | 61 | def micro_f1(logits, labels): 62 | # Everything on int, because who trusts float anyway? 
63 | predicted = tf.round(tf.nn.sigmoid(logits)) 64 | predicted = tf.cast(predicted, dtype=tf.int32) 65 | labels = tf.cast(labels, dtype=tf.int32) 66 | 67 | true_pos = tf.count_nonzero(predicted * labels) 68 | false_pos = tf.count_nonzero(predicted * (labels - 1)) 69 | false_neg = tf.count_nonzero((predicted - 1) * labels) 70 | 71 | precision = true_pos / (true_pos + false_pos) 72 | recall = true_pos / (true_pos + false_neg) 73 | fmeasure = (2 * precision * recall) / (precision + recall) 74 | return tf.cast(fmeasure, tf.float32) 75 | 76 | 77 | class MLP(object): 78 | def __init__(self, 79 | out_size: int, 80 | hidden_layers: Union[List[int], int] = 1, 81 | use_biases: bool = False, 82 | activation_fun: Optional[Callable[[tf.Tensor], tf.Tensor]] = tf.nn.relu, 83 | dropout_rate: Union[float, tf.Tensor] = 0.0, 84 | name: Optional[str] = "MLP", 85 | ): 86 | """ 87 | Create new MLP with given number of hidden layers. 88 | 89 | Arguments: 90 | out_size: Dimensionality of output. 91 | hidden_layers: Either an integer determining number of hidden layers, who will have out_size units each; 92 | or list of integers whose lengths determines the number of hidden layers and whose contents the 93 | number of units in each layer. 94 | use_biases: Flag indicating use of bias in fully connected layers. 95 | activation_fun: Activation function applied between hidden layers (NB: the output of the MLP 96 | is always the direct result of a linear transformation) 97 | dropout_rate: Dropout applied to inputs of each MLP layer. 98 | """ 99 | if isinstance(hidden_layers, int): 100 | hidden_layer_sizes = [out_size] * hidden_layers 101 | else: 102 | hidden_layer_sizes = hidden_layers 103 | 104 | if len(hidden_layer_sizes) > 1: 105 | assert activation_fun is not None, "Multiple linear layers without an activation" 106 | 107 | self.__dropout_rate = dropout_rate 108 | self.__name = name 109 | with tf.variable_scope(self.__name): 110 | self.__layers = [] # type: List[tf.layers.Dense] 111 | for hidden_layer_size in hidden_layer_sizes: 112 | self.__layers.append(tf.layers.Dense(units=hidden_layer_size, 113 | use_bias=use_biases, 114 | activation=activation_fun)) 115 | # Output layer: 116 | self.__layers.append(tf.layers.Dense(units=out_size, 117 | use_bias=use_biases, 118 | activation=None)) 119 | 120 | def __call__(self, input: tf.Tensor) -> tf.Tensor: 121 | with tf.variable_scope(self.__name): 122 | activations = input 123 | for layer in self.__layers[:-1]: 124 | activations = tf.nn.dropout(activations, rate=self.__dropout_rate) 125 | activations = layer(activations) 126 | return self.__layers[-1](activations) 127 | -------------------------------------------------------------------------------- /utils/varmisuse_data_splitter.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """ 3 | Usage: 4 | varmisuse_data_splitter.py [options] RAW_DATA_DIR OUT_DIR 5 | 6 | Reads in datapoints from a set of files and creates smaller files mixing these, in a format 7 | suitable for streaming them into the training process. 8 | 9 | Options: 10 | -h --help Show this screen. 11 | --chunk-size NUM Number of samples per output file. [default: 100] 12 | --num-workers NUM Number of worker processes. Defaults to number of CPU cores. 13 | --window-size NUM Number of samples to load before mixing and writing things out. [default: 5000] 14 | --azure-info= Azure authentication information file (JSON). [default: azure_auth.json] 15 | --debug Turn on debugger. 
16 | """ 17 | from typing import List, Any 18 | 19 | import numpy as np 20 | from more_itertools import chunked 21 | from docopt import docopt 22 | from dpu_utils.utils import run_and_debug, RichPath 23 | from multiprocessing import Process, Queue, cpu_count 24 | 25 | 26 | def _data_loading_worker(file_queue: Queue, result_queue: Queue) -> None: 27 | while True: 28 | next_path = file_queue.get() 29 | if next_path is None: # Our signal that all files have been processed 30 | file_queue.put(None) # Signal to the other workers 31 | result_queue.put(None) # Signal to the controller that we are done 32 | break 33 | 34 | # Read the file and push examples out as soon as we get them: 35 | for raw_sample in next_path.read_by_file_suffix(): 36 | result_queue.put(raw_sample) 37 | 38 | 39 | def _write_data(out_dir: RichPath, window_idx: int, chunk_size: int, data_window: List[Any]): 40 | np.random.shuffle(data_window) 41 | for chunk_idx, data_chunk in enumerate(chunked(data_window, chunk_size)): 42 | out_file = out_dir.join('chunk_%i-%i.jsonl.gz' % (window_idx, chunk_idx)) 43 | out_file.save_as_compressed_file(data_chunk) 44 | 45 | 46 | def run(args): 47 | azure_info_path = args.get('--azure-info', None) 48 | in_dir = RichPath.create(args['RAW_DATA_DIR'], azure_info_path) 49 | out_dir = RichPath.create(args['OUT_DIR'], azure_info_path) 50 | out_dir.make_as_dir() 51 | 52 | num_workers = int(args.get('--num-workers') or cpu_count()) 53 | chunk_size = int(args['--chunk-size']) 54 | window_size = int(args['--window-size']) 55 | 56 | files_to_load = list(in_dir.iterate_filtered_files_in_dir("*.gz")) 57 | path_queue = Queue(maxsize=len(files_to_load) + 1) 58 | result_queue = Queue(1000) 59 | 60 | # Set up list of work to do: 61 | for path in files_to_load: 62 | path_queue.put(path) 63 | path_queue.put(None) # Signal for the end of the queue 64 | 65 | # Set up workers: 66 | workers = [] 67 | for _ in range(num_workers): 68 | workers.append(Process(target=_data_loading_worker, 69 | args=(path_queue, result_queue,))) 70 | workers[-1].start() 71 | 72 | # Consume the data: 73 | num_workers_terminated = 0 74 | data_window = [] 75 | window_idx = 0 76 | while num_workers_terminated < len(workers): 77 | parsed_sample = result_queue.get() 78 | if parsed_sample is None: 79 | num_workers_terminated += 1 # Worker signaled that it's done 80 | else: 81 | data_window.append(parsed_sample) 82 | if len(data_window) >= window_size: 83 | _write_data(out_dir, window_idx, chunk_size, data_window) 84 | data_window = [] 85 | window_idx += 1 86 | 87 | # Write out the remainder of the data: 88 | _write_data(out_dir, window_idx, chunk_size, data_window) 89 | 90 | # Clean up the workers: 91 | for worker in workers: 92 | worker.join() 93 | 94 | 95 | if __name__ == "__main__": 96 | args = docopt(__doc__) 97 | run_and_debug(lambda: run(args), enable_debugging=args['--debug']) 98 | --------------------------------------------------------------------------------
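To make the Sparse_Graph_Task interface from tasks/sparse_graph_task.py more concrete, the following is a minimal sketch of what a new task implementation could look like. Everything here is illustrative and not part of the repository: the class name Toy_Regression_Task, the ring-graph data, the 'node_to_graph_map' and 'target_values' placeholders, and the data path are invented for the example, and it assumes that the generic model only needs the placeholders documented above plus the 'num_graphs' placeholder mentioned in the interface docstrings. A complete, real implementation of the same interface is tasks/qm9_task.py.

from typing import Any, Dict, Iterable, Iterator, List, Optional

import numpy as np
import tensorflow as tf
from dpu_utils.utils import RichPath

from tasks.sparse_graph_task import DataFold, MinibatchData, Sparse_Graph_Task


class Toy_Regression_Task(Sparse_Graph_Task):
    """Hypothetical task regressing one target value per graph; illustrative sketch only."""
    @staticmethod
    def default_data_path() -> str:
        return "data/toy"  # hypothetical location; not part of the repository

    @staticmethod
    def name() -> str:
        return "Toy"

    @property
    def num_edge_types(self) -> int:
        return 1

    @property
    def initial_node_feature_size(self) -> int:
        return 16

    def load_data(self, path: Optional[RichPath]) -> None:
        # A real task parses graphs from disk; this sketch builds small ring graphs
        # as (node_features, adjacency_list, per-graph target) triples.
        def ring(n):
            feats = np.random.randn(n, self.initial_node_feature_size).astype(np.float32)
            edges = np.array([[i, (i + 1) % n] for i in range(n)], dtype=np.int32)
            return feats, edges, float(n)
        for fold in (DataFold.TRAIN, DataFold.VALIDATION):
            self._loaded_data[fold] = [ring(np.random.randint(3, 8)) for _ in range(100)]

    def make_task_output_model(self,
                               placeholders: Dict[str, tf.Tensor],
                               model_ops: Dict[str, tf.Tensor],
                               ) -> None:
        # Placeholders created here must be filled in make_minibatch_iterator:
        placeholders['node_to_graph_map'] = tf.placeholder(tf.int32, [None], name='node_to_graph_map')
        placeholders['target_values'] = tf.placeholder(tf.float32, [None], name='target_values')
        per_node_scores = tf.layers.dense(model_ops['final_node_representations'], units=1, use_bias=False)
        per_graph_scores = tf.unsorted_segment_sum(data=tf.squeeze(per_node_scores, axis=-1),
                                                   segment_ids=placeholders['node_to_graph_map'],
                                                   num_segments=placeholders['num_graphs'])
        diff = per_graph_scores - placeholders['target_values']
        model_ops['task_metrics'] = {'loss': tf.reduce_mean(tf.square(diff)),
                                     'absolute_error': tf.reduce_sum(tf.abs(diff))}

    def make_minibatch_iterator(self,
                                data: Iterable[Any],
                                data_fold: DataFold,
                                model_placeholders: Dict[str, tf.Tensor],
                                max_nodes_per_batch: int,
                                ) -> Iterator[MinibatchData]:
        # Flatten many small graphs into one large graph of disconnected components:
        data = list(data)
        if data_fold == DataFold.TRAIN:
            np.random.shuffle(data)
        feats, edges, node_to_graph, targets, num_nodes = [], [], [], [], 0

        def finish_batch() -> MinibatchData:
            all_edges = np.concatenate(edges, axis=0)
            num_incoming = np.zeros((self.num_edge_types, num_nodes), dtype=np.float32)
            for (_, v) in all_edges:
                num_incoming[0, v] += 1
            feed_dict = {model_placeholders['initial_node_features']: np.concatenate(feats, axis=0),
                         model_placeholders['adjacency_lists'][0]: all_edges,
                         model_placeholders['type_to_num_incoming_edges']: num_incoming,
                         model_placeholders['node_to_graph_map']: np.array(node_to_graph, dtype=np.int32),
                         model_placeholders['target_values']: np.array(targets, dtype=np.float32),
                         model_placeholders['num_graphs']: len(targets)}
            return MinibatchData(feed_dict=feed_dict, num_graphs=len(targets),
                                 num_nodes=num_nodes, num_edges=len(all_edges))

        for node_features, adjacency_list, target in data:
            if targets and num_nodes + len(node_features) > max_nodes_per_batch:
                yield finish_batch()
                feats, edges, node_to_graph, targets, num_nodes = [], [], [], [], 0
            feats.append(node_features)
            edges.append(adjacency_list + num_nodes)  # shift node indices into the batch graph
            node_to_graph.extend([len(targets)] * len(node_features))
            targets.append(target)
            num_nodes += len(node_features)
        if targets:
            yield finish_batch()

    def early_stopping_metric(self,
                              task_metric_results: List[Dict[str, np.ndarray]],
                              num_graphs: int,
                              ) -> float:
        return float(sum(r['absolute_error'] for r in task_metric_results)) / num_graphs

    def pretty_print_epoch_task_metrics(self, task_metric_results, num_graphs) -> str:
        return "MAE: %.5f" % self.early_stopping_metric(task_metric_results, num_graphs)

Hooking such a task into the training driver would additionally require exporting it from tasks/__init__.py and adding a branch for it in name_to_task_class in utils/model_utils.py; after that, train.py would pick it up through the same default-hypers and parameter-override machinery shown above.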
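On a smaller scale, the MLP class from utils/utils.py (exported through utils/__init__.py alongside the activation helpers) is a reusable building block for feed-forward sub-networks. A minimal usage sketch in TF 1.x graph mode; the tensor shape, layer sizes, and the 'readout_mlp' name below are arbitrary illustrations rather than values taken from the repository:

import tensorflow as tf

from utils import MLP, get_activation

node_representations = tf.placeholder(tf.float32, shape=[None, 64], name='node_representations')
readout_mlp = MLP(out_size=1, hidden_layers=[32, 32], use_biases=True,
                  activation_fun=get_activation('relu'), dropout_rate=0.0, name='readout_mlp')
per_node_scores = readout_mlp(node_representations)  # two ReLU hidden layers followed by a linear output layer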