├── .flake8
├── .gitignore
├── CONTRIBUTING.md
├── LICENSE
├── README.md
├── SECURITY.md
├── data
│   └── qm9
│       ├── LICENSE
│       ├── test.jsonl.gz
│       ├── train.jsonl.gz
│       └── valid.jsonl.gz
├── gnns
│   ├── __init__.py
│   ├── ggnn.py
│   ├── gnn_edge_mlp.py
│   ├── gnn_film.py
│   ├── rgat.py
│   ├── rgcn.py
│   ├── rgdcn.py
│   └── rgin.py
├── models
│   ├── __init__.py
│   ├── ggnn_model.py
│   ├── gnn_edge_mlp_model.py
│   ├── gnn_film_model.py
│   ├── rgat_model.py
│   ├── rgcn_model.py
│   ├── rgdcn_model.py
│   ├── rgin_model.py
│   └── sparse_graph_model.py
├── reorg_varmisuse_data.sh
├── requirements.txt
├── run_ppi_benchs.py
├── run_qm9_benchs.py
├── run_varmisuse_benchs.py
├── tasks
│   ├── __init__.py
│   ├── citation_network_task.py
│   ├── default_hypers
│   │   ├── PPI_GGNN.json
│   │   ├── PPI_GNN-Edge-MLP0.json
│   │   ├── PPI_GNN-Edge-MLP1.json
│   │   ├── PPI_GNN-FiLM.json
│   │   ├── PPI_RGAT.json
│   │   ├── PPI_RGCN.json
│   │   ├── PPI_RGIN.json
│   │   ├── QM9_GGNN.json
│   │   ├── QM9_GNN-Edge-MLP0.json
│   │   ├── QM9_GNN-Edge-MLP1.json
│   │   ├── QM9_GNN-FiLM.json
│   │   ├── QM9_RGAT.json
│   │   ├── QM9_RGCN.json
│   │   ├── QM9_RGIN.json
│   │   ├── VarMisuse_GGNN.json
│   │   ├── VarMisuse_GNN-Edge-MLP0.json
│   │   ├── VarMisuse_GNN-Edge-MLP1.json
│   │   ├── VarMisuse_GNN-FiLM.json
│   │   ├── VarMisuse_RGAT.json
│   │   ├── VarMisuse_RGCN.json
│   │   └── VarMisuse_RGIN.json
│   ├── ppi_task.py
│   ├── qm9_task.py
│   ├── sparse_graph_task.py
│   └── varmisuse_task.py
├── test.py
├── train.py
└── utils
    ├── __init__.py
    ├── citation_network_utils.py
    ├── model_utils.py
    ├── utils.py
    └── varmisuse_data_splitter.py
/.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 2 | max-line-length = 150 3 | ignore = 4 | # W605: invalid escape sequence -- triggered by pseudo-LaTeX in comments 5 | W605, -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | 49 | # Translations 50 | *.mo 51 | *.pot 52 | 53 | # Django stuff: 54 | *.log 55 | local_settings.py 56 | 57 | # Flask stuff: 58 | instance/ 59 | .webassets-cache 60 | 61 | # Scrapy stuff: 62 | .scrapy 63 | 64 | # Sphinx documentation 65 | docs/_build/ 66 | 67 | # PyBuilder 68 | target/ 69 | 70 | # Jupyter Notebook 71 | .ipynb_checkpoints 72 | 73 | # pyenv 74 | .python-version 75 | 76 | # celery beat schedule file 77 | celerybeat-schedule 78 | 79 | # SageMath parsed files 80 | *.sage.py 81 | 82 | # dotenv 83 | .env 84 | 85 | # virtualenv 86 | .venv 87 | venv/ 88 | ENV/ 89 | 90 | # Spyder project settings 91 | .spyderproject 92 | .spyproject 93 | 94 | # Rope project settings 95 | .ropeproject 96 | 97 | # mkdocs documentation 98 | /site 99 | 100 | # mypy 101 | .mypy_cache/ 102 | 103 | molecules_*.json 104 | data/* -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | ## CONTRIBUTING 2 | 3 | This project welcomes contributions and suggestions. Most contributions require you to agree to a 4 | Contributor License Agreement (CLA) declaring that you have the right to, and actually do, grant us 5 | the rights to use your contribution. For details, visit https://cla.microsoft.com. 6 | 7 | When you submit a pull request, a CLA-bot will automatically determine whether you need to provide 8 | a CLA and decorate the PR appropriately (e.g., label, comment). Simply follow the instructions 9 | provided by the bot. You will only need to do this once across all repos using our CLA. 10 | 11 | This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). 12 | For more information see the [Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) or 13 | contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with any additional questions or comments. 14 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) Microsoft Corporation. All rights reserved. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # TF Graph Neural Network Samples 2 | This repository is the code release corresponding to an article introducing 3 | graph neural networks (GNNs) with feature-wise linear modulation ([Brockschmidt, 2019](#brockschmidt-2019)). 4 | In the paper, a number of GNN architectures are discussed: 5 | * Gated Graph Neural Networks (GGNN) ([Li et al., 2015](#li-et-al-2015)). 6 | * Relational Graph Convolutional Networks (RGCN) ([Schlichtkrull et al., 2017](#schlichtkrull-et-al-2017)). 7 | * Relational Graph Attention Networks (RGAT) - a generalisation of Graph Attention Networks ([Veličković et al., 2018](#veličković-et-al-2018)) to several edge types. 8 | * Relational Graph Isomorphism Networks (RGIN) - a generalisation of Graph Isomorphism Networks ([Xu et al., 2019](#xu-et-al-2019)) to several edge types. 9 | * Graph Neural Network with Edge MLPs (GNN-Edge-MLP) - a variant of RGCN in which messages on edges are computed using full MLPs, not just a single layer. 10 | * Relational Graph Dynamic Convolution Networks (RGDCN) - a new variant of RGCN in which the weights of convolutional layers are dynamically computed. 11 | * Graph Neural Networks with Feature-wise Linear Modulation (GNN-FiLM) - a new extension of RGCN with FiLM layers. 12 | 13 | The results presented in the paper are based on the implementations of models 14 | and tasks provided in this repository. 15 | 16 | This code was tested in Python 3.6 with TensorFlow 1.13.1. 17 | To install required packages, run `pip install -r requirements.txt`. 18 | 19 | The code is maintained by the [Deep Program Understanding](https://www.microsoft.com/en-us/research/project/program/) project at Microsoft Research, Cambridge, UK. We are [hiring](https://www.microsoft.com/en-us/research/theme/ada/#!opportunities). 20 | 21 | # Running 22 | To train a model, it suffices to run `python train.py MODEL_TYPE TASK`, for 23 | example as follows: 24 | ``` 25 | $ python train.py RGCN PPI 26 | Loading task/model-specific default parameters from tasks/default_hypers/PPI_RGCN.json. 27 | Loading PPI train data from data/ppi. 28 | Loading PPI valid data from data/ppi. 29 | Model has 699257 parameters. 30 | Run PPI_RGCN_2019-06-26-14-33-58_17208 starting. 
31 | Using the following task params: {"add_self_loop_edges": true, "tie_fwd_bkwd_edges": false, "out_layer_dropout_keep_prob": 1.0} 32 | Using the following model params: {"max_nodes_in_batch": 12500, "graph_num_layers": 3, "graph_num_timesteps_per_layer": 1, "graph_layer_input_dropout_keep_prob": 1.0, "graph_dense_between_every_num_gnn_layers": 10000, "graph_model_activation_function": "tanh", "graph_residual_connection_every_num_layers": 10000, "graph_inter_layer_norm": false, "max_epochs": 10000, "patience": 25, "optimizer": "Adam", "learning_rate": 0.001, "learning_rate_decay": 0.98, "momentum": 0.85, "clamp_gradient_norm": 1.0, "random_seed": 0, "hidden_size": 256, "graph_activation_function": "ReLU", "message_aggregation_function": "sum"} 33 | == Epoch 1 34 | Train: loss: 77.42656 || Avg MicroF1: 0.395 || graphs/sec: 15.09 | nodes/sec: 33879 | edges/sec: 1952084 35 | Valid: loss: 68.86771 || Avg MicroF1: 0.370 || graphs/sec: 14.85 | nodes/sec: 48360 | edges/sec: 3098674 36 | (Best epoch so far, target metric decreased to 224302.10938 from inf. Saving to 'trained_models/PPI_RGCN_2019-06-26-14-33-58_17208_best_model.pickle') 37 | [...] 38 | ``` 39 | An overview of options can be obtained by `python train.py --help`. 40 | 41 | Note that task and model parameters can be overridden (every training 42 | run prints their current settings) using the `--task-param-overrides` and 43 | `--model-param-overrides` command line options, which take dictionaries in JSON 44 | form. 45 | So for example, to choose a different number of layers, 46 | `--model-param-overrides '{"graph_num_layers": 4}'` can be used. 47 | 48 | Results of the training run will also be saved in a directory (by default 49 | `trained_models/`, but this can be set using the `--result_dir` flag). 50 | Concretely, the following two files are created: 51 | * `${RESULT_DIR}/${RUN_NAME}.log`: A log of the training run. 52 | * `${RESULT_DIR}/${RUN_NAME}_best_model.pickle`: A dump of the model weights 53 | achieving the best results on the validation set. 54 | 55 | To evaluate a model, use the `test.py` script as follows on one of the 56 | model dumps generated by `train.py`: 57 | ``` 58 | $ python test.py trained_models/PPI_RGCN_2019-06-26-14-33-58_17208_best_model.pickle 59 | Loading model from file trained_models/PPI_RGCN_2019-06-26-14-33-58_17208_best_model.pickle. 60 | Model has 699257 parameters. 61 | == Running Test on data/ppi == 62 | Loading PPI test data from data/ppi. 63 | Loss 11.13117 on 2 graphs 64 | Metrics: Avg MicroF1: 0.954 65 | ``` 66 | `python test.py --help` provides more options, for example to specify a different 67 | test data set. 68 | A run on the default test set can be automatically triggered after training 69 | using the `--run-test` option to `train.py` as well. 70 | 71 | # Experimental Results 72 | Experimental results reported in the accompanying article can be reproduced 73 | using the code in the repository. 74 | More precisely, `python run_ppi_benchs.py ppi_eval_results/` should 75 | produce an ASCII rendering of Table 1 - note, however, that this will take 76 | quite a while. 77 | Similarly, `python run_qm9_benchs.py qm9_eval_results/` should 78 | produce an ASCII rendering of Table 2 - this will take a very long time 79 | (approx. 13 * 4 * 45 * 5 minutes, i.e., around 8 days), and 80 | in practice, we used a different version of this parallelising the runs 81 | across many hosts using Microsoft-internal infrastructure.
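Rather than running a full benchmark sweep, a single model/task combination can also be trained and evaluated directly by combining the options described above. The following invocation is only an illustrative sketch; the override value and the result directory name are arbitrary choices, not required settings:
```
python train.py GNN-FiLM QM9 \
    --model-param-overrides '{"graph_num_layers": 4}' \
    --result_dir qm9_film_results/ \
    --run-test
```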
82 | 83 | Note that the training script loads fitting default hyperparameters for 84 | model/task combinations from `tasks/default_hypers/{TASK}_{MODEL}.json`. 85 | 86 | # Models 87 | Currently, seven model types are implemented: 88 | * `GGNN`: Gated Graph Neural Networks ([Li et al., 2015](#li-et-al-2015)). 89 | * `RGCN`: Relational Graph Convolutional Networks ([Schlichtkrull et al., 2017](#schlichtkrull-et-al-2017)). 90 | * `RGAT`: Relational Graph Attention Networks ([Veličković et al., 2018](#veličković-et-al-2018)). 91 | * `RGIN`: Relational Graph Isomorphism Networks ([Xu et al., 2019](#xu-et-al-2019)). 92 | * `GNN-Edge-MLP`: Graph Neural Network with Edge MLPs - a variant of RGCN in which messages on edges are computed using full MLPs, not just a single layer applied to the source state. 93 | * `RGDCN`: Relational Graph Dynamic Convolution Networks - a new variant of RGCN in which the weights of convolutional layers are dynamically computed. 94 | * `GNN-FiLM`: Graph Neural Networks with Feature-wise Linear Modulation - a new extension of RGCN with FiLM layers. 95 | 96 | # Tasks 97 | New tasks can be added by implementing the `tasks.sparse_graph_task` interface. 98 | This provides hooks to load data, create task-specific output layers and 99 | compute task-specific metrics. 100 | The documentation in `tasks/sparse_graph_task.py` provides a detailed overview 101 | of the interface. Currently, four tasks are implemented, exposing different 102 | aspects. 103 | 104 | ## Citation networks 105 | The `CitationNetwork` task (implemented in `tasks/citation_network_task.py`) 106 | handles the Cora, Pubmed and Citeseer citation network datasets often used 107 | in evaluation of GNNs ([Sen et al., 2008](#sen-et-al-2008)). 108 | The implementation illustrates how to handle the case of transductive graph 109 | learning on a single graph instance by masking out nodes that shouldn't be 110 | considered. 111 | You can call this by running `python train.py MODEL Cora` (or `Pubmed` or 112 | `Citeseer` instead of `Cora`). 113 | 114 | To run experiments on this task, you need to download the data from 115 | https://github.com/kimiyoung/planetoid/raw/master/data. By default, the 116 | code looks for this data in `data/citation-networks`, but this can be changed 117 | by using `--data-path "SOME/OTHER/DIR"`. 118 | 119 | ## PPI 120 | The `PPI` task (implemented in `tasks/ppi_task.py`) handles the protein-protein 121 | interaction task first described by [Zitnik & Leskovec, 2017](#zitnik-leskovec-2017). 122 | The implementation illustrates how to handle the case of inductive graph 123 | learning with node-level predictions. 124 | You can call this by running `python train.py MODEL PPI`. 125 | 126 | To run experiments on this task, you need to download and unpack the [data](https://data.dgl.ai/dataset/ppi.zip) 127 | as follows: 128 | 129 | ``` 130 | curl -LO https://data.dgl.ai/dataset/ppi.zip 131 | unzip ppi.zip -d data/ppi 132 | ``` 133 | 134 | By default, the code looks for this data in `data/ppi`, but this can be changed 135 | by using `--data-path "SOME/OTHER/DIR"`. 136 | 137 | ### Current Results 138 | Running `python run_ppi_benchs.py ppi_results/` should yield results looking 139 | like this (on an NVidia V100): 140 | 141 | | Model | Avg. MicroF1 | Avg. 
Time | 142 | |---------------|-------------------|------------| 143 | | GGNN | 0.990 (+/- 0.001) | 432.6 | 144 | | RGCN | 0.989 (+/- 0.000) | 759.0 | 145 | | RGAT | 0.989 (+/- 0.001) | 782.3 | 146 | | RGIN | 0.991 (+/- 0.001) | 704.8 | 147 | | GNN-Edge-MLP0 | 0.992 (+/- 0.000) | 556.9 | 148 | | GNN-Edge-MLP1 | 0.992 (+/- 0.001) | 479.2 | 149 | | GNN_FiLM | 0.992 (+/- 0.000) | 308.1 | 150 | 151 | ## QM9 152 | The `QM9` task (implemented in `tasks/qm9_task.py`) handles the quantum chemistry 153 | prediction tasks first described by [Ramakrishnan et al., 2014](#ramakrishnan-et-al-2014) 154 | The implementation illustrates how to handle the case of inductive graph 155 | learning with graph-level predictions. 156 | You can call this by running `python train.py MODEL QM9`. 157 | 158 | The data for this task is included in the repository in `data/qm9`, which just 159 | contains a JSON representation of a pre-processed version of the dataset originally 160 | released by [Ramakrishnan et al., 2014](#ramakrishnan-et-al-2014). 161 | 162 | The results shown in Table 2 of the [technical report](#brockschmidt-2019) can 163 | be reproduced by running `python run_qm9_benchs.py qm9_results/`, but this will 164 | take a very long time (several days) and should best be distributed onto different 165 | compute nodes. 166 | 167 | ## VarMisuse 168 | The `VarMisuse` task (implemented in `tasks/varmisuse_task.py`) handles the 169 | variable misuse task first described by [Allamanis et al., 2018](#allamanis-et-al-2018). 170 | Note that we do not fully re-implement the original model here, and so 171 | results are not (quite) comparable with the results reported in the original 172 | paper. 173 | The implementation illustrates how to handle the case of inductive graph 174 | learning with predictions based on node selection. 175 | You can call this by running `python train.py MODEL VarMisuse`. 176 | 177 | To run experiments on this task, you need to download the dataset from 178 | https://aka.ms/iclr18-prog-graphs-dataset. 179 | To make this usable for the data loading code in this repository, you then need 180 | to edit the top lines of the script `reorg_varmisuse_data.sh` (from this repo) 181 | to point to the downloaded zip file and the directory you want to extract the 182 | data to, and then run it. Note that this will take a relatively long time. 183 | By default, the code looks for this data in `data/varmisuse/`, but this can be 184 | changed by using `--data-path "SOME/OTHER/DIR"`. 185 | 186 | ### Current Results 187 | Running `python run_varmisuse_benchs.py varmisuse_results/` should yield results 188 | looking like this (on a single NVidia V100, this will take about 2 weeks): 189 | 190 | | Model | Valid Acc | Test Acc | TestOnly Acc | 191 | |----------------|-------------------|-------------------|-------------------| 192 | | GGNN | 0.821 (+/- 0.009) | 0.857 (+/- 0.005) | 0.793 (+/- 0.012) | 193 | | RGCN | 0.857 (+/- 0.016) | 0.872 (+/- 0.015) | 0.814 (+/- 0.023) | 194 | | RGAT | 0.842 (+/- 0.010) | 0.869 (+/- 0.007) | 0.812 (+/- 0.009) | 195 | | RGIN | 0.842 (+/- 0.010) | 0.871 (+/- 0.001) | 0.811 (+/- 0.009) | 196 | | GNN-Edge-MLP0 | 0.834 (+/- 0.003) | 0.865 (+/- 0.002) | 0.805 (+/- 0.014) | 197 | | GNN-Edge-MLP1 | 0.844 (+/- 0.004) | 0.869 (+/- 0.003) | 0.814 (+/- 0.007) | 198 | | GNN_FiLM | 0.846 (+/- 0.006) | 0.870 (+/- 0.002) | 0.813 (+/- 0.009) | 199 | 200 | 201 | # References 202 | 203 | #### Allamanis et al., 2018 204 | Miltiadis Allamanis, Marc Brockschmidt, and Mahmoud Khademi. 
Learning to 205 | Represent Programs with Graphs. In International Conference on Learning 206 | Representations (ICLR), 2018. (https://arxiv.org/pdf/1711.00740.pdf) 207 | 208 | #### Brockschmidt, 2019 209 | Marc Brockschmidt. GNN-FiLM: Graph Neural Networks with Feature-wise Linear 210 | Modulation. (https://arxiv.org/abs/1906.12192) 211 | 212 | #### Li et al., 2015 213 | Yujia Li, Daniel Tarlow, Marc Brockschmidt, and Richard Zemel. Gated Graph 214 | Sequence Neural Networks. In International Conference on Learning 215 | Representations (ICLR), 2016. (https://arxiv.org/pdf/1511.05493.pdf) 216 | 217 | #### Ramakrishnan et al., 2014 218 | Raghunathan Ramakrishnan, Pavlo O. Dral, Matthias Rupp, and O. Anatole 219 | Von Lilienfeld. Quantum Chemistry Structures and Properties of 134 Kilo 220 | Molecules. Scientific Data, 1, 2014. 221 | (https://www.nature.com/articles/sdata201422/) 222 | 223 | #### Schlichtkrull et al., 2017 224 | Michael Schlichtkrull, Thomas N. Kipf, Peter Bloem, Rianne van den Berg, 225 | Ivan Titov, and Max Welling. Modeling Relational Data with Graph 226 | Convolutional Networks. In Extended Semantic Web Conference (ESWC), 2018. 227 | (https://arxiv.org/pdf/1703.06103.pdf) 228 | 229 | #### Sen et al., 2008 230 | Prithviraj Sen, Galileo Namata, Mustafa Bilgic, Lise Getoor, Brian Galligher, 231 | and Tina Eliassi-Rad. Collective Classification in Network Data. AI magazine, 232 | 29, 2008. (https://www.aaai.org/ojs/index.php/aimagazine/article/view/2157) 233 | 234 | #### Veličković et al. 2018 235 | Petar Veličković, Guillem Cucurull, Arantxa Casanova, Adriana Romero, Pietro 236 | Liò, and Yoshua Bengio. Graph Attention Networks. In International Conference 237 | on Learning Representations (ICLR), 2018. (https://arxiv.org/pdf/1710.10903.pdf) 238 | 239 | #### Xu et al. 2019 240 | Keyulu Xu, Weihua Hu, Jure Leskovec, and Stefanie Jegelka. How Powerful are 241 | Graph Neural Networks? In International Conference on Learning Representations 242 | (ICLR), 2019. (https://arxiv.org/pdf/1810.00826.pdf) 243 | 244 | #### Zitnik & Leskovec, 2017 245 | Marinka Zitnik and Jure Leskovec. Predicting Multicellular Function Through 246 | Multi-layer Tissue Networks. Bioinformatics, 33, 2017. 247 | (https://arxiv.org/abs/1707.04638) 248 | 249 | # Contributing 250 | 251 | This project welcomes contributions and suggestions. Most contributions 252 | require you to agree to a Contributor License Agreement (CLA) declaring 253 | that you have the right to, and actually do, grant us the rights to use 254 | your contribution. For details, visit https://cla.microsoft.com. 255 | 256 | When you submit a pull request, a CLA-bot will automatically determine 257 | whether you need to provide a CLA and decorate the PR appropriately (e.g., 258 | label, comment). Simply follow the instructions provided by the bot. 259 | You will only need to do this once across all repos using our CLA. 260 | 261 | This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). 262 | For more information see the [Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) or 263 | contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with any additional questions or comments. 
264 | -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | ## Security 4 | 5 | Microsoft takes the security of our software products and services seriously, which includes all source code repositories managed through our GitHub organizations, which include [Microsoft](https://github.com/Microsoft), [Azure](https://github.com/Azure), [DotNet](https://github.com/dotnet), [AspNet](https://github.com/aspnet), [Xamarin](https://github.com/xamarin), and [our GitHub organizations](https://opensource.microsoft.com/). 6 | 7 | If you believe you have found a security vulnerability in any Microsoft-owned repository that meets [Microsoft's definition of a security vulnerability](https://aka.ms/opensource/security/definition), please report it to us as described below. 8 | 9 | ## Reporting Security Issues 10 | 11 | **Please do not report security vulnerabilities through public GitHub issues.** 12 | 13 | Instead, please report them to the Microsoft Security Response Center (MSRC) at [https://msrc.microsoft.com/create-report](https://aka.ms/opensource/security/create-report). 14 | 15 | If you prefer to submit without logging in, send email to [secure@microsoft.com](mailto:secure@microsoft.com). If possible, encrypt your message with our PGP key; please download it from the [Microsoft Security Response Center PGP Key page](https://aka.ms/opensource/security/pgpkey). 16 | 17 | You should receive a response within 24 hours. If for some reason you do not, please follow up via email to ensure we received your original message. Additional information can be found at [microsoft.com/msrc](https://aka.ms/opensource/security/msrc). 18 | 19 | Please include the requested information listed below (as much as you can provide) to help us better understand the nature and scope of the possible issue: 20 | 21 | * Type of issue (e.g. buffer overflow, SQL injection, cross-site scripting, etc.) 22 | * Full paths of source file(s) related to the manifestation of the issue 23 | * The location of the affected source code (tag/branch/commit or direct URL) 24 | * Any special configuration required to reproduce the issue 25 | * Step-by-step instructions to reproduce the issue 26 | * Proof-of-concept or exploit code (if possible) 27 | * Impact of the issue, including how an attacker might exploit the issue 28 | 29 | This information will help us triage your report more quickly. 30 | 31 | If you are reporting for a bug bounty, more complete reports can contribute to a higher bounty award. Please visit our [Microsoft Bug Bounty Program](https://aka.ms/opensource/security/bounty) page for more details about our active programs. 32 | 33 | ## Preferred Languages 34 | 35 | We prefer all communications to be in English. 36 | 37 | ## Policy 38 | 39 | Microsoft follows the principle of [Coordinated Vulnerability Disclosure](https://aka.ms/opensource/security/cvd). 40 | 41 | 42 | -------------------------------------------------------------------------------- /data/qm9/LICENSE: -------------------------------------------------------------------------------- 1 | The data in this directory is a lightly processed and split version of the 2 | dataset originally released in the following article: 3 | 4 | Raghunathan Ramakrishnan, Pavlo O. Dral, Matthias Rupp, and O. Anatole Von Lilienfeld. 5 | Quantum chemistry structures and properties of 134 kilo molecules. 6 | Scientific Data, 1, 2014. 
7 | https://www.nature.com/articles/sdata201422/ 8 | 9 | The data is released under the CC0 license: 10 | 11 | CREATIVE COMMONS CORPORATION IS NOT A LAW FIRM AND DOES NOT PROVIDE LEGAL SERVICES. DISTRIBUTION OF THIS DOCUMENT DOES NOT CREATE AN ATTORNEY-CLIENT RELATIONSHIP. CREATIVE COMMONS PROVIDES THIS INFORMATION ON AN "AS-IS" BASIS. CREATIVE COMMONS MAKES NO WARRANTIES REGARDING THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS PROVIDED HEREUNDER, AND DISCLAIMS LIABILITY FOR DAMAGES RESULTING FROM THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS PROVIDED HEREUNDER. 12 | 13 | Statement of Purpose 14 | 15 | The laws of most jurisdictions throughout the world automatically confer exclusive Copyright and Related Rights (defined below) upon the creator and subsequent owner(s) (each and all, an "owner") of an original work of authorship and/or a database (each, a "Work"). 16 | 17 | Certain owners wish to permanently relinquish those rights to a Work for the purpose of contributing to a commons of creative, cultural and scientific works ("Commons") that the public can reliably and without fear of later claims of infringement build upon, modify, incorporate in other works, reuse and redistribute as freely as possible in any form whatsoever and for any purposes, including without limitation commercial purposes. These owners may contribute to the Commons to promote the ideal of a free culture and the further production of creative, cultural and scientific works, or to gain reputation or greater distribution for their Work in part through the use and efforts of others. 18 | 19 | For these and/or other purposes and motivations, and without any expectation of additional consideration or compensation, the person associating CC0 with a Work (the "Affirmer"), to the extent that he or she is an owner of Copyright and Related Rights in the Work, voluntarily elects to apply CC0 to the Work and publicly distribute the Work under its terms, with knowledge of his or her Copyright and Related Rights in the Work and the meaning and intended legal effect of CC0 on those rights. 20 | 21 | 1. Copyright and Related Rights. A Work made available under CC0 may be protected by copyright and related or neighboring rights ("Copyright and Related Rights"). Copyright and Related Rights include, but are not limited to, the following: 22 | 23 | the right to reproduce, adapt, distribute, perform, display, communicate, and translate a Work; 24 | moral rights retained by the original author(s) and/or performer(s); 25 | publicity and privacy rights pertaining to a person's image or likeness depicted in a Work; 26 | rights protecting against unfair competition in regards to a Work, subject to the limitations in paragraph 4(a), below; 27 | rights protecting the extraction, dissemination, use and reuse of data in a Work; 28 | database rights (such as those arising under Directive 96/9/EC of the European Parliament and of the Council of 11 March 1996 on the legal protection of databases, and under any national implementation thereof, including any amended or successor version of such directive); and 29 | other similar, equivalent or corresponding rights throughout the world based on applicable law or treaty, and any national implementations thereof. 30 | 31 | 2. Waiver. 
To the greatest extent permitted by, but not in contravention of, applicable law, Affirmer hereby overtly, fully, permanently, irrevocably and unconditionally waives, abandons, and surrenders all of Affirmer's Copyright and Related Rights and associated claims and causes of action, whether now known or unknown (including existing as well as future claims and causes of action), in the Work (i) in all territories worldwide, (ii) for the maximum duration provided by applicable law or treaty (including future time extensions), (iii) in any current or future medium and for any number of copies, and (iv) for any purpose whatsoever, including without limitation commercial, advertising or promotional purposes (the "Waiver"). Affirmer makes the Waiver for the benefit of each member of the public at large and to the detriment of Affirmer's heirs and successors, fully intending that such Waiver shall not be subject to revocation, rescission, cancellation, termination, or any other legal or equitable action to disrupt the quiet enjoyment of the Work by the public as contemplated by Affirmer's express Statement of Purpose. 32 | 33 | 3. Public License Fallback. Should any part of the Waiver for any reason be judged legally invalid or ineffective under applicable law, then the Waiver shall be preserved to the maximum extent permitted taking into account Affirmer's express Statement of Purpose. In addition, to the extent the Waiver is so judged Affirmer hereby grants to each affected person a royalty-free, non transferable, non sublicensable, non exclusive, irrevocable and unconditional license to exercise Affirmer's Copyright and Related Rights in the Work (i) in all territories worldwide, (ii) for the maximum duration provided by applicable law or treaty (including future time extensions), (iii) in any current or future medium and for any number of copies, and (iv) for any purpose whatsoever, including without limitation commercial, advertising or promotional purposes (the "License"). The License shall be deemed effective as of the date CC0 was applied by Affirmer to the Work. Should any part of the License for any reason be judged legally invalid or ineffective under applicable law, such partial invalidity or ineffectiveness shall not invalidate the remainder of the License, and in such case Affirmer hereby affirms that he or she will not (i) exercise any of his or her remaining Copyright and Related Rights in the Work or (ii) assert any associated claims and causes of action with respect to the Work, in either case contrary to Affirmer's express Statement of Purpose. 34 | 35 | 4. Limitations and Disclaimers. 36 | 37 | No trademark or patent rights held by Affirmer are waived, abandoned, surrendered, licensed or otherwise affected by this document. 38 | Affirmer offers the Work as-is and makes no representations or warranties of any kind concerning the Work, express, implied, statutory or otherwise, including without limitation warranties of title, merchantability, fitness for a particular purpose, non infringement, or the absence of latent or other defects, accuracy, or the present or absence of errors, whether or not discoverable, all to the greatest extent permissible under applicable law. 39 | Affirmer disclaims responsibility for clearing rights of other persons that may apply to the Work or any use thereof, including without limitation any person's Copyright and Related Rights in the Work. 
Further, Affirmer disclaims responsibility for obtaining any necessary consents, permissions or other rights required for any use of the Work. 40 | Affirmer understands and acknowledges that Creative Commons is not a party to this document and has no duty or obligation with respect to this CC0 or use of the Work. 41 | -------------------------------------------------------------------------------- /data/qm9/test.jsonl.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/tf-gnn-samples/ff14b96ad97be3dc0c6829d2ad54a63e10779a94/data/qm9/test.jsonl.gz -------------------------------------------------------------------------------- /data/qm9/train.jsonl.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/tf-gnn-samples/ff14b96ad97be3dc0c6829d2ad54a63e10779a94/data/qm9/train.jsonl.gz -------------------------------------------------------------------------------- /data/qm9/valid.jsonl.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/tf-gnn-samples/ff14b96ad97be3dc0c6829d2ad54a63e10779a94/data/qm9/valid.jsonl.gz -------------------------------------------------------------------------------- /gnns/__init__.py: -------------------------------------------------------------------------------- 1 | from .ggnn import sparse_ggnn_layer 2 | from .gnn_edge_mlp import sparse_gnn_edge_mlp_layer 3 | from .gnn_film import sparse_gnn_film_layer 4 | from .rgat import sparse_rgat_layer 5 | from .rgcn import sparse_rgcn_layer 6 | from .rgdcn import sparse_rgdcn_layer 7 | from .rgin import sparse_rgin_layer 8 | -------------------------------------------------------------------------------- /gnns/ggnn.py: -------------------------------------------------------------------------------- 1 | from typing import List, Optional 2 | 3 | import tensorflow as tf 4 | 5 | from utils import get_gated_unit, get_aggregation_function 6 | 7 | 8 | def sparse_ggnn_layer(node_embeddings: tf.Tensor, 9 | adjacency_lists: List[tf.Tensor], 10 | state_dim: Optional[int], 11 | num_timesteps: int = 1, 12 | gated_unit_type: str = "gru", 13 | activation_function: str = "tanh", 14 | message_aggregation_function: str = "sum" 15 | ) -> tf.Tensor: 16 | """ 17 | Compute new graph states by neural message passing and gated units on the nodes. 18 | For this, we assume existing node states h^t_v and a list of per-edge-type adjacency 19 | matrices A_\ell. 20 | 21 | We compute new states as follows: 22 | h^{t+1}_v := Cell(h^t_v, \sum_\ell 23 | \sum_{(u, v) \in A_\ell} 24 | W_\ell * h^t_u) 25 | The learnable parameters of this are the recurrent Cell and the W_\ell \in R^{D,D}. 26 | 27 | We use the following abbreviations in shape descriptions: 28 | * V: number of nodes 29 | * D: state dimension 30 | * L: number of different edge types 31 | * E: number of edges of a given edge type 32 | 33 | Arguments: 34 | node_embeddings: float32 tensor of shape [V, D], the original representation of 35 | each node in the graph. 36 | adjacency_lists: List of L adjacency lists, represented as int32 tensors of shape 37 | [E, 2]. Concretely, adjacency_lists[l][k,:] == [v, u] means that the k-th edge 38 | of type l connects node v to node u. 39 | state_dim: Optional size of output dimension of the GNN layer. If not set, defaults 40 | to D, the dimensionality of the input. 
If different from the input dimension, 41 | parameter num_timesteps has to be 1. 42 | num_timesteps: Number of repeated applications of this message passing layer. 43 | gated_unit_type: Type of the recurrent unit used (one of RNN, GRU and LSTM). 44 | activation_function: Type of activation function used. 45 | message_aggregation_function: Type of aggregation function used for messages. 46 | 47 | Returns: 48 | float32 tensor of shape [V, state_dim] 49 | """ 50 | num_nodes = tf.shape(node_embeddings, out_type=tf.int32)[0] 51 | if state_dim is None: 52 | state_dim = tf.shape(node_embeddings, out_type=tf.int32)[1] 53 | 54 | # === Prepare things we need across all timesteps: 55 | message_aggregation_fn = get_aggregation_function(message_aggregation_function) 56 | gated_cell = get_gated_unit(state_dim, gated_unit_type, activation_function) 57 | edge_type_to_message_transformation_layers = [] # Layers to compute the message from a source state 58 | edge_type_to_message_targets = [] # List of tensors of message targets 59 | for edge_type_idx, adjacency_list_for_edge_type in enumerate(adjacency_lists): 60 | edge_type_to_message_transformation_layers.append( 61 | tf.keras.layers.Dense(units=state_dim, 62 | use_bias=False, 63 | activation=None, 64 | name="Edge_%i_Weight" % edge_type_idx)) 65 | edge_type_to_message_targets.append(adjacency_list_for_edge_type[:, 1]) 66 | 67 | # Let M be the number of messages (sum of all E): 68 | message_targets = tf.concat(edge_type_to_message_targets, axis=0) # Shape [M] 69 | 70 | cur_node_states = node_embeddings 71 | for _ in range(num_timesteps): 72 | messages = [] # list of tensors of messages of shape [E, D] 73 | message_source_states = [] # list of tensors of edge source states of shape [E, D] 74 | 75 | # Collect incoming messages per edge type 76 | for edge_type_idx, adjacency_list_for_edge_type in enumerate(adjacency_lists): 77 | edge_sources = adjacency_list_for_edge_type[:, 0] 78 | edge_source_states = tf.nn.embedding_lookup(params=cur_node_states, 79 | ids=edge_sources) # Shape [E, D] 80 | all_messages_for_edge_type = \ 81 | edge_type_to_message_transformation_layers[edge_type_idx](edge_source_states) # Shape [E,D] 82 | messages.append(all_messages_for_edge_type) 83 | message_source_states.append(edge_source_states) 84 | 85 | messages = tf.concat(messages, axis=0) # Shape [M, D] 86 | aggregated_messages = \ 87 | message_aggregation_fn(data=messages, 88 | segment_ids=message_targets, 89 | num_segments=num_nodes) # Shape [V, D] 90 | 91 | # pass updated vertex features into RNN cell 92 | new_node_states = gated_cell(aggregated_messages, [cur_node_states])[0] # Shape [V, D] 93 | cur_node_states = new_node_states 94 | 95 | return cur_node_states 96 | -------------------------------------------------------------------------------- /gnns/gnn_edge_mlp.py: -------------------------------------------------------------------------------- 1 | from typing import List, Optional 2 | import tensorflow as tf 3 | 4 | from utils import get_activation, get_aggregation_function, SMALL_NUMBER, MLP 5 | 6 | 7 | def sparse_gnn_edge_mlp_layer( 8 | node_embeddings: tf.Tensor, 9 | adjacency_lists: List[tf.Tensor], 10 | type_to_num_incoming_edges: tf.Tensor, 11 | state_dim: Optional[int], 12 | num_timesteps: int = 1, 13 | activation_function: Optional[str] = "ReLU", 14 | message_aggregation_function: str = "sum", 15 | normalize_by_num_incoming: bool = False, 16 | use_target_state_as_input: bool = True, 17 | num_edge_hidden_layers: int = 1 18 | ) -> tf.Tensor: 19 | """ 20 | Compute new 
graph states by neural message passing using an edge MLP. 21 | For this, we assume existing node states h^t_v and a list of per-edge-type adjacency 22 | matrices A_\ell. 23 | 24 | We compute new states as follows: 25 | h^{t+1}_v := \sum_\ell 26 | \sum_{(u, v) \in A_\ell} 27 | \sigma(1/c_{v,\ell} * MLP(h^t_u || h^t_v)) 28 | c_{v,\ell} is usually 1 (but could also be the number of incoming edges). 29 | The learnable parameters of this are the weights of the per-edge-type message MLPs. 30 | 31 | We use the following abbreviations in shape descriptions: 32 | * V: number of nodes 33 | * D: state dimension 34 | * L: number of different edge types 35 | * E: number of edges of a given edge type 36 | 37 | Arguments: 38 | node_embeddings: float32 tensor of shape [V, D], the original representation of 39 | each node in the graph. 40 | adjacency_lists: List of L adjacency lists, represented as int32 tensors of shape 41 | [E, 2]. Concretely, adjacency_lists[l][k,:] == [v, u] means that the k-th edge 42 | of type l connects node v to node u. 43 | type_to_num_incoming_edges: float32 tensor of shape [L, V] representing the number 44 | of incoming edges of a given type. Concretely, type_to_num_incoming_edges[l, v] 45 | is the number of edges of type l connecting to node v. 46 | state_dim: Optional size of output dimension of the GNN layer. If not set, defaults 47 | to D, the dimensionality of the input. If different from the input dimension, 48 | parameter num_timesteps has to be 1. 49 | num_timesteps: Number of repeated applications of this message passing layer. 50 | activation_function: Type of activation function used. 51 | message_aggregation_function: Type of aggregation function used for messages. 52 | normalize_by_num_incoming: Flag indicating if messages should be scaled by 1/(number 53 | of incoming edges). 54 | use_target_state_as_input: Flag indicating if the edge MLP should consume both 55 | source and target state (True) or only source state (False). 56 | num_edge_hidden_layers: Number of hidden layers of the edge MLP. 57 | message_weights_dropout_ratio: Dropout ratio applied to the weights used 58 | to compute message passing functions. 
59 | 60 | Returns: 61 | float32 tensor of shape [V, state_dim] 62 | """ 63 | num_nodes = tf.shape(node_embeddings, out_type=tf.int32)[0] 64 | if state_dim is None: 65 | state_dim = tf.shape(node_embeddings, out_type=tf.int32)[1] 66 | 67 | # === Prepare things we need across all timesteps: 68 | activation_fn = get_activation(activation_function) 69 | message_aggregation_fn = get_aggregation_function(message_aggregation_function) 70 | edge_type_to_edge_mlp = [] # MLPs to compute the edge messages 71 | edge_type_to_message_targets = [] # List of tensors of message targets 72 | for edge_type_idx, adjacency_list_for_edge_type in enumerate(adjacency_lists): 73 | edge_type_to_edge_mlp.append( 74 | MLP(out_size=state_dim, 75 | hidden_layers=num_edge_hidden_layers, 76 | activation_fun=tf.nn.elu, 77 | name="Edge_%i_MLP" % edge_type_idx)) 78 | edge_type_to_message_targets.append(adjacency_list_for_edge_type[:, 1]) 79 | 80 | # Let M be the number of messages (sum of all E): 81 | message_targets = tf.concat(edge_type_to_message_targets, axis=0) # Shape [M] 82 | 83 | cur_node_states = node_embeddings 84 | for _ in range(num_timesteps): 85 | messages_per_type = [] # list of tensors of messages of shape [E, D] 86 | # Collect incoming messages per edge type 87 | for edge_type_idx, adjacency_list_for_edge_type in enumerate(adjacency_lists): 88 | edge_sources = adjacency_list_for_edge_type[:, 0] 89 | edge_targets = adjacency_list_for_edge_type[:, 1] 90 | edge_source_states = \ 91 | tf.nn.embedding_lookup(params=cur_node_states, 92 | ids=edge_sources) # Shape [E, D] 93 | 94 | edge_mlp_inputs = edge_source_states 95 | if use_target_state_as_input: 96 | edge_target_states = \ 97 | tf.nn.embedding_lookup(params=cur_node_states, 98 | ids=edge_targets) # Shape [E, D] 99 | edge_mlp_inputs = tf.concat([edge_source_states, edge_target_states], 100 | axis=1) # Shape [E, 2*D] 101 | 102 | messages = edge_type_to_edge_mlp[edge_type_idx](edge_mlp_inputs) # Shape [E, D] 103 | 104 | if normalize_by_num_incoming: 105 | per_message_num_incoming_edges = \ 106 | tf.nn.embedding_lookup(params=type_to_num_incoming_edges[edge_type_idx, :], 107 | ids=edge_targets) # Shape [E, H] 108 | messages = tf.expand_dims(1.0 / (per_message_num_incoming_edges + SMALL_NUMBER), axis=-1) * messages 109 | messages_per_type.append(messages) 110 | 111 | all_messages = tf.concat(messages_per_type, axis=0) # Shape [M, D] 112 | all_messages = activation_fn(all_messages) # Shape [M, D] (Apply nonlinearity to Edge-MLP outputs as well) 113 | aggregated_messages = \ 114 | message_aggregation_fn(data=all_messages, 115 | segment_ids=message_targets, 116 | num_segments=num_nodes) # Shape [V, D] 117 | 118 | new_node_states = aggregated_messages 119 | new_node_states = tf.contrib.layers.layer_norm(new_node_states) 120 | cur_node_states = new_node_states 121 | 122 | return cur_node_states 123 | -------------------------------------------------------------------------------- /gnns/gnn_film.py: -------------------------------------------------------------------------------- 1 | from typing import List, Optional 2 | import tensorflow as tf 3 | 4 | 5 | from utils import get_activation, get_aggregation_function, SMALL_NUMBER 6 | 7 | 8 | def sparse_gnn_film_layer(node_embeddings: tf.Tensor, 9 | adjacency_lists: List[tf.Tensor], 10 | type_to_num_incoming_edges: tf.Tensor, 11 | state_dim: Optional[int], 12 | num_timesteps: int = 1, 13 | activation_function: Optional[str] = "ReLU", 14 | message_aggregation_function: str = "sum", 15 | normalize_by_num_incoming: bool = 
False, 16 | ) -> tf.Tensor: 17 | """ 18 | Compute new graph states by neural message passing modulated by the target state. 19 | For this, we assume existing node states h^t_v and a list of per-edge-type adjacency 20 | matrices A_\ell. 21 | 22 | We compute new states as follows: 23 | h^{t+1}_v := \sum_\ell 24 | \sum_{(u, v) \in A_\ell} 25 | \sigma(1/c_{v,\ell} * \alpha_{\ell,v} * (W_\ell * h^t_u) + \beta_{\ell,v}) 26 | \alpha_{\ell,v} := F_{\ell,\alpha} * h^t_v 27 | \beta_{\ell,v} := F_{\ell,\beta} * h^t_v 28 | c_{\v,\ell} is usually 1 (but could also be the number of incoming edges). 29 | The learnable parameters of this are the W_\ell, F_{\ell,\alpha}, F_{\ell,\beta} \in R^{D, D}. 30 | 31 | We use the following abbreviations in shape descriptions: 32 | * V: number of nodes 33 | * D: state dimension 34 | * L: number of different edge types 35 | * E: number of edges of a given edge type 36 | 37 | Arguments: 38 | node_embeddings: float32 tensor of shape [V, D], the original representation of 39 | each node in the graph. 40 | adjacency_lists: List of L adjacency lists, represented as int32 tensors of shape 41 | [E, 2]. Concretely, adjacency_lists[l][k,:] == [v, u] means that the k-th edge 42 | of type l connects node v to node u. 43 | type_to_num_incoming_edges: float32 tensor of shape [L, V] representing the number 44 | of incoming edges of a given type. Concretely, type_to_num_incoming_edges[l, v] 45 | is the number of edge of type l connecting to node v. 46 | state_dim: Optional size of output dimension of the GNN layer. If not set, defaults 47 | to D, the dimensionality of the input. If different from the input dimension, 48 | parameter num_timesteps has to be 1. 49 | num_timesteps: Number of repeated applications of this message passing layer. 50 | activation_function: Type of activation function used. 51 | message_aggregation_function: Type of aggregation function used for messages. 52 | normalize_by_num_incoming: Flag indicating if messages should be scaled by 1/(number 53 | of incoming edges). 
54 | 55 | Returns: 56 | float32 tensor of shape [V, state_dim] 57 | """ 58 | num_nodes = tf.shape(node_embeddings, out_type=tf.int32)[0] 59 | if state_dim is None: 60 | state_dim = tf.shape(node_embeddings, out_type=tf.int32)[1] 61 | 62 | # === Prepare things we need across all timesteps: 63 | activation_fn = get_activation(activation_function) 64 | message_aggregation_fn = get_aggregation_function(message_aggregation_function) 65 | edge_type_to_message_transformation_layers = [] # Layers to compute the message from a source state 66 | edge_type_to_film_computation_layers = [] # Layers to compute the \beta/\gamma weights for FiLM 67 | edge_type_to_message_targets = [] # List of tensors of message targets 68 | for edge_type_idx, adjacency_list_for_edge_type in enumerate(adjacency_lists): 69 | edge_type_to_message_transformation_layers.append( 70 | tf.keras.layers.Dense(units=state_dim, 71 | use_bias=False, 72 | activation=None, # Activation only after FiLM modulation 73 | name="Edge_%i_Weight" % edge_type_idx)) 74 | edge_type_to_film_computation_layers.append( 75 | tf.keras.layers.Dense(units=2 * state_dim, # Computes \gamma, \beta in one go 76 | use_bias=False, 77 | activation=None, 78 | name="Edge_%i_FiLM_Computations" % edge_type_idx)) 79 | edge_type_to_message_targets.append(adjacency_list_for_edge_type[:, 1]) 80 | 81 | # Let M be the number of messages (sum of all E): 82 | message_targets = tf.concat(edge_type_to_message_targets, axis=0) # Shape [M] 83 | 84 | cur_node_states = node_embeddings 85 | for _ in range(num_timesteps): 86 | messages_per_type = [] # list of tensors of messages of shape [E, D] 87 | # Collect incoming messages per edge type 88 | for edge_type_idx, adjacency_list_for_edge_type in enumerate(adjacency_lists): 89 | edge_sources = adjacency_list_for_edge_type[:, 0] 90 | edge_targets = adjacency_list_for_edge_type[:, 1] 91 | edge_source_states = \ 92 | tf.nn.embedding_lookup(params=cur_node_states, 93 | ids=edge_sources) # Shape [E, D] 94 | messages = edge_type_to_message_transformation_layers[edge_type_idx](edge_source_states) # Shape [E, D] 95 | 96 | if normalize_by_num_incoming: 97 | per_message_num_incoming_edges = \ 98 | tf.nn.embedding_lookup(params=type_to_num_incoming_edges[edge_type_idx, :], 99 | ids=edge_targets) # Shape [E, H] 100 | messages = tf.expand_dims(1.0 / (per_message_num_incoming_edges + SMALL_NUMBER), axis=-1) * messages 101 | 102 | film_weights = edge_type_to_film_computation_layers[edge_type_idx](cur_node_states) 103 | per_message_film_weights = \ 104 | tf.nn.embedding_lookup(params=film_weights, ids=edge_targets) 105 | per_message_film_gamma_weights = per_message_film_weights[:, :state_dim] # Shape [E, D] 106 | per_message_film_beta_weights = per_message_film_weights[:, state_dim:] # Shape [E, D] 107 | 108 | modulated_messages = per_message_film_gamma_weights * messages + per_message_film_beta_weights 109 | messages_per_type.append(modulated_messages) 110 | 111 | all_messages = tf.concat(messages_per_type, axis=0) # Shape [M, D] 112 | all_messages = activation_fn(all_messages) # Shape [M, D] 113 | aggregated_messages = \ 114 | message_aggregation_fn(data=all_messages, 115 | segment_ids=message_targets, 116 | num_segments=num_nodes) # Shape [V, D] 117 | new_node_states = aggregated_messages 118 | # new_node_states = activation_fn(new_node_states) 119 | 120 | cur_node_states = tf.contrib.layers.layer_norm(new_node_states) 121 | 122 | return cur_node_states 123 | -------------------------------------------------------------------------------- 
/gnns/rgat.py: -------------------------------------------------------------------------------- 1 | from typing import List, Optional 2 | 3 | import tensorflow as tf 4 | from dpu_utils.tfutils import unsorted_segment_log_softmax 5 | 6 | from utils import get_activation 7 | 8 | 9 | def sparse_rgat_layer(node_embeddings: tf.Tensor, 10 | adjacency_lists: List[tf.Tensor], 11 | state_dim: Optional[int], 12 | num_heads: int = 4, 13 | num_timesteps: int = 1, 14 | activation_function: Optional[str] = "tanh" 15 | ) -> tf.Tensor: 16 | """ 17 | Compute new graph states by neural message passing using attention. This generalises 18 | the original GAT model (Velickovic et al., https://arxiv.org/pdf/1710.10903.pdf) 19 | to multiple edge types by using different weights for different edge types. 20 | For this, we assume existing node states h^t_v and a list of per-edge-type adjacency 21 | matrices A_\ell. 22 | 23 | In the setting for a single attention head, we compute new states as follows: 24 | h^t_{v, \ell} := W_\ell h^t_v 25 | e_{u, \ell, v} := LeakyReLU(\alpha_\ell^T * concat(h^t_{u, \ell}, h^t_{v, \ell})) 26 | a_v := softmax_{\ell, u with (u, v) \in A_\ell}(e_{u, \ell, v}) 27 | h^{t+1}_v := \sigma(\sum_{ell, (u, v) \in A_\ell} 28 | a_v_{u, \ell} * h^_{u, \ell}) 29 | The learnable parameters of this are the W_\ell \in R^{D, D} and \alpha_\ell \in R^{2*D}. 30 | 31 | In practice, we use K attention heads, computing separate, partial new states h^{t+1}_{v,k} 32 | and compute h^{t+1}_v as the concatentation of the partial states. 33 | For this, we reduce the shape of W_\ell to R^{D, D/K} and \alpha_\ell to R^{2*D/K}. 34 | 35 | We use the following abbreviations in shape descriptions: 36 | * V: number of nodes 37 | * D: state dimension 38 | * K: number of attention heads 39 | * L: number of different edge types 40 | * E: number of edges of a given edge type 41 | 42 | Arguments: 43 | node_embeddings: float32 tensor of shape [V, D], the original representation of 44 | each node in the graph. 45 | adjacency_lists: List of L adjacency lists, represented as int32 tensors of shape 46 | [E, 2]. Concretely, adjacency_lists[l][k,:] == [v, u] means that the k-th edge 47 | of type l connects node v to node u. 48 | state_dim: Optional size of output dimension of the GNN layer. If not set, defaults 49 | to D, the dimensionality of the input. If different from the input dimension, 50 | parameter num_timesteps has to be 1. 51 | num_heads: Number of attention heads to use. 52 | num_timesteps: Number of repeated applications of this message passing layer. 53 | activation_function: Type of activation function used. 
54 | 55 | Returns: 56 | float32 tensor of shape [V, state_dim] 57 | """ 58 | num_nodes = tf.shape(node_embeddings, out_type=tf.int32)[0] 59 | if state_dim is None: 60 | state_dim = tf.shape(node_embeddings, out_type=tf.int32)[1] 61 | per_head_dim = state_dim // num_heads 62 | 63 | # === Prepare things we need across all timesteps: 64 | activation_fn = get_activation(activation_function) 65 | edge_type_to_state_transformation_layers = [] # Layers to compute the message from a source state 66 | edge_type_to_attention_parameters = [] # Parameters for the attention mechanism 67 | edge_type_to_message_targets = [] # List of tensors of message targets 68 | for edge_type_idx, adjacency_list_for_edge_type in enumerate(adjacency_lists): 69 | edge_type_to_state_transformation_layers.append( 70 | tf.keras.layers.Dense(units=state_dim, 71 | use_bias=False, 72 | activation=None, 73 | name="Edge_%i_Weight" % edge_type_idx)) 74 | edge_type_to_attention_parameters.append( 75 | tf.get_variable(shape=(2 * state_dim), 76 | name="Edge_%i_Attention_Parameters" % edge_type_idx)) 77 | edge_type_to_message_targets.append(adjacency_list_for_edge_type[:, 1]) 78 | 79 | # Let M be the number of messages (sum of all E): 80 | message_targets = tf.concat(edge_type_to_message_targets, axis=0) # Shape [M] 81 | 82 | cur_node_states = node_embeddings 83 | for _ in range(num_timesteps): 84 | edge_type_to_per_head_messages = [] # type: List[tf.Tensor] # list of lists of tensors of messages of shape [E, K, D/K] 85 | edge_type_to_per_head_attention_coefficients = [] # type: List[tf.Tensor] # list of lists of tensors of shape [E, K] 86 | 87 | # Collect incoming messages per edge type 88 | # Note: 89 | # We compute the state transformations (to make use of the wider, faster matrix multiplication), 90 | # and then split into the individual attention heads via some reshapes: 91 | for edge_type_idx, adjacency_list_for_edge_type in enumerate(adjacency_lists): 92 | edge_sources = adjacency_list_for_edge_type[:, 0] 93 | edge_targets = adjacency_list_for_edge_type[:, 1] 94 | 95 | transformed_states = \ 96 | edge_type_to_state_transformation_layers[edge_type_idx](cur_node_states) # Shape [V, D] 97 | 98 | edge_transformed_source_states = \ 99 | tf.nn.embedding_lookup(params=transformed_states, ids=edge_sources) # Shape [E, D] 100 | edge_transformed_target_states = \ 101 | tf.nn.embedding_lookup(params=transformed_states, ids=edge_targets) # Shape [E, D] 102 | 103 | per_edge_per_head_transformed_source_states = \ 104 | tf.reshape(edge_transformed_source_states, shape=(-1, num_heads, per_head_dim)) 105 | 106 | per_edge_per_head_transformed_states = \ 107 | tf.concat([per_edge_per_head_transformed_source_states, 108 | tf.reshape(edge_transformed_target_states, shape=(-1, num_heads, per_head_dim))], 109 | axis=-1) # Shape [E, K, 2*D/K] 110 | per_head_attention_pars = tf.reshape(edge_type_to_attention_parameters[edge_type_idx], 111 | shape=(num_heads, 2 * per_head_dim)) # Shape [K, 2*D/K] 112 | per_edge_per_head_attention_coefficients = \ 113 | tf.nn.leaky_relu(tf.einsum('vki,ki->vk', 114 | per_edge_per_head_transformed_states, 115 | per_head_attention_pars)) # Shape [E, K] 116 | 117 | edge_type_to_per_head_messages.append(per_edge_per_head_transformed_source_states) 118 | edge_type_to_per_head_attention_coefficients.append(per_edge_per_head_attention_coefficients) 119 | 120 | per_head_messages = tf.concat(edge_type_to_per_head_messages, axis=0) 121 | per_head_attention_coefficients = tf.concat(edge_type_to_per_head_attention_coefficients, 
axis=0) 122 | 123 | head_to_aggregated_messages = [] # list of tensors of shape [V, D/K] 124 | for head_idx in range(num_heads): 125 | # Compute the softmax over all the attention coefficients for all messages going to this state: 126 | attention_coefficients = tf.concat(per_head_attention_coefficients[:, head_idx], axis=0) # Shape [M] 127 | attention_values = \ 128 | tf.exp(unsorted_segment_log_softmax(logits=attention_coefficients, 129 | segment_ids=message_targets, 130 | num_segments=num_nodes)) # Shape [M] 131 | messages = per_head_messages[:, head_idx, :] # Shape [M, D/K] 132 | # Compute weighted sum per target node for this head: 133 | head_to_aggregated_messages.append( 134 | tf.unsorted_segment_sum(data=tf.expand_dims(attention_values, -1) * messages, 135 | segment_ids=message_targets, 136 | num_segments=num_nodes)) 137 | 138 | new_node_states = activation_fn(tf.concat(head_to_aggregated_messages, axis=-1)) 139 | cur_node_states = new_node_states 140 | 141 | return cur_node_states 142 | -------------------------------------------------------------------------------- /gnns/rgcn.py: -------------------------------------------------------------------------------- 1 | from typing import List, Optional 2 | 3 | import tensorflow as tf 4 | 5 | from utils import get_activation, get_aggregation_function, SMALL_NUMBER 6 | 7 | 8 | def sparse_rgcn_layer(node_embeddings: tf.Tensor, 9 | adjacency_lists: List[tf.Tensor], 10 | type_to_num_incoming_edges: tf.Tensor, 11 | state_dim: Optional[int], 12 | num_timesteps: int = 1, 13 | activation_function: Optional[str] = "tanh", 14 | message_aggregation_function: str = "sum", 15 | normalize_by_num_incoming: bool = True, 16 | use_both_source_and_target: bool = False, 17 | ) -> tf.Tensor: 18 | """ 19 | Compute new graph states by neural message passing. 20 | This implements the R-GCN model (Schlichtkrull et al., https://arxiv.org/pdf/1703.06103.pdf) 21 | for the case of few relations / edge types, i.e., we do not use the dimensionality-reduction 22 | tricks from section 2.2 of that paper. 23 | For this, we assume existing node states h^t_v and a list of per-edge-type adjacency 24 | matrices A_\ell. 25 | 26 | We compute new states as follows: 27 | h^{t+1}_v := \sigma(\sum_\ell 28 | \sum_{(u, v) \in A_\ell} 29 | 1/c_{v,\ell} * (W_\ell * h^t_u)) 30 | c_{\v,\ell} is usually the number of \ell edges going into v. 31 | The learnable parameters of this are the W_\ell \in R^{D,D}. 32 | 33 | We use the following abbreviations in shape descriptions: 34 | * V: number of nodes 35 | * D: state dimension 36 | * L: number of different edge types 37 | * E: number of edges of a given edge type 38 | 39 | Arguments: 40 | node_embeddings: float32 tensor of shape [V, D], the original representation of 41 | each node in the graph. 42 | adjacency_lists: List of L adjacency lists, represented as int32 tensors of shape 43 | [E, 2]. Concretely, adjacency_lists[l][k,:] == [v, u] means that the k-th edge 44 | of type l connects node v to node u. 45 | type_to_num_incoming_edges: float32 tensor of shape [L, V] representing the number 46 | of incoming edges of a given type. Concretely, type_to_num_incoming_edges[l, v] 47 | is the number of edge of type l connecting to node v. 48 | state_dim: Optional size of output dimension of the GNN layer. If not set, defaults 49 | to D, the dimensionality of the input. If different from the input dimension, 50 | parameter num_timesteps has to be 1. 51 | num_timesteps: Number of repeated applications of this message passing layer. 
52 | activation_function: Type of activation function used. 53 | message_aggregation_function: Type of aggregation function used for messages. 54 | normalize_by_num_incoming: Flag indicating if messages should be scaled by 1/(number 55 | of incoming edges). 56 | 57 | Returns: 58 | float32 tensor of shape [V, state_dim] 59 | """ 60 | num_nodes = tf.shape(node_embeddings, out_type=tf.int32)[0] 61 | if state_dim is None: 62 | state_dim = tf.shape(node_embeddings, out_type=tf.int32)[1] 63 | 64 | # === Prepare things we need across all timesteps: 65 | activation_fn = get_activation(activation_function) 66 | message_aggregation_fn = get_aggregation_function(message_aggregation_function) 67 | edge_type_to_message_transformation_layers = [] # Layers to compute the message from a source state 68 | edge_type_to_message_targets = [] # List of tensors of message targets 69 | for edge_type_idx, adjacency_list_for_edge_type in enumerate(adjacency_lists): 70 | edge_type_to_message_transformation_layers.append( 71 | tf.keras.layers.Dense(units=state_dim, 72 | use_bias=False, 73 | activation=None, 74 | name="Edge_%i_Weight" % edge_type_idx)) 75 | edge_type_to_message_targets.append(adjacency_list_for_edge_type[:, 1]) 76 | 77 | # Let M be the number of messages (sum of all E): 78 | message_targets = tf.concat(edge_type_to_message_targets, axis=0) # Shape [M] 79 | 80 | cur_node_states = node_embeddings 81 | for _ in range(num_timesteps): 82 | messages_per_type = [] # list of tensors of messages of shape [E, H] 83 | # Collect incoming messages per edge type 84 | for edge_type_idx, adjacency_list_for_edge_type in enumerate(adjacency_lists): 85 | edge_sources = adjacency_list_for_edge_type[:, 0] 86 | edge_targets = adjacency_list_for_edge_type[:, 1] 87 | edge_source_states = \ 88 | tf.nn.embedding_lookup(params=cur_node_states, 89 | ids=edge_sources) # Shape [E, H] 90 | 91 | if use_both_source_and_target: 92 | edge_target_states = \ 93 | tf.nn.embedding_lookup(params=cur_node_states, 94 | ids=edge_targets) # Shape [E, H] 95 | edge_state_pairs = tf.concat([edge_source_states, edge_target_states], axis=-1) # Shape [E, 2H] 96 | messages = edge_type_to_message_transformation_layers[edge_type_idx](edge_state_pairs) # Shape [E, H] 97 | else: 98 | messages = edge_type_to_message_transformation_layers[edge_type_idx](edge_source_states) # Shape [E, H] 99 | 100 | if normalize_by_num_incoming: 101 | num_incoming_to_node_per_message = \ 102 | tf.nn.embedding_lookup(params=type_to_num_incoming_edges[edge_type_idx, :], 103 | ids=edge_targets) # Shape [E, H] 104 | messages = tf.expand_dims(1.0 / (num_incoming_to_node_per_message + SMALL_NUMBER), axis=-1) * messages 105 | 106 | messages_per_type.append(messages) 107 | 108 | cur_messages = tf.concat(messages_per_type, axis=0) # Shape [M, H] 109 | aggregated_messages = \ 110 | message_aggregation_fn(data=cur_messages, 111 | segment_ids=message_targets, 112 | num_segments=num_nodes) # Shape [V, H] 113 | 114 | new_node_states = activation_fn(aggregated_messages) # Shape [V, H] 115 | cur_node_states = new_node_states 116 | 117 | return cur_node_states 118 | -------------------------------------------------------------------------------- /gnns/rgdcn.py: -------------------------------------------------------------------------------- 1 | from typing import List, Optional 2 | 3 | import tensorflow as tf 4 | 5 | from utils import get_activation, get_aggregation_function, SMALL_NUMBER 6 | 7 | 8 | def sparse_rgdcn_layer(node_embeddings: tf.Tensor, 9 | adjacency_lists: List[tf.Tensor], 10 
| type_to_num_incoming_edges: tf.Tensor, 11 | num_channels: int = 8, 12 | channel_dim: int = 16, 13 | num_timesteps: int = 1, 14 | use_full_state_for_channel_weights: bool = False, 15 | tie_channel_weights: bool = False, 16 | activation_function: Optional[str] = "tanh", 17 | message_aggregation_function: str = "sum", 18 | normalize_by_num_incoming: bool = True, 19 | ) -> tf.Tensor: 20 | """ 21 | Compute new graph states by message passing using dynamic convolutions for edge kernels. 22 | For this, we assume existing node states h^t_v and a list of per-edge-type adjacency 23 | matrices A_\ell. 24 | We split each state h^t_v into C "channels" of dimension K, and use h^t_{v,c,:} to refer to 25 | the slice of the node state corresponding to the c-th channel. 26 | 27 | Four variants of the model are implemented: 28 | 29 | (1) Edge kernels computed from full target node state using weights shared across all channels: 30 | [use_full_state_for_channel_weights = True, tie_channel_weights = True] 31 | h^{t+1}_v := \sigma(Concat(\sum_\ell \sum_{(u, v) \in A_\ell} W^t_{\ell,v} * h^t_{u,c,:} 32 | | 1 <= c <= C)) 33 | W^t_{\ell,v} := F_\ell * h^t_{v,:,:} 34 | The learnable parameters of this are the F_\ell \in R^{C*K, K*K}. 35 | 36 | (2) Edge kernels computed from full target node state using separate weights for each channel: 37 | [use_full_state_for_channel_weights = True, tie_channel_weights = False] 38 | h^{t+1}_v := \sigma(Concat(\sum_\ell \sum_{(u, v) \in A_\ell} \sigma(W^t_{\ell,v,c} * h^t_{u,c,:} 39 | | 1 <= c <= C) 40 | W^t_{\ell,v,c} := F_{\ell,c} * h^t_{v,:,:} 41 | The learnable parameters of this are the F_{\ell,c} \in R^{C*K, K*K}. 42 | 43 | (3) Edge kernels computed from corresponding channel of target node using weights shared across all channels: 44 | [use_full_state_for_channel_weights = False, tie_channel_weights = True] 45 | h^{t+1}_v := \sigma(Concat(\sum_\ell \sum_{(u, v) \in A_\ell} \sigma(W^t_{\ell,v,c} * h^t_{u,c,:} 46 | | 1 <= c <= C) 47 | W^t_{\ell,v,c} := F_{\ell} * h^t_{v,c,:} 48 | The learnable parameters of this are the F_\ell \in R^{K, K*K}. 49 | 50 | (4) Edge kernels computed from corresponding channel of target node using separate weights for each channel: 51 | [use_full_state_for_channel_weights = False, tie_channel_weights = False] 52 | h^{t+1}_v := \sigma(Concat(\sum_\ell \sum_{(u, v) \in A_\ell} W^t_{\ell,v,c} * h^t_{u,c,:} 53 | | 1 <= c <= C)) 54 | W^t_{\ell,v,c} := F_{\ell,c} * h^t_{v,c,:} 55 | The learnable parameters of this are the F_{\ell,c} \in R^{K, K*K}. 56 | 57 | We use the following abbreviations in shape descriptions: 58 | * V: number of nodes 59 | * C: number of "channels" 60 | * K: dimension of each "channel" 61 | * D: state dimension, fixed to C * K. 62 | * L: number of different edge types 63 | * E: number of edges of a given edge type 64 | 65 | Args: 66 | node_embeddings: float32 tensor of shape [V, D], the original representation of 67 | each node in the graph. 68 | adjacency_lists: List of L adjacency lists, represented as int32 tensors of shape 69 | [E, 2]. Concretely, adjacency_lists[l][k,:] == [v, u] means that the k-th edge 70 | of type l connects node v to node u. 71 | num_channels: Number of "channels" to split state information into. 72 | channel_dim: Size of each "channel" 73 | num_timesteps: Number of repeated applications of this message passing layer. 74 | use_full_state_for_channel_weights: Flag indicating if the full state is used to 75 | compute the weights for individual channels, or only the corresponding channel. 
76 | tie_channel_weights: Flag indicating if the weights for computing the per-channel 77 | linear layer are shared or not. 78 | activation_function: Type of activation function used. 79 | message_aggregation_function: Type of aggregation function used for messages. 80 | normalize_by_num_incoming: Flag indicating if messages should be scaled by 1/(number 81 | of incoming edges). 82 | 83 | Returns: 84 | float32 tensor of shape [V, D] 85 | """ 86 | num_nodes = tf.shape(node_embeddings, out_type=tf.int32)[0] 87 | 88 | # === Prepare things we need across all timesteps: 89 | activation_fn = get_activation(activation_function) 90 | message_aggregation_fn = get_aggregation_function(message_aggregation_function) 91 | edge_type_to_channel_to_weight_computation_layers = [] # Layers to compute the dynamic computation weights 92 | edge_type_to_message_targets = [] # List of tensors of message targets 93 | 94 | for edge_type_idx, adjacency_list_for_edge_type in enumerate(adjacency_lists): 95 | channel_to_weight_computation_layers = [] 96 | for channel in range(num_channels): 97 | if channel == 0 or not(tie_channel_weights): 98 | channel_to_weight_computation_layers.append( 99 | tf.keras.layers.Dense( 100 | units=channel_dim * channel_dim, 101 | use_bias=False, 102 | kernel_initializer=tf.initializers.truncated_normal(mean=0.0, stddev=1.0 / (channel_dim**2)), 103 | activation=activation_fn, 104 | name="Edge_%i_Channel_%i_Weight_Computation" % (edge_type_idx, channel))) 105 | else: # Case channel > 0 and tie_channel_weights 106 | channel_to_weight_computation_layers.append( 107 | channel_to_weight_computation_layers[-1]) 108 | edge_type_to_channel_to_weight_computation_layers.append(channel_to_weight_computation_layers) 109 | 110 | edge_type_to_message_targets.append(adjacency_list_for_edge_type[:, 1]) 111 | 112 | # Let M be the number of messages (sum of all E): 113 | message_targets = tf.concat(edge_type_to_message_targets, axis=0) # Shape [M] 114 | 115 | cur_node_states = node_embeddings # Shape [V, D] 116 | for _ in range(num_timesteps): 117 | node_states_chunked = tf.reshape(cur_node_states, 118 | shape=(-1, num_channels, channel_dim)) # shape [V, C, K] 119 | 120 | new_node_states_chunked = [] # type: List[tf.Tensor] # C tensors of shape [V, K] 121 | for channel_idx in range(num_channels): 122 | cur_channel_node_states = node_states_chunked[:, channel_idx, :] # shape [V, K] 123 | cur_channel_message_per_type = [] # list of tensors of messages of shape [E, K] 124 | 125 | # Collect incoming messages per edge type 126 | for edge_type_idx, adjacency_list_for_edge_type in enumerate(adjacency_lists): 127 | edge_sources = adjacency_list_for_edge_type[:, 0] 128 | edge_targets = adjacency_list_for_edge_type[:, 1] 129 | edge_source_states = \ 130 | tf.nn.embedding_lookup(params=cur_channel_node_states, 131 | ids=edge_sources) # Shape [E, K] 132 | 133 | if use_full_state_for_channel_weights: 134 | weight_computation_input = cur_node_states 135 | else: 136 | weight_computation_input = cur_channel_node_states 137 | # TODO: In the tie_channel_weights && use_full_state_for_channel_weights case, 138 | # this is the same for each channel: 139 | weight_compute_layer = edge_type_to_channel_to_weight_computation_layers[edge_type_idx][channel_idx] 140 | edge_weights = weight_compute_layer(weight_computation_input) # Shape [V, K*K] 141 | edge_weights = tf.reshape(edge_weights, shape=(-1, channel_dim, channel_dim)) # Shape [V, K, K] 142 | edge_weights_for_targets = \ 143 | tf.nn.embedding_lookup(params=edge_weights, 
ids=edge_targets) # Shape [E, K, K] 144 | 145 |                 # Matrix multiply between edge_source_states[v] and edge_weights_for_targets[v]: 146 |                 messages = tf.einsum('vi,vij->vj', edge_source_states, edge_weights_for_targets) # Shape [E, K] 147 |                 if normalize_by_num_incoming: 148 |                     num_incoming_to_node_per_message = \ 149 |                         tf.nn.embedding_lookup(params=type_to_num_incoming_edges[edge_type_idx, :], 150 |                                                ids=edge_targets) # Shape [E] 151 |                     messages = tf.expand_dims(1.0 / (num_incoming_to_node_per_message + SMALL_NUMBER), axis=-1) * messages 152 | 153 |                 cur_channel_message_per_type.append(messages) 154 | 155 |             cur_channel_messages = tf.concat(cur_channel_message_per_type, axis=0) # Shape [M, K] 156 |             cur_channel_aggregated_incoming_messages = \ 157 |                 message_aggregation_fn(data=cur_channel_messages, 158 |                                        segment_ids=message_targets, 159 |                                        num_segments=num_nodes) # Shape [V, K] 160 |             cur_channel_aggregated_incoming_messages = activation_fn(cur_channel_aggregated_incoming_messages) 161 | 162 |             new_node_states_chunked.append(cur_channel_aggregated_incoming_messages) 163 | 164 |         new_node_states = tf.concat(new_node_states_chunked, axis=1) # Shape [V, C * K] 165 |         cur_node_states = new_node_states 166 | 167 |     return cur_node_states 168 | -------------------------------------------------------------------------------- /gnns/rgin.py: -------------------------------------------------------------------------------- 1 | from typing import List, Optional 2 | import tensorflow as tf 3 | 4 | from utils import get_activation, get_aggregation_function, MLP 5 | 6 | 7 | def sparse_rgin_layer( 8 |         node_embeddings: tf.Tensor, 9 |         adjacency_lists: List[tf.Tensor], 10 |         state_dim: Optional[int], 11 |         num_timesteps: int = 1, 12 |         activation_function: Optional[str] = "ReLU", 13 |         message_aggregation_function: str = "sum", 14 |         use_target_state_as_input: bool = False, 15 |         num_edge_MLP_hidden_layers: Optional[int] = 1, 16 |         num_aggr_MLP_hidden_layers: Optional[int] = None, 17 |         ) -> tf.Tensor: 18 |     """ 19 |     Compute new graph states by neural message passing using MLPs for state updates 20 |     and message computation. 21 |     For this, we assume existing node states h^t_v and a list of per-edge-type adjacency 22 |     matrices A_\ell. 23 | 24 |     We compute new states as follows: 25 |         h^{t+1}_v := \sigma(MLP_{aggr}(\sum_\ell \sum_{(u, v) \in A_\ell} MLP_\ell(h^t_u))) 26 |     The learnable parameters of this are the MLPs MLP_\ell. 27 |     This is derived from Cor. 6 of arXiv:1810.00826, instantiating the functions f, \phi 28 |     with _separate_ MLPs. This is more powerful than the GIN formulation in Eq. (4.1) of 29 |     arXiv:1810.00826, as we want to be able to distinguish graphs of the form 30 |         G_1 = (V={1, 2, 3}, E_1={(1, 2)}, E_2={(3, 2)}) 31 |     and 32 |         G_2 = (V={1, 2, 3}, E_1={(3, 2)}, E_2={(1, 2)}) 33 |     from each other. If we treated all edges the same, 34 |     G_1.E_1 \cup G_1.E_2 == G_2.E_1 \cup G_2.E_2 would imply that the two graphs 35 |     become indistinguishable. 36 |     Hence, we introduce per-edge-type MLPs, which also means that we have to drop 37 |     the optimisation of modelling f \circ \phi by a single MLP used in the original 38 |     GIN formulation. 39 | 40 |     We use the following abbreviations in shape descriptions: 41 |     * V: number of nodes 42 |     * D: state dimension 43 |     * L: number of different edge types 44 |     * E: number of edges of a given edge type 45 | 46 |     Arguments: 47 |         node_embeddings: float32 tensor of shape [V, D], the original representation of 48 |             each node in the graph.
49 | adjacency_lists: List of L adjacency lists, represented as int32 tensors of shape 50 | [E, 2]. Concretely, adjacency_lists[l][k,:] == [v, u] means that the k-th edge 51 | of type l connects node v to node u. 52 | state_dim: Optional size of output dimension of the GNN layer. If not set, defaults 53 | to D, the dimensionality of the input. If different from the input dimension, 54 | parameter num_timesteps has to be 1. 55 | num_timesteps: Number of repeated applications of this message passing layer. 56 | activation_function: Type of activation function used. 57 | message_aggregation_function: Type of aggregation function used for messages. 58 | use_target_state_as_input: Flag indicating if the edge MLP should consume both 59 | source and target state (True) or only source state (False). 60 | num_edge_MLP_hidden_layers: Number of hidden layers of the MLPs used to transform 61 | messages from neighbouring nodes. If None, the raw states are used directly. 62 | num_aggr_MLP_hidden_layers: Number of hidden layers of the MLPs used on the 63 | aggregation of messages from neighbouring nodes. If none, the aggregated messages 64 | are used directly. 65 | 66 | Returns: 67 | float32 tensor of shape [V, state_dim] 68 | """ 69 | num_nodes = tf.shape(node_embeddings, out_type=tf.int32)[0] 70 | if state_dim is None: 71 | state_dim = tf.shape(node_embeddings, out_type=tf.int32)[1] 72 | 73 | # === Prepare things we need across all timesteps: 74 | activation_fn = get_activation(activation_function) 75 | message_aggregation_fn = get_aggregation_function(message_aggregation_function) 76 | 77 | if num_aggr_MLP_hidden_layers is not None: 78 | aggregation_MLP = MLP(out_size=state_dim, 79 | hidden_layers=num_aggr_MLP_hidden_layers, 80 | activation_fun=activation_fn, 81 | name="Aggregation_MLP") # type: Optional[MLP] 82 | else: 83 | aggregation_MLP = None 84 | 85 | if num_edge_MLP_hidden_layers is not None: 86 | edge_type_to_edge_mlp = [] # type: Optional[List[MLP]] # MLPs to compute the edge messages 87 | else: 88 | edge_type_to_edge_mlp = None 89 | edge_type_to_message_targets = [] # List of tensors of message targets 90 | for edge_type_idx, adjacency_list_for_edge_type in enumerate(adjacency_lists): 91 | if edge_type_to_edge_mlp is not None and num_edge_MLP_hidden_layers is not None: 92 | edge_type_to_edge_mlp.append( 93 | MLP(out_size=state_dim, 94 | hidden_layers=num_edge_MLP_hidden_layers, 95 | activation_fun=activation_fn, 96 | name="Edge_%i_MLP" % edge_type_idx)) 97 | edge_type_to_message_targets.append(adjacency_list_for_edge_type[:, 1]) 98 | 99 | # Let M be the number of messages (sum of all E): 100 | message_targets = tf.concat(edge_type_to_message_targets, axis=0) # Shape [M] 101 | 102 | cur_node_states = node_embeddings 103 | for _ in range(num_timesteps): 104 | messages_per_type = [] # list of tensors of messages of shape [E, D] 105 | # Collect incoming messages per edge type 106 | for edge_type_idx, adjacency_list_for_edge_type in enumerate(adjacency_lists): 107 | edge_sources = adjacency_list_for_edge_type[:, 0] 108 | edge_targets = adjacency_list_for_edge_type[:, 1] 109 | edge_source_states = \ 110 | tf.nn.embedding_lookup(params=cur_node_states, 111 | ids=edge_sources) # Shape [E, D] 112 | 113 | edge_mlp_inputs = edge_source_states 114 | if use_target_state_as_input: 115 | edge_target_states = \ 116 | tf.nn.embedding_lookup(params=cur_node_states, 117 | ids=edge_targets) # Shape [E, D] 118 | edge_mlp_inputs = tf.concat([edge_source_states, edge_target_states], 119 | axis=1) # Shape [E, 2*D] 
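            # At this point edge_mlp_inputs holds one row per edge of this type: either just the
            # source state (shape [E, D]) or the concatenated source and target states (shape [E, 2*D]),
            # depending on use_target_state_as_input. The per-edge-type MLP below maps this to an
            # [E, D] message; if no edge MLP is configured, the inputs are used as messages directly.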
120 | 121 | if edge_type_to_edge_mlp is not None: 122 | messages = edge_type_to_edge_mlp[edge_type_idx](edge_mlp_inputs) # Shape [E, D] 123 | else: 124 | messages = edge_mlp_inputs 125 | messages_per_type.append(messages) 126 | 127 | all_messages = tf.concat(messages_per_type, axis=0) # Shape [M, D] 128 | if edge_type_to_edge_mlp is not None: 129 | all_messages = activation_fn(all_messages) # Shape [M, D] (Apply nonlinearity to Edge-MLP outputs as well) 130 | aggregated_messages = \ 131 | message_aggregation_fn(data=all_messages, 132 | segment_ids=message_targets, 133 | num_segments=num_nodes) # Shape [V, D] 134 | 135 | new_node_states = aggregated_messages 136 | if aggregation_MLP is not None: 137 | new_node_states = aggregation_MLP(new_node_states) 138 | new_node_states = activation_fn(new_node_states) # Note that the final MLP layer has no activation, so we do that here explicitly 139 | new_node_states = tf.contrib.layers.layer_norm(new_node_states) 140 | cur_node_states = new_node_states 141 | 142 | return cur_node_states 143 | -------------------------------------------------------------------------------- /models/__init__.py: -------------------------------------------------------------------------------- 1 | from .sparse_graph_model import Sparse_Graph_Model 2 | from .ggnn_model import GGNN_Model 3 | from .gnn_edge_mlp_model import GNN_Edge_MLP_Model 4 | from .gnn_film_model import GNN_FiLM_Model 5 | from .rgat_model import RGAT_Model 6 | from .rgcn_model import RGCN_Model 7 | from .rgdcn_model import RGDCN_Model 8 | from .rgin_model import RGIN_Model 9 | -------------------------------------------------------------------------------- /models/ggnn_model.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, Any, List 2 | 3 | import tensorflow as tf 4 | 5 | from .sparse_graph_model import Sparse_Graph_Model 6 | from tasks import Sparse_Graph_Task 7 | from gnns import sparse_ggnn_layer 8 | 9 | 10 | class GGNN_Model(Sparse_Graph_Model): 11 | @classmethod 12 | def default_params(cls): 13 | params = super().default_params() 14 | params.update({ 15 | 'hidden_size': 128, 16 | 'graph_rnn_cell': 'GRU', # RNN, GRU, or LSTM 17 | 'graph_activation_function': "tanh", 18 | "message_aggregation_function": "sum", 19 | 'graph_layer_input_dropout_keep_prob': 1.0, 20 | 'graph_dense_between_every_num_gnn_layers': 10000, 21 | 'graph_residual_connection_every_num_layers': 10000, 22 | }) 23 | return params 24 | 25 | @staticmethod 26 | def name(params: Dict[str, Any]) -> str: 27 | return "GGNN" 28 | 29 | def __init__(self, params: Dict[str, Any], task: Sparse_Graph_Task, run_id: str, result_dir: str) -> None: 30 | super().__init__(params, task, run_id, result_dir) 31 | 32 | def _apply_gnn_layer(self, 33 | node_representations: tf.Tensor, 34 | adjacency_lists: List[tf.Tensor], 35 | type_to_num_incoming_edges: tf.Tensor, 36 | num_timesteps: int) -> tf.Tensor: 37 | return sparse_ggnn_layer( 38 | node_embeddings=node_representations, 39 | adjacency_lists=adjacency_lists, 40 | state_dim=self.params['hidden_size'], 41 | num_timesteps=num_timesteps, 42 | gated_unit_type=self.params['graph_rnn_cell'], 43 | activation_function=self.params['graph_activation_function'], 44 | message_aggregation_function=self.params['message_aggregation_function'], 45 | ) 46 | -------------------------------------------------------------------------------- /models/gnn_edge_mlp_model.py: -------------------------------------------------------------------------------- 1 | from 
typing import Dict, Any, List 2 | 3 | import tensorflow as tf 4 | 5 | from .sparse_graph_model import Sparse_Graph_Model 6 | from tasks import Sparse_Graph_Task 7 | from gnns import sparse_gnn_edge_mlp_layer 8 | 9 | 10 | class GNN_Edge_MLP_Model(Sparse_Graph_Model): 11 | @classmethod 12 | def default_params(cls): 13 | params = super().default_params() 14 | params.update({ 15 | 'max_nodes_in_batch': 25000, 16 | 'hidden_size': 128, 17 | "graph_activation_function": "gelu", 18 | "message_aggregation_function": "sum", 19 | 'graph_inter_layer_norm': True, 20 | 'use_target_state_as_input': True, 21 | 'num_edge_hidden_layers': 1, 22 | }) 23 | return params 24 | 25 | @staticmethod 26 | def name(params: Dict[str, Any]) -> str: 27 | return "GNN-Edge-MLP%i" % (params['num_edge_hidden_layers']) 28 | 29 | def __init__(self, params: Dict[str, Any], task: Sparse_Graph_Task, run_id: str, result_dir: str) -> None: 30 | super().__init__(params, task, run_id, result_dir) 31 | 32 | def _apply_gnn_layer(self, 33 | node_representations: tf.Tensor, 34 | adjacency_lists: List[tf.Tensor], 35 | type_to_num_incoming_edges: tf.Tensor, 36 | num_timesteps: int, 37 | ) -> tf.Tensor: 38 | return sparse_gnn_edge_mlp_layer( 39 | node_embeddings=node_representations, 40 | adjacency_lists=adjacency_lists, 41 | type_to_num_incoming_edges=type_to_num_incoming_edges, 42 | state_dim=self.params['hidden_size'], 43 | num_timesteps=num_timesteps, 44 | activation_function=self.params['graph_activation_function'], 45 | message_aggregation_function=self.params['message_aggregation_function'], 46 | use_target_state_as_input=self.params['use_target_state_as_input'], 47 | num_edge_hidden_layers=self.params['num_edge_hidden_layers'], 48 | ) 49 | -------------------------------------------------------------------------------- /models/gnn_film_model.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, Any, List 2 | 3 | import tensorflow as tf 4 | 5 | from .sparse_graph_model import Sparse_Graph_Model 6 | from tasks import Sparse_Graph_Task 7 | from gnns import sparse_gnn_film_layer 8 | 9 | 10 | class GNN_FiLM_Model(Sparse_Graph_Model): 11 | @classmethod 12 | def default_params(cls): 13 | params = super().default_params() 14 | params.update({ 15 | "hidden_size": 128, 16 | "graph_activation_function": "ReLU", 17 | "message_aggregation_function": "sum", 18 | "normalize_messages_by_num_incoming": False, 19 | }) 20 | return params 21 | 22 | @staticmethod 23 | def name(params: Dict[str, Any]) -> str: 24 | return "GNN-FiLM" 25 | 26 | def __init__(self, params: Dict[str, Any], task: Sparse_Graph_Task, run_id: str, result_dir: str) -> None: 27 | super().__init__(params, task, run_id, result_dir) 28 | 29 | def _apply_gnn_layer(self, 30 | node_representations: tf.Tensor, 31 | adjacency_lists: List[tf.Tensor], 32 | type_to_num_incoming_edges: tf.Tensor, 33 | num_timesteps: int) -> tf.Tensor: 34 | return sparse_gnn_film_layer( 35 | node_embeddings=node_representations, 36 | adjacency_lists=adjacency_lists, 37 | type_to_num_incoming_edges=type_to_num_incoming_edges, 38 | state_dim=self.params['hidden_size'], 39 | num_timesteps=num_timesteps, 40 | activation_function=self.params['graph_activation_function'], 41 | message_aggregation_function=self.params['message_aggregation_function'], 42 | normalize_by_num_incoming=self.params["normalize_messages_by_num_incoming"], 43 | ) 44 | -------------------------------------------------------------------------------- /models/rgat_model.py: 
-------------------------------------------------------------------------------- 1 | from typing import Dict, Any, List 2 | 3 | import tensorflow as tf 4 | 5 | from .sparse_graph_model import Sparse_Graph_Model 6 | from tasks import Sparse_Graph_Task 7 | from gnns import sparse_rgat_layer 8 | 9 | 10 | class RGAT_Model(Sparse_Graph_Model): 11 | @classmethod 12 | def default_params(cls): 13 | params = super().default_params() 14 | params.update({ 15 | 'hidden_size': 128, 16 | 'num_heads': 4, 17 | 'graph_activation_function': 'tanh', 18 | 'graph_layer_input_dropout_keep_prob': 1.0, 19 | 'graph_dense_between_every_num_gnn_layers': 10000, 20 | 'graph_residual_connection_every_num_layers': 10000, 21 | }) 22 | return params 23 | 24 | @staticmethod 25 | def name(params: Dict[str, Any]) -> str: 26 | return "RGAT" 27 | 28 | def __init__(self, params: Dict[str, Any], task: Sparse_Graph_Task, run_id: str, result_dir: str) -> None: 29 | super().__init__(params, task, run_id, result_dir) 30 | 31 | def _apply_gnn_layer(self, 32 | node_representations: tf.Tensor, 33 | adjacency_lists: List[tf.Tensor], 34 | type_to_num_incoming_edges: tf.Tensor, 35 | num_timesteps: int) -> tf.Tensor: 36 | return sparse_rgat_layer( 37 | node_embeddings=node_representations, 38 | adjacency_lists=adjacency_lists, 39 | state_dim=self.params['hidden_size'], 40 | num_timesteps=num_timesteps, 41 | num_heads=self.params['num_heads'], 42 | activation_function=self.params['graph_activation_function'], 43 | ) 44 | -------------------------------------------------------------------------------- /models/rgcn_model.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, Any, List 2 | 3 | import tensorflow as tf 4 | 5 | from .sparse_graph_model import Sparse_Graph_Model 6 | from tasks import Sparse_Graph_Task 7 | from gnns import sparse_rgcn_layer 8 | 9 | 10 | class RGCN_Model(Sparse_Graph_Model): 11 | @classmethod 12 | def default_params(cls): 13 | params = super().default_params() 14 | params.update({ 15 | 'hidden_size': 128, 16 | "graph_activation_function": "ReLU", 17 | "message_aggregation_function": "sum", 18 | 'graph_layer_input_dropout_keep_prob': 1.0, 19 | 'graph_dense_between_every_num_gnn_layers': 10000, 20 | 'graph_residual_connection_every_num_layers': 10000, 21 | }) 22 | return params 23 | 24 | @staticmethod 25 | def name(params: Dict[str, Any]) -> str: 26 | return "RGCN" 27 | 28 | def __init__(self, params: Dict[str, Any], task: Sparse_Graph_Task, run_id: str, result_dir: str) -> None: 29 | super().__init__(params, task, run_id, result_dir) 30 | 31 | def _apply_gnn_layer(self, 32 | node_representations: tf.Tensor, 33 | adjacency_lists: List[tf.Tensor], 34 | type_to_num_incoming_edges: tf.Tensor, 35 | num_timesteps: int) -> tf.Tensor: 36 | return sparse_rgcn_layer( 37 | node_embeddings=node_representations, 38 | adjacency_lists=adjacency_lists, 39 | type_to_num_incoming_edges=type_to_num_incoming_edges, 40 | state_dim=self.params['hidden_size'], 41 | num_timesteps=num_timesteps, 42 | activation_function=self.params['graph_activation_function'], 43 | message_aggregation_function=self.params['message_aggregation_function'], 44 | ) 45 | -------------------------------------------------------------------------------- /models/rgdcn_model.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, Any, List 2 | 3 | import tensorflow as tf 4 | 5 | from .sparse_graph_model import Sparse_Graph_Model 6 | from tasks import 
Sparse_Graph_Task 7 | from gnns import sparse_rgdcn_layer 8 | 9 | 10 | class RGDCN_Model(Sparse_Graph_Model): 11 | @classmethod 12 | def default_params(cls): 13 | params = super().default_params() 14 | params.update({ 15 | 'max_nodes_in_batch': 25000, 16 | 'hidden_size': 128, 17 | 'num_channels': 8, 18 | "use_full_state_for_channel_weights": False, 19 | "tie_channel_weights": False, 20 | "graph_activation_function": "ReLU", 21 | "message_aggregation_function": "sum", 22 | 'graph_inter_layer_norm': True, 23 | }) 24 | return params 25 | 26 | @staticmethod 27 | def name(params: Dict[str, Any]) -> str: 28 | return "RGDCN" 29 | 30 | def __init__(self, params: Dict[str, Any], task: Sparse_Graph_Task, run_id: str, result_dir: str) -> None: 31 | params['channel_dim'] = params['hidden_size'] // params['num_channels'] 32 | super().__init__(params, task, run_id, result_dir) 33 | 34 | def _apply_gnn_layer(self, 35 | node_representations: tf.Tensor, 36 | adjacency_lists: List[tf.Tensor], 37 | type_to_num_incoming_edges: tf.Tensor, 38 | num_timesteps: int) -> tf.Tensor: 39 | return sparse_rgdcn_layer( 40 | node_embeddings=node_representations, 41 | adjacency_lists=adjacency_lists, 42 | type_to_num_incoming_edges=type_to_num_incoming_edges, 43 | num_channels=self.params['num_channels'], 44 | channel_dim=self.params['channel_dim'], 45 | num_timesteps=num_timesteps, 46 | use_full_state_for_channel_weights=self.params['use_full_state_for_channel_weights'], 47 | tie_channel_weights=self.params['tie_channel_weights'], 48 | activation_function=self.params['graph_activation_function'], 49 | message_aggregation_function=self.params['message_aggregation_function'], 50 | ) 51 | -------------------------------------------------------------------------------- /models/rgin_model.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, Any, List 2 | 3 | import tensorflow as tf 4 | 5 | from .sparse_graph_model import Sparse_Graph_Model 6 | from tasks import Sparse_Graph_Task 7 | from gnns import sparse_rgin_layer 8 | 9 | 10 | class RGIN_Model(Sparse_Graph_Model): 11 | @classmethod 12 | def default_params(cls): 13 | params = super().default_params() 14 | params.update({ 15 | 'hidden_size': 128, 16 | "graph_activation_function": "ReLU", 17 | 'message_aggregation_function': "sum", 18 | 'graph_dense_between_every_num_gnn_layers': 10000, 19 | 'graph_inter_layer_norm': True, 20 | 'use_target_state_as_input': False, 21 | 'graph_num_edge_MLP_hidden_layers': 1, 22 | 'graph_num_aggr_MLP_hidden_layers': None, 23 | }) 24 | return params 25 | 26 | @staticmethod 27 | def name(params: Dict[str, Any]) -> str: 28 | return "RGIN" 29 | 30 | def __init__(self, params: Dict[str, Any], task: Sparse_Graph_Task, run_id: str, result_dir: str) -> None: 31 | super().__init__(params, task, run_id, result_dir) 32 | 33 | def _apply_gnn_layer(self, 34 | node_representations: tf.Tensor, 35 | adjacency_lists: List[tf.Tensor], 36 | type_to_num_incoming_edges: tf.Tensor, 37 | num_timesteps: int, 38 | ) -> tf.Tensor: 39 | return sparse_rgin_layer( 40 | node_embeddings=node_representations, 41 | adjacency_lists=adjacency_lists, 42 | state_dim=self.params['hidden_size'], 43 | num_timesteps=num_timesteps, 44 | activation_function=self.params['graph_activation_function'], 45 | message_aggregation_function=self.params['message_aggregation_function'], 46 | use_target_state_as_input=self.params['use_target_state_as_input'], 47 | num_edge_MLP_hidden_layers=self.params['graph_num_edge_MLP_hidden_layers'], 
48 | num_aggr_MLP_hidden_layers=self.params['graph_num_aggr_MLP_hidden_layers'], 49 | ) 50 | -------------------------------------------------------------------------------- /models/sparse_graph_model.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pickle 3 | import random 4 | import time 5 | from abc import ABC, abstractmethod 6 | from typing import Any, Dict, Optional, Tuple, List, Iterable 7 | 8 | import tensorflow as tf 9 | import numpy as np 10 | from dpu_utils.utils import ThreadedIterator, RichPath 11 | 12 | from tasks import Sparse_Graph_Task, DataFold 13 | from utils import get_activation 14 | 15 | 16 | class Sparse_Graph_Model(ABC): 17 | """ 18 | Abstract superclass of all graph models, defining core model functionality 19 | such as training loops, interaction with tasks, etc. Needs to be extended by 20 | concrete GNN implementations. 21 | """ 22 | @classmethod 23 | def default_params(cls): 24 | return { 25 | 'max_nodes_in_batch': 50000, 26 | 27 | 'graph_num_layers': 8, 28 | 'graph_num_timesteps_per_layer': 1, 29 | 30 | 'graph_layer_input_dropout_keep_prob': 0.8, 31 | 'graph_dense_between_every_num_gnn_layers': 1, 32 | 'graph_model_activation_function': 'tanh', 33 | 'graph_residual_connection_every_num_layers': 2, 34 | 'graph_inter_layer_norm': False, 35 | 36 | 'max_epochs': 10000, 37 | 'patience': 25, 38 | 'optimizer': 'Adam', 39 | 'learning_rate': 0.001, 40 | 'learning_rate_decay': 0.98, 41 | 'lr_for_num_graphs_per_batch': None, # The LR is normalised so that we use it for exactly that number of graphs; no normalisation happens if the value is None 42 | 'momentum': 0.85, 43 | 'clamp_gradient_norm': 1.0, 44 | 'random_seed': 0, 45 | } 46 | 47 | @staticmethod 48 | @abstractmethod 49 | def name(params: Dict[str, Any]) -> str: 50 | raise NotImplementedError() 51 | 52 | def __init__(self, 53 | params: Dict[str, Any], 54 | task: Sparse_Graph_Task, 55 | run_id: str, 56 | result_dir: str) -> None: 57 | self.params = params 58 | self.task = task 59 | self.run_id = run_id 60 | self.result_dir = result_dir 61 | 62 | self.__placeholders = {} # type: Dict[str, tf.Tensor] 63 | self.__ops = {} # type: Dict[str, tf.Tensor] 64 | 65 | # Build the actual model 66 | random.seed(params['random_seed']) 67 | np.random.seed(params['random_seed']) 68 | config = tf.ConfigProto() 69 | config.gpu_options.allow_growth = True 70 | self.graph = tf.Graph() 71 | self.sess = tf.Session(graph=self.graph, config=config) 72 | with self.graph.as_default(): 73 | tf.set_random_seed(self.params['random_seed']) 74 | self.__make_model() 75 | 76 | @property 77 | def log_file(self): 78 | return os.path.join(self.result_dir, "%s.log" % self.run_id) 79 | 80 | @property 81 | def best_model_file(self): 82 | return os.path.join(self.result_dir, "%s_best_model.pickle" % self.run_id) 83 | 84 | # -------------------- Model Saving/Loading -------------------- 85 | def initialize_model(self) -> None: 86 | with self.sess.graph.as_default(): 87 | init_op = tf.group(tf.global_variables_initializer(), 88 | tf.local_variables_initializer()) 89 | self.sess.run(init_op) 90 | 91 | def save_model(self, path: str) -> None: 92 | vars_to_retrieve = {} # type: Dict[str, tf.Tensor] 93 | for variable in self.sess.graph.get_collection(tf.GraphKeys.GLOBAL_VARIABLES): 94 | assert variable.name not in vars_to_retrieve 95 | vars_to_retrieve[variable.name] = variable 96 | weights_to_save = self.sess.run(vars_to_retrieve) 97 | 98 | data_to_save = { 99 | "model_class": self.name(self.params), 
100 | "task_class": self.task.name(), 101 | "model_params": self.params, 102 | "task_params": self.task.params, 103 | "task_metadata": self.task.get_metadata(), 104 | "weights": weights_to_save, 105 | } 106 | with open(path, 'wb') as out_file: 107 | pickle.dump(data_to_save, out_file, pickle.HIGHEST_PROTOCOL) 108 | 109 | def load_weights(self, weights: Dict[str, np.ndarray]) -> None: 110 | with self.graph.as_default(): 111 | variables_to_initialize = [] 112 | with tf.name_scope("restore"): 113 | restore_ops = [] 114 | used_vars = set() 115 | for variable in self.graph.get_collection(tf.GraphKeys.GLOBAL_VARIABLES): 116 | used_vars.add(variable.name) 117 | if variable.name in weights: 118 | restore_ops.append(variable.assign(weights[variable.name])) 119 | else: 120 | print('Freshly initializing %s since no saved value was found.' % variable.name) 121 | variables_to_initialize.append(variable) 122 | for var_name in weights: 123 | if var_name not in used_vars: 124 | print('Saved weights for %s not used by model.' % var_name) 125 | restore_ops.append(tf.variables_initializer(variables_to_initialize)) 126 | self.sess.run(restore_ops) 127 | 128 | # -------------------- Model Construction -------------------- 129 | def __make_model(self): 130 | self.task.make_task_input_model(self.__placeholders, self.__ops) 131 | 132 | with tf.variable_scope("graph_model"): 133 | self.__placeholders['num_graphs'] = \ 134 | tf.placeholder(dtype=tf.int64, shape=[], name='num_graphs') 135 | self.__placeholders['graph_layer_input_dropout_keep_prob'] = \ 136 | tf.placeholder_with_default(1.0, shape=[], name='graph_layer_input_dropout_keep_prob') 137 | 138 | self.__build_graph_propagation_model() 139 | 140 | self.task.make_task_output_model(self.__placeholders, self.__ops) 141 | 142 | tf.summary.scalar('loss', self.__ops['task_metrics']['loss']) 143 | total_num_graphs_variable = \ 144 | tf.get_variable(name='total_num_graphs', 145 | shape=(), 146 | dtype=tf.int64, 147 | initializer=tf.zeros_initializer, 148 | trainable=False) 149 | self.__ops['total_num_graphs'] = \ 150 | tf.assign_add(total_num_graphs_variable, self.__placeholders['num_graphs']) 151 | self.__ops['tf_summaries'] = tf.summary.merge_all() 152 | 153 | # Print some stats: 154 | num_pars = 0 155 | for variable in tf.trainable_variables(): 156 | num_pars += np.prod([dim.value for dim in variable.get_shape()]) 157 | self.log_line("Model has %i parameters." 
% num_pars) 158 | 159 | # Now add the optimizer bits: 160 | self.__make_train_step() 161 | 162 | def __build_graph_propagation_model(self) -> tf.Tensor: 163 | h_dim = self.params['hidden_size'] 164 | activation_fn = get_activation(self.params['graph_model_activation_function']) 165 | if self.task.initial_node_feature_size != self.params['hidden_size']: 166 | self.__ops['projected_node_features'] = \ 167 | tf.keras.layers.Dense(units=h_dim, 168 | use_bias=False, 169 | activation=activation_fn, 170 | )(self.__ops['initial_node_features']) 171 | else: 172 | self.__ops['projected_node_features'] = self.__ops['initial_node_features'] 173 | 174 | cur_node_representations = self.__ops['projected_node_features'] 175 | last_residual_representations = tf.zeros_like(cur_node_representations) 176 | for layer_idx in range(self.params['graph_num_layers']): 177 | with tf.variable_scope('gnn_layer_%i' % layer_idx): 178 | cur_node_representations = \ 179 | tf.nn.dropout(cur_node_representations, rate=1.0 - self.__placeholders['graph_layer_input_dropout_keep_prob']) 180 | if layer_idx % self.params['graph_residual_connection_every_num_layers'] == 0: 181 | t = cur_node_representations 182 | if layer_idx > 0: 183 | cur_node_representations += last_residual_representations 184 | cur_node_representations /= 2 185 | last_residual_representations = t 186 | cur_node_representations = \ 187 | self._apply_gnn_layer( 188 | cur_node_representations, 189 | self.__ops['adjacency_lists'], 190 | self.__ops['type_to_num_incoming_edges'], 191 | self.params['graph_num_timesteps_per_layer']) 192 | if self.params['graph_inter_layer_norm']: 193 | cur_node_representations = tf.contrib.layers.layer_norm(cur_node_representations) 194 | if layer_idx % self.params['graph_dense_between_every_num_gnn_layers'] == 0: 195 | cur_node_representations = \ 196 | tf.keras.layers.Dense(units=h_dim, 197 | use_bias=False, 198 | activation=activation_fn, 199 | name="Dense", 200 | )(cur_node_representations) 201 | 202 | self.__ops['final_node_representations'] = cur_node_representations 203 | 204 | @abstractmethod 205 | def _apply_gnn_layer(self, 206 | node_representations: tf.Tensor, 207 | adjacency_lists: List[tf.Tensor], 208 | type_to_num_incoming_edges: tf.Tensor, 209 | num_timesteps: int) -> tf.Tensor: 210 | """ 211 | Run a GNN layer on a graph. 212 | 213 | Arguments: 214 | node_features: float32 tensor of shape [V, D], where V is the number of nodes. 215 | adjacency_lists: list of L int32 tensors of shape [E, 2], where L is the number 216 | of edge types and E the number of edges of that type. 217 | Hence, adjacency_lists[l][e,:] == [u, v] means that u has an edge of type l 218 | to v. 219 | type_to_num_incoming_edges: int32 tensor of shape [L, V], where L is the number 220 | of edge types. 221 | type_to_num_incoming_edges[l, v] = k indicates that node v has k incoming 222 | edges of type l. 223 | num_timesteps: Number of propagation steps in to run in this GNN layer. 
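        Returns:
            float32 tensor of shape [V, D'] containing the new node representations, where D' is
            the output dimension of the concrete GNN layer (self.params['hidden_size'] in the
            model implementations above).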
224 | """ 225 | raise Exception("Models have to implement _apply_gnn_layer!") 226 | 227 | def __make_train_step(self): 228 | trainable_vars = self.sess.graph.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES) 229 | 230 | learning_rate = self.params['learning_rate'] 231 | 232 | lr_for_num_graphs_per_batch = self.params.get('lr_for_num_graphs_per_batch') 233 | if lr_for_num_graphs_per_batch is not None: 234 | # This ensures that the learning rate _per_ graph in the batch stays the same, 235 | # which can be important for tasks in which the loss is defined per-graph 236 | # (e.g., full graph regression tasks, or one-node-per-graph classification) 237 | lr_norm_factor = (tf.cast(self.__placeholders['num_graphs'], tf.float32) 238 | / tf.constant(lr_for_num_graphs_per_batch, dtype=tf.float32)) 239 | learning_rate *= lr_norm_factor 240 | 241 | optimizer_name = self.params['optimizer'].lower() 242 | if optimizer_name == 'sgd': 243 | optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate) 244 | elif optimizer_name == 'rmsprop': 245 | optimizer = tf.train.RMSPropOptimizer(learning_rate=learning_rate, 246 | decay=self.params['learning_rate_decay'], 247 | momentum=self.params['momentum']) 248 | elif optimizer_name == 'adam': 249 | optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate) 250 | else: 251 | raise Exception('Unknown optimizer "%s".' % (self.params['optimizer'])) 252 | 253 | grads_and_vars = optimizer.compute_gradients(self.__ops['task_metrics']['loss'], var_list=trainable_vars) 254 | clipped_grads = [] 255 | for grad, var in grads_and_vars: 256 | if grad is not None: 257 | clipped_grads.append((tf.clip_by_norm(grad, self.params['clamp_gradient_norm']), var)) 258 | else: 259 | clipped_grads.append((grad, var)) 260 | self.__ops['train_step'] = optimizer.apply_gradients(clipped_grads) 261 | 262 | # -------------------- Training Loop -------------------- 263 | def __run_epoch(self, 264 | epoch_name: str, 265 | data: Iterable[Any], 266 | data_fold: DataFold, 267 | quiet: Optional[bool] = False, 268 | summary_writer: Optional[tf.summary.FileWriter] = None) \ 269 | -> Tuple[float, List[Dict[str, Any]], int, float, float, float]: 270 | batch_iterator = self.task.make_minibatch_iterator( 271 | data, data_fold, self.__placeholders, self.params['max_nodes_in_batch']) 272 | batch_iterator = ThreadedIterator(batch_iterator, max_queue_size=5) 273 | task_metric_results = [] 274 | start_time = time.time() 275 | processed_graphs, processed_nodes, processed_edges = 0, 0, 0 276 | epoch_loss = 0.0 277 | for step, batch_data in enumerate(batch_iterator): 278 | if data_fold == DataFold.TRAIN: 279 | batch_data.feed_dict[self.__placeholders['graph_layer_input_dropout_keep_prob']] = \ 280 | self.params['graph_layer_input_dropout_keep_prob'] 281 | batch_data.feed_dict[self.__placeholders['num_graphs']] = batch_data.num_graphs 282 | # Collect some statistics: 283 | processed_graphs += batch_data.num_graphs 284 | processed_nodes += batch_data.num_nodes 285 | processed_edges += batch_data.num_edges 286 | 287 | fetch_dict = {'task_metrics': self.__ops['task_metrics']} 288 | if summary_writer: 289 | fetch_dict['tf_summaries'] = self.__ops['tf_summaries'] 290 | fetch_dict['total_num_graphs'] = self.__ops['total_num_graphs'] 291 | if data_fold == DataFold.TRAIN: 292 | fetch_dict['train_step'] = self.__ops['train_step'] 293 | fetch_results = self.sess.run(fetch_dict, feed_dict=batch_data.feed_dict) 294 | epoch_loss += fetch_results['task_metrics']['loss'] * batch_data.num_graphs 295 | 
task_metric_results.append(fetch_results['task_metrics']) 296 | 297 | if not quiet: 298 | print("Running %s, batch %i (has %i graphs). Loss so far: %.4f" 299 | % (epoch_name, step, batch_data.num_graphs, epoch_loss / processed_graphs), 300 | end='\r') 301 | if summary_writer: 302 | summary_writer.add_summary(fetch_results['tf_summaries'], fetch_results['total_num_graphs']) 303 | 304 | assert processed_graphs > 0, "Can't run epoch over empty dataset." 305 | 306 | epoch_time = time.time() - start_time 307 | per_graph_loss = epoch_loss / processed_graphs 308 | graphs_per_sec = processed_graphs / epoch_time 309 | nodes_per_sec = processed_nodes / epoch_time 310 | edges_per_sec = processed_edges / epoch_time 311 | return per_graph_loss, task_metric_results, processed_graphs, graphs_per_sec, nodes_per_sec, edges_per_sec 312 | 313 | def log_line(self, msg): 314 | with open(self.log_file, 'a') as log_fh: 315 | log_fh.write(msg + '\n') 316 | print(msg) 317 | 318 | def train(self, quiet: Optional[bool] = False, tf_summary_path: Optional[str] = None): 319 | total_time_start = time.time() 320 | with self.graph.as_default(): 321 | if tf_summary_path is not None: 322 | os.makedirs(tf_summary_path, exist_ok=True) 323 | train_writer = tf.summary.FileWriter(os.path.join(tf_summary_path, "train"), graph=self.graph) 324 | valid_writer = tf.summary.FileWriter(os.path.join(tf_summary_path, "valid")) 325 | else: 326 | train_writer, valid_writer = None, None 327 | 328 | (best_valid_metric, best_val_metric_epoch, best_val_metric_descr) = (float("+inf"), 0, "") 329 | for epoch in range(1, self.params['max_epochs'] + 1): 330 | self.log_line("== Epoch %i" % epoch) 331 | 332 | train_loss, train_task_metrics, train_num_graphs, train_graphs_p_s, train_nodes_p_s, train_edges_p_s = \ 333 | self.__run_epoch("epoch %i (training)" % epoch, 334 | self.task._loaded_data[DataFold.TRAIN], 335 | DataFold.TRAIN, 336 | quiet=quiet, 337 | summary_writer=train_writer) 338 | if not quiet: 339 | print("\r\x1b[K", end='') 340 | self.log_line(" Train: loss: %.5f || %s || graphs/sec: %.2f | nodes/sec: %.0f | edges/sec: %.0f" 341 | % (train_loss, 342 | self.task.pretty_print_epoch_task_metrics(train_task_metrics, train_num_graphs), 343 | train_graphs_p_s, train_nodes_p_s, train_edges_p_s)) 344 | 345 | valid_loss, valid_task_metrics, valid_num_graphs, valid_graphs_p_s, valid_nodes_p_s, valid_edges_p_s = \ 346 | self.__run_epoch("epoch %i (validation)" % epoch, 347 | self.task._loaded_data[DataFold.VALIDATION], 348 | DataFold.VALIDATION, 349 | quiet=quiet, 350 | summary_writer=valid_writer) 351 | if not quiet: 352 | print("\r\x1b[K", end='') 353 | early_stopping_metric = self.task.early_stopping_metric(valid_task_metrics, valid_num_graphs) 354 | valid_metric_descr = \ 355 | self.task.pretty_print_epoch_task_metrics(valid_task_metrics, valid_num_graphs) 356 | self.log_line(" Valid: loss: %.5f || %s || graphs/sec: %.2f | nodes/sec: %.0f | edges/sec: %.0f" 357 | % (valid_loss, valid_metric_descr, valid_graphs_p_s, valid_nodes_p_s, valid_edges_p_s)) 358 | 359 | if early_stopping_metric < best_valid_metric: 360 | self.save_model(self.best_model_file) 361 | self.log_line(" (Best epoch so far, target metric decreased to %.5f from %.5f. 
Saving to '%s')" 362 | % (early_stopping_metric, best_valid_metric, self.best_model_file)) 363 | best_valid_metric = early_stopping_metric 364 | best_val_metric_epoch = epoch 365 | best_val_metric_descr = valid_metric_descr 366 | elif epoch - best_val_metric_epoch >= self.params['patience']: 367 | total_time = time.time() - total_time_start 368 | self.log_line("Stopping training after %i epochs without improvement on validation loss." % self.params['patience']) 369 | self.log_line("Training took %is. Best validation results: %s" 370 | % (total_time, best_val_metric_descr)) 371 | break 372 | 373 | def test(self, path: RichPath, quiet: Optional[bool] = False): 374 | with self.graph.as_default(): 375 | self.log_line("== Running Test on %s ==" % (path,)) 376 | data = self.task._loaded_data.get(DataFold.TEST) 377 | if data is None: 378 | data = self.task.load_eval_data_from_path(path) 379 | test_loss, test_task_metrics, test_num_graphs, _, _, _ = \ 380 | self.__run_epoch("Test", data, DataFold.TEST, quiet=quiet) 381 | if not quiet: 382 | print("\r\x1b[K", end='') 383 | self.log_line("Loss %.5f on %i graphs" % (test_loss, test_num_graphs)) 384 | self.log_line("Metrics: %s" % self.task.pretty_print_epoch_task_metrics(test_task_metrics, test_num_graphs)) 385 | -------------------------------------------------------------------------------- /reorg_varmisuse_data.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Set this to the path of the downloaded dataset: 4 | DOWNLOADED_ZIP="graph-dataset.zip" 5 | # Set this to the path where the data will be extracted to (requires ~15 GB of space): 6 | OUTDIR="reorged-varmisuse-dataset" 7 | 8 | ### The following bits should not require any changes: 9 | CODEDIR=$(dirname $0) 10 | TESTONLY_PROJS="commandline humanizer lean" 11 | 12 | for fold in train valid test testonly; do 13 | mkdir -p "${OUTDIR}/graphs-${fold}-raw" 14 | done 15 | 16 | 7za x "${DOWNLOADED_ZIP}" 17 | 18 | for test_proj in $TESTONLY_PROJS; do 19 | mv graph-dataset/${test_proj}/graphs-test/* "${OUTDIR}/graphs-testonly-raw" 20 | rm -rf graph-dataset/${test_proj} 21 | done 22 | 23 | for fold in train valid test; do 24 | mv graph-dataset/*/graphs-${fold}/* "${OUTDIR}/graphs-${fold}-raw" 25 | done 26 | 27 | for file in "${OUTDIR}"/*/*.gz; do 28 | new_file=$(echo "${file}" | sed -e 's/.gz$/.json.gz/') 29 | mv "${file}" "${new_file}" 30 | done 31 | 32 | for fold in train valid test testonly; do 33 | python3 "$CODEDIR/utils/varmisuse_data_splitter.py" "${OUTDIR}/graphs-${fold}-raw/" "${OUTDIR}/graphs-${fold}/" 34 | rm -rf "${OUTDIR}/graphs-${fold}-raw/" 35 | done 36 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | docopt 2 | numpy 3 | dpu-utils>=0.1.30 4 | tensorflow-gpu>=1.13.1 -------------------------------------------------------------------------------- /run_ppi_benchs.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """ 3 | Usage: 4 | run_ppi_benchs.py [options] LOG_TARGET_DIR 5 | 6 | Options: 7 | -h --help Show this screen. 8 | --num-runs NUM Number of runs to perform for each configuration. [default: 10] 9 | --debug Turn on debugger. 
10 | """ 11 | import os 12 | import subprocess 13 | import re 14 | import numpy as np 15 | 16 | from docopt import docopt 17 | from dpu_utils.utils import run_and_debug 18 | 19 | MODEL_TYPES = ["GGNN", "RGCN", "RGAT", "RGIN", "GNN-Edge-MLP0", "GNN-Edge-MLP1", "GNN_FiLM"] 20 | 21 | TEST_RES_RE = re.compile('^Metrics: Avg MicroF1: (0.\d+)') 22 | TIME_RE = re.compile('^Training took (\d+)s') 23 | 24 | 25 | def run(args): 26 | target_dir = args['LOG_TARGET_DIR'] 27 | os.makedirs(target_dir, exist_ok=True) 28 | print("Starting PPI experiments, will write logfiles for runs into %s." % target_dir) 29 | num_seeds = int(args.get('--num-runs')) 30 | print("| %- 13s | %- 17s | %- 10s |" % ("Model", "Avg. MicroF1", "Avg. Time")) 31 | print("|" + "-" * 15 + "|" + "-" * 19 + "|" + "-" * 12 + "|") 32 | for model in MODEL_TYPES: 33 | model_f1s = [] 34 | model_times = [] 35 | for seed in range(1, 1 + num_seeds): 36 | logfile = os.path.join(target_dir, "%s_seed%i.txt" % (model.lower(), seed)) 37 | with open(logfile, "w") as log_fh: 38 | subprocess.check_call(["python", 39 | "train.py", 40 | "--quiet", 41 | "--run-test", 42 | model, 43 | "PPI", 44 | "--model-param-overrides", 45 | "{\"random_seed\": %i}" % seed, 46 | ], 47 | stdout=log_fh, 48 | stderr=log_fh) 49 | with open(logfile, "r") as log_fh: 50 | for line in log_fh.readlines(): 51 | time_match = TIME_RE.search(line) 52 | res_match = TEST_RES_RE.search(line) 53 | if time_match is not None: 54 | model_times.append(int(time_match.groups()[0])) 55 | elif res_match is not None: 56 | model_f1s.append(float(res_match.groups()[0])) 57 | 58 | print("| %- 13s | %.3f (+/- %.3f) | % 4.1f |" 59 | % (model, 60 | np.mean(model_f1s), 61 | np.std(model_f1s), 62 | np.mean(model_times))) 63 | 64 | 65 | if __name__ == "__main__": 66 | args = docopt(__doc__) 67 | run_and_debug(lambda: run(args), enable_debugging=args['--debug']) 68 | -------------------------------------------------------------------------------- /run_qm9_benchs.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """ 3 | Usage: 4 | run_qm9_benchs.py [options] LOG_TARGET_DIR 5 | 6 | Options: 7 | -h --help Show this screen. 8 | --num-runs NUM Number of runs to perform for each configuration. [default: 5] 9 | --debug Turn on debugger. 10 | """ 11 | import os 12 | import subprocess 13 | import re 14 | import numpy as np 15 | 16 | from docopt import docopt 17 | from dpu_utils.utils import run_and_debug 18 | 19 | MODEL_TYPES = ["GGNN", "RGCN", "RGAT", "RGIN", "GNN-Edge-MLP0", "GNN-Edge-MLP1", "GNN_FiLM"] 20 | TASKS = ["mu", "alpha", "HOMO", "LUMO", "gap", "R2", "ZPVE", "U0", "U", "H", "G", "Cv", "Omega"] 21 | 22 | TEST_RES_RE = re.compile('^Metrics: MAEs: \d+:([0-9.]+) \| Error Ratios: \d+:([0-9.]+)') 23 | TIME_RE = re.compile('^Training took (\d+)s') 24 | 25 | 26 | def run(args): 27 | target_dir = args['LOG_TARGET_DIR'] 28 | os.makedirs(target_dir, exist_ok=True) 29 | print("Starting QM9 experiments, will write logfiles for runs into %s." 
% target_dir) 30 | num_seeds = int(args.get('--num-runs')) 31 | results = {} 32 | for model in MODEL_TYPES: 33 | results[model] = [{"test_errors": [], "times": []} for _ in TASKS] 34 | for task_id in range(len(TASKS)): 35 | for seed in range(1, 1 + num_seeds): 36 | logfile = os.path.join(target_dir, "%s_task%i_seed%i.txt" % (model, task_id, seed)) 37 | with open(logfile, "w") as log_fh: 38 | subprocess.check_call(["python", 39 | "train.py", 40 | "--run-test", 41 | model, 42 | "QM9", 43 | "--model-param-overrides", 44 | "{\"random_seed\": %i}" % seed, 45 | "--task-param-overrides", 46 | "{\"task_ids\": [%i]}" % task_id, 47 | ], 48 | stdout=log_fh, 49 | stderr=log_fh) 50 | with open(logfile, "r") as log_fh: 51 | for line in log_fh.readlines(): 52 | time_match = TIME_RE.search(line) 53 | res_match = TEST_RES_RE.search(line) 54 | if time_match is not None: 55 | results[model][task_id]["times"].append(int(time_match.groups()[0])) 56 | elif res_match is not None: 57 | results[model][task_id]["test_errors"].append(float(res_match.groups()[1])) 58 | 59 | row_fmt_string = "%7s " + "&% 35s " * len(MODEL_TYPES) + "\\\\" 60 | print(row_fmt_string % tuple([""] + MODEL_TYPES)) 61 | for task_id, task in enumerate(TASKS): 62 | model_results = [] 63 | for model in MODEL_TYPES: 64 | err = np.mean(results[model][task_id]["test_errors"]) 65 | std = np.std(results[model][task_id]["test_errors"]) 66 | time_in_min = np.mean(results[model][task_id]["times"]) / 60 67 | model_results.append("%.2f & ($\pm %.2f$; $%.1f$min)" % (err, std, time_in_min)) 68 | print(row_fmt_string % tuple([task] + model_results)) 69 | 70 | 71 | if __name__ == "__main__": 72 | args = docopt(__doc__) 73 | run_and_debug(lambda: run(args), enable_debugging=args['--debug']) 74 | -------------------------------------------------------------------------------- /run_varmisuse_benchs.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """ 3 | Usage: 4 | run_varmisuse_benchs.py [options] LOG_TARGET_DIR 5 | 6 | Options: 7 | -h --help Show this screen. 8 | --num-runs NUM Number of runs to perform for each configuration. [default: 5] 9 | --debug Turn on debugger. 10 | """ 11 | import os 12 | import subprocess 13 | import re 14 | import numpy as np 15 | 16 | from docopt import docopt 17 | from dpu_utils.utils import run_and_debug 18 | 19 | MODEL_TYPES = ["GGNN", "RGCN", "RGAT", "RGIN", "GNN-Edge-MLP0", "GNN-Edge-MLP1", "GNN_FiLM"] 20 | 21 | TEST_RES_RE = re.compile('^Metrics: Accuracy: (0.\d+)') 22 | VALID_RES_RE = re.compile('Best validation results: Accuracy: (0.\d+)') 23 | MODEL_FILE_RE = re.compile('^Loading model from file (.+)\.') 24 | 25 | 26 | def run(args): 27 | target_dir = args['LOG_TARGET_DIR'] 28 | os.makedirs(target_dir, exist_ok=True) 29 | print("Starting VarMisuse experiments, will write logfiles for runs into %s." 
% target_dir) 30 | num_seeds = int(args.get('--num-runs')) 31 | print("| %- 14s | %- 17s | %- 17s | %- 17s |" % ("Model", 32 | "Valid Acc", 33 | "Test Acc", 34 | "TestOnly Acc")) 35 | print("|" + "-" * 16 + "|" + "-" * 19 + "|" + "-" * 19 + "|" + "-" * 19 + "|") 36 | for model in MODEL_TYPES: 37 | valid_accs, test_accs, testonly_accs = [], [], [] 38 | for seed in range(1, 1 + num_seeds): 39 | logfile = os.path.join(target_dir, "%s_seed%i.txt" % (model.lower(), seed)) 40 | test_logfile = os.path.join(target_dir, "%s_seed%i-testonly.txt" % (model.lower(), seed)) 41 | with open(logfile, "w") as log_fh: 42 | subprocess.check_call(["python", 43 | "train.py", 44 | "--quiet", 45 | "--run-test", 46 | model, 47 | "VarMisuse", 48 | "--model-param-overrides", 49 | "{\"random_seed\": %i}" % seed, 50 | ], 51 | stdout=log_fh, 52 | stderr=log_fh) 53 | model_file = None 54 | with open(logfile, "r") as log_fh: 55 | for line in log_fh.readlines(): 56 | valid_res_match = VALID_RES_RE.search(line) 57 | test_res_match = TEST_RES_RE.search(line) 58 | model_file_match = MODEL_FILE_RE.search(line) 59 | if valid_res_match is not None: 60 | valid_accs.append(float(valid_res_match.groups()[0])) 61 | elif test_res_match is not None: 62 | test_accs.append(float(test_res_match.groups()[0])) 63 | elif model_file_match is not None: 64 | model_file = model_file_match.groups()[0] 65 | 66 | # Run TestOnly 67 | assert model_file is not None, "Could not find saved model file" 68 | with open(test_logfile, "w") as log_fh: 69 | subprocess.check_call(["python", 70 | "test.py", 71 | "--quiet", 72 | model_file, 73 | "data/varmisuse/graphs-testonly", 74 | ], 75 | stdout=log_fh, 76 | stderr=log_fh) 77 | with open(test_logfile, "r") as log_fh: 78 | for line in log_fh.readlines(): 79 | test_res_match = TEST_RES_RE.search(line) 80 | if test_res_match is not None: 81 | testonly_accs.append(float(test_res_match.groups()[0])) 82 | 83 | print("| %- 14s | %.3f (+/- %.3f) | %.3f (+/- %.3f) | %.3f (+/- %.3f) |" 84 | % (model, 85 | np.mean(valid_accs), 86 | np.std(valid_accs), 87 | np.mean(test_accs), 88 | np.std(test_accs), 89 | np.mean(testonly_accs), 90 | np.std(testonly_accs), 91 | )) 92 | 93 | 94 | if __name__ == "__main__": 95 | args = docopt(__doc__) 96 | run_and_debug(lambda: run(args), enable_debugging=args['--debug']) 97 | -------------------------------------------------------------------------------- /tasks/__init__.py: -------------------------------------------------------------------------------- 1 | from .sparse_graph_task import Sparse_Graph_Task, DataFold 2 | from .qm9_task import QM9_Task 3 | from .citation_network_task import Citation_Network_Task 4 | from .ppi_task import PPI_Task 5 | from .varmisuse_task import VarMisuse_Task 6 | -------------------------------------------------------------------------------- /tasks/citation_network_task.py: -------------------------------------------------------------------------------- 1 | from collections import namedtuple 2 | from typing import Any, Dict, List, Iterable, Iterator 3 | 4 | import numpy as np 5 | import tensorflow as tf 6 | from dpu_utils.utils import RichPath, LocalPath 7 | 8 | from .sparse_graph_task import Sparse_Graph_Task, DataFold, MinibatchData 9 | from utils.citation_network_utils import load_data, preprocess_features 10 | 11 | 12 | CitationData = namedtuple('CitationData', ['adj_lists', 'num_incoming_edges', 'features', 'labels', 'mask']) 13 | 14 | 15 | class Citation_Network_Task(Sparse_Graph_Task): 16 | @classmethod 17 | def default_params(cls): 18 | params = 
super().default_params() 19 | params.update({ 20 | 'add_self_loop_edges': True, 21 | 'use_graph': True, 22 | 'activation_function': "tanh", 23 | 'out_layer_dropout_keep_prob': 1.0, 24 | }) 25 | return params 26 | 27 | @staticmethod 28 | def name() -> str: 29 | return "CitationNetwork" 30 | 31 | @staticmethod 32 | def default_data_path() -> str: 33 | return "data/citation-networks" 34 | 35 | def __init__(self, params: Dict[str, Any]): 36 | super().__init__(params) 37 | 38 | # Things that will be filled once we load data: 39 | self.__num_edge_types = 2 40 | self.__initial_node_feature_size = 0 41 | self.__num_output_classes = 0 42 | 43 | def get_metadata(self) -> Dict[str, Any]: 44 | metadata = super().get_metadata() 45 | metadata['initial_node_feature_size'] = self.__initial_node_feature_size 46 | metadata['num_output_classes'] = self.__num_output_classes 47 | return metadata 48 | 49 | def restore_from_metadata(self, metadata: Dict[str, Any]) -> None: 50 | super().restore_from_metadata(metadata) 51 | self.__initial_node_feature_size = metadata['initial_node_feature_size'] 52 | self.__num_output_classes = metadata['num_output_classes'] 53 | 54 | @property 55 | def num_edge_types(self) -> int: 56 | return self.__num_edge_types 57 | 58 | @property 59 | def initial_node_feature_size(self) -> int: 60 | return self.__initial_node_feature_size 61 | 62 | # -------------------- Data Loading -------------------- 63 | def load_data(self, path: RichPath) -> None: 64 | train_data, valid_data, _ = self.__load_data(path) 65 | self._loaded_data[DataFold.TRAIN] = train_data 66 | self._loaded_data[DataFold.VALIDATION] = valid_data 67 | 68 | def load_eval_data_from_path(self, path: RichPath) -> Iterable[Any]: 69 | _, _, test_data = self.__load_data(path) 70 | return test_data 71 | 72 | def __load_data(self, data_directory: RichPath): 73 | assert isinstance(data_directory, LocalPath), "CitationNetworkTask can only handle local data" 74 | data_path = data_directory.path 75 | print(" Loading CitationNetwork data from %s." 
% (data_path,)) 76 | (adj_list, features, train_labels, valid_labels, test_labels, train_mask, valid_mask, test_mask) = \ 77 | load_data(data_path, self.params['data_kind']) 78 | self.__initial_node_feature_size = features.shape[1] 79 | self.__num_output_classes = train_labels.shape[1] 80 | features = preprocess_features(features) 81 | 82 | train_data = \ 83 | [self.__preprocess_data(adj_list, features, np.argmax(train_labels, axis=1), train_mask)] 84 | valid_data = \ 85 | [self.__preprocess_data(adj_list, features, np.argmax(valid_labels, axis=1), valid_mask)] 86 | test_data = \ 87 | [self.__preprocess_data(adj_list, features, np.argmax(test_labels, axis=1), test_mask)] 88 | return train_data, valid_data, test_data 89 | 90 | def __preprocess_data(self, adj_list: Dict[int, List[int]], features, labels, mask) -> CitationData: 91 | flat_adj_list = [] 92 | self_loop_adj_list = [] 93 | num_incoming_edges = np.zeros(shape=[len(adj_list)], dtype=np.int32) 94 | for node, neighbours in adj_list.items(): 95 | for neighbour in neighbours: 96 | flat_adj_list.append((node, neighbour)) 97 | flat_adj_list.append((neighbour, node)) 98 | num_incoming_edges[neighbour] += 1 99 | num_incoming_edges[node] += 1 100 | self_loop_adj_list.append((node, node)) 101 | 102 | # Prepend the self-loop information: 103 | num_incoming_edges = np.stack([np.ones_like(num_incoming_edges, dtype=np.int32), 104 | num_incoming_edges]) # Shape [2, V] 105 | return CitationData(adj_lists=[self_loop_adj_list, flat_adj_list], 106 | num_incoming_edges=num_incoming_edges, 107 | features=features, 108 | labels=labels, 109 | mask=mask) 110 | 111 | # -------------------- Model Construction -------------------- 112 | def make_task_output_model(self, 113 | placeholders: Dict[str, tf.Tensor], 114 | model_ops: Dict[str, tf.Tensor], 115 | ) -> None: 116 | placeholders['labels'] = tf.placeholder(tf.int32, [None], name='labels') 117 | placeholders['mask'] = tf.placeholder(tf.float32, [None], name='mask') 118 | placeholders['out_layer_dropout_keep_prob'] =\ 119 | tf.placeholder_with_default(input=tf.constant(1.0, dtype=tf.float32), 120 | shape=[], 121 | name='out_layer_dropout_keep_prob') 122 | 123 | final_node_representations = \ 124 | tf.nn.dropout(model_ops['final_node_representations'], 125 | rate=1.0 - placeholders['out_layer_dropout_keep_prob']) 126 | output_label_logits = \ 127 | tf.keras.layers.Dense(units=self.__num_output_classes, 128 | use_bias=False, 129 | activation=None, 130 | name="OutputDenseLayer", 131 | )(final_node_representations) # Shape [V, Classes] 132 | 133 | num_masked_preds = tf.reduce_sum(placeholders['mask']) 134 | losses = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=output_label_logits, 135 | labels=placeholders['labels']) 136 | total_loss = tf.reduce_sum(losses * placeholders['mask']) 137 | 138 | correct_preds = tf.equal(tf.argmax(output_label_logits, axis=1, output_type=tf.int32), 139 | placeholders['labels']) 140 | num_masked_correct = tf.reduce_sum(tf.cast(correct_preds, tf.float32) * placeholders['mask']) 141 | accuracy = num_masked_correct / num_masked_preds 142 | tf.summary.scalar('accuracy', accuracy) 143 | 144 | model_ops['task_metrics'] = { 145 | 'loss': total_loss / num_masked_preds, 146 | 'total_loss': total_loss, 147 | 'accuracy': accuracy, 148 | } 149 | 150 | # -------------------- Minibatching and training loop -------------------- 151 | def make_minibatch_iterator(self, 152 | data: Iterable[Any], 153 | data_fold: DataFold, 154 | model_placeholders: Dict[str, tf.Tensor], 155 | 
max_nodes_per_batch: int) \ 156 | -> Iterator[MinibatchData]: 157 | data = next(iter(data)) # type: CitationData 158 | if data_fold == DataFold.TRAIN: 159 | out_layer_dropout_keep_prob = self.params['out_layer_dropout_keep_prob'] 160 | else: 161 | out_layer_dropout_keep_prob = 1.0 162 | 163 | feed_dict = { 164 | model_placeholders['initial_node_features']: data.features, 165 | model_placeholders['adjacency_lists'][0]: data.adj_lists[0], 166 | model_placeholders['adjacency_lists'][1]: data.adj_lists[1], 167 | model_placeholders['type_to_num_incoming_edges']: data.num_incoming_edges, 168 | model_placeholders['num_graphs']: 1, 169 | model_placeholders['labels']: data.labels, 170 | model_placeholders['mask']: data.mask, 171 | model_placeholders['out_layer_dropout_keep_prob']: out_layer_dropout_keep_prob, 172 | } 173 | 174 | yield MinibatchData(feed_dict=feed_dict, 175 | num_graphs=1, 176 | num_nodes=data.features.shape[0], 177 | num_edges=sum(len(adj_list) for adj_list in data.adj_lists)) 178 | 179 | def early_stopping_metric(self, task_metric_results: List[Dict[str, np.ndarray]], num_graphs: int) -> float: 180 | # Early stopping based on average loss: 181 | return np.sum([m['total_loss'] for m in task_metric_results]) / num_graphs 182 | 183 | def pretty_print_epoch_task_metrics(self, task_metric_results: List[Dict[str, np.ndarray]], num_graphs: int) -> str: 184 | return "Acc: %.2f%%" % (task_metric_results[0]['accuracy'] * 100,) 185 | -------------------------------------------------------------------------------- /tasks/default_hypers/PPI_GGNN.json: -------------------------------------------------------------------------------- 1 | {"task_params": {}, 2 | "model_params": {"graph_num_layers": 3, 3 | "hidden_size": 320, 4 | "max_nodes_in_batch": 12500, 5 | "graph_layer_input_dropout_keep_prob": 0.9 6 | } 7 | } -------------------------------------------------------------------------------- /tasks/default_hypers/PPI_GNN-Edge-MLP0.json: -------------------------------------------------------------------------------- 1 | {"task_params": {}, 2 | "model_params": {"graph_num_layers": 5, 3 | "hidden_size": 256, 4 | "max_nodes_in_batch": 6000, 5 | "graph_layer_input_dropout_keep_prob": 0.8 6 | } 7 | } -------------------------------------------------------------------------------- /tasks/default_hypers/PPI_GNN-Edge-MLP1.json: -------------------------------------------------------------------------------- 1 | {"task_params": {}, 2 | "model_params": {"graph_num_layers": 4, 3 | "hidden_size": 320, 4 | "max_nodes_in_batch": 6000, 5 | "graph_layer_input_dropout_keep_prob": 0.9 6 | } 7 | } -------------------------------------------------------------------------------- /tasks/default_hypers/PPI_GNN-FiLM.json: -------------------------------------------------------------------------------- 1 | {"task_params": {}, 2 | "model_params": {"graph_num_layers": 4, 3 | "hidden_size": 320, 4 | "max_nodes_in_batch": 6000, 5 | "graph_layer_input_dropout_keep_prob": 0.9 6 | } 7 | } -------------------------------------------------------------------------------- /tasks/default_hypers/PPI_RGAT.json: -------------------------------------------------------------------------------- 1 | {"task_params": {}, 2 | "model_params": {"graph_num_layers": 3, 3 | "hidden_size": 320, 4 | "max_nodes_in_batch": 11000, 5 | "graph_layer_input_dropout_keep_prob": 0.9 6 | } 7 | } -------------------------------------------------------------------------------- /tasks/default_hypers/PPI_RGCN.json: 
-------------------------------------------------------------------------------- 1 | {"task_params": {}, 2 | "model_params": {"graph_num_layers": 4, 3 | "hidden_size": 320, 4 | "max_nodes_in_batch": 12500, 5 | "graph_layer_input_dropout_keep_prob": 0.9 6 | } 7 | } -------------------------------------------------------------------------------- /tasks/default_hypers/PPI_RGIN.json: -------------------------------------------------------------------------------- 1 | { 2 | "task_params": { 3 | "add_self_loop_edges": true 4 | }, 5 | "model_params": { 6 | "patience": 25, 7 | "graph_num_layers": 5, 8 | "hidden_size": 256, 9 | "max_nodes_in_batch": 8000, 10 | "graph_num_edge_MLP_hidden_layers": 1, 11 | "graph_num_aggr_MLP_hidden_layers": null, 12 | "graph_layer_input_dropout_keep_prob": 0.8 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /tasks/default_hypers/QM9_GGNN.json: -------------------------------------------------------------------------------- 1 | { 2 | "task_params": {}, 3 | "model_params": { 4 | "graph_dense_between_every_num_gnn_layers": 32, 5 | "learning_rate": 0.0008471209461829375, 6 | "graph_inter_layer_norm": true, 7 | "graph_activation_function": "relu", 8 | "graph_num_timesteps_per_layer": 1, 9 | "graph_model_activation_function": "tanh", 10 | "momentum": 0.85, 11 | "optimizer": "RMSProp", 12 | "clamp_gradient_norm": 1.0, 13 | "patience": 25, 14 | "max_epochs": 10000, 15 | "graph_rnn_cell": "RNN", 16 | "graph_layer_input_dropout_keep_prob": 1.0, 17 | "graph_num_layers": 6, 18 | "message_aggregation_function": "sum", 19 | "graph_residual_connection_every_num_layers": 2, 20 | "hidden_size": 128, 21 | "max_nodes_in_batch": 50000, 22 | "learning_rate_decay": 0.98 23 | } 24 | } -------------------------------------------------------------------------------- /tasks/default_hypers/QM9_GNN-Edge-MLP0.json: -------------------------------------------------------------------------------- 1 | { 2 | "task_params": {}, 3 | "model_params": { 4 | "max_nodes_in_batch": 50000, 5 | "graph_num_layers": 8, 6 | "graph_num_timesteps_per_layer": 1, 7 | "graph_layer_input_dropout_keep_prob": 0.9, 8 | "graph_dense_between_every_num_gnn_layers": 32, 9 | "graph_model_activation_function": "tanh", 10 | "graph_residual_connection_every_num_layers": 2, 11 | "graph_inter_layer_norm": true, 12 | "max_epochs": 10000, 13 | "patience": 25, 14 | "optimizer": "RMSProp", 15 | "learning_rate": 0.0005072060718321982, 16 | "learning_rate_decay": 0.98, 17 | "lr_for_num_graphs_per_batch": null, 18 | "momentum": 0.85, 19 | "clamp_gradient_norm": 1.0, 20 | "hidden_size": 128, 21 | "graph_activation_function": "relu", 22 | "message_aggregation_function": "sum", 23 | "graph_message_weights_dropout_ratio": 0.0, 24 | "use_target_state_as_input": true, 25 | "num_edge_hidden_layers": 0 26 | } 27 | } -------------------------------------------------------------------------------- /tasks/default_hypers/QM9_GNN-Edge-MLP1.json: -------------------------------------------------------------------------------- 1 | { 2 | "task_params": {}, 3 | "model_params": { 4 | "max_nodes_in_batch": 50000, 5 | "graph_num_layers": 8, 6 | "graph_num_timesteps_per_layer": 1, 7 | "graph_layer_input_dropout_keep_prob": 0.9, 8 | "graph_dense_between_every_num_gnn_layers": 32, 9 | "graph_model_activation_function": "tanh", 10 | "graph_residual_connection_every_num_layers": 2, 11 | "graph_inter_layer_norm": false, 12 | "max_epochs": 10000, 13 | "patience": 25, 14 | "optimizer": "Adam", 15 | 
"learning_rate": 0.0006482335154980316, 16 | "learning_rate_decay": 0.98, 17 | "lr_for_num_graphs_per_batch": null, 18 | "momentum": 0.85, 19 | "clamp_gradient_norm": 1.0, 20 | "hidden_size": 128, 21 | "graph_activation_function": "gelu", 22 | "message_aggregation_function": "sum", 23 | "graph_message_weights_dropout_ratio": 0.0, 24 | "use_target_state_as_input": true, 25 | "num_edge_hidden_layers": 1 26 | } 27 | } -------------------------------------------------------------------------------- /tasks/default_hypers/QM9_GNN-FiLM.json: -------------------------------------------------------------------------------- 1 | { 2 | "task_params": {}, 3 | "model_params": { 4 | "message_aggregation_function": "sum", 5 | "graph_activation_function": "elu", 6 | "momentum": 0.85, 7 | "learning_rate_decay": 0.98, 8 | "patience": 25, 9 | "normalize_messages_by_num_incoming": false, 10 | "max_epochs": 10000, 11 | "graph_num_timesteps_per_layer": 1, 12 | "optimizer": "RMSProp", 13 | "hidden_size": 128, 14 | "graph_num_layers": 8, 15 | "graph_residual_connection_every_num_layers": 2, 16 | "graph_layer_input_dropout_keep_prob": 0.9, 17 | "learning_rate": 0.0006654723503723253, 18 | "graph_inter_layer_norm": true, 19 | "graph_dense_between_every_num_gnn_layers": 32, 20 | "max_nodes_in_batch": 50000, 21 | "graph_model_activation_function": "tanh", 22 | "clamp_gradient_norm": 1.0 23 | } 24 | } -------------------------------------------------------------------------------- /tasks/default_hypers/QM9_RGAT.json: -------------------------------------------------------------------------------- 1 | { 2 | "task_params": {}, 3 | "model_params": { 4 | "graph_model_activation_function": "tanh", 5 | "patience": 25, 6 | "optimizer": "RMSProp", 7 | "graph_activation_function": "elu", 8 | "learning_rate_decay": 0.98, 9 | "max_nodes_in_batch": 50000, 10 | "graph_layer_input_dropout_keep_prob": 0.9, 11 | "graph_inter_layer_norm": false, 12 | "clamp_gradient_norm": 1.0, 13 | "graph_num_layers": 8, 14 | "momentum": 0.85, 15 | "graph_dense_between_every_num_gnn_layers": 32, 16 | "hidden_size": 128, 17 | "graph_residual_connection_every_num_layers": 2, 18 | "num_heads": 8, 19 | "learning_rate": 0.0005800837190772856, 20 | "graph_num_timesteps_per_layer": 1, 21 | "max_epochs": 10000 22 | } 23 | } -------------------------------------------------------------------------------- /tasks/default_hypers/QM9_RGCN.json: -------------------------------------------------------------------------------- 1 | { 2 | "task_params": {}, 3 | "model_params": { 4 | "graph_residual_connection_every_num_layers": 2, 5 | "max_nodes_in_batch": 50000, 6 | "graph_num_layers": 8, 7 | "graph_model_activation_function": "tanh", 8 | "graph_layer_input_dropout_keep_prob": 1.0, 9 | "graph_activation_function": "leaky_relu", 10 | "graph_num_timesteps_per_layer": 1, 11 | "learning_rate_decay": 0.98, 12 | "max_epochs": 10000, 13 | "momentum": 0.85, 14 | "message_aggregation_function": "sum", 15 | "graph_dense_between_every_num_gnn_layers": 32, 16 | "learning_rate": 0.0005720408870458782, 17 | "graph_inter_layer_norm": true, 18 | "hidden_size": 128, 19 | "clamp_gradient_norm": 1.0, 20 | "patience": 25, 21 | "optimizer": "RMSProp" 22 | } 23 | } -------------------------------------------------------------------------------- /tasks/default_hypers/QM9_RGIN.json: -------------------------------------------------------------------------------- 1 | { 2 | "task_params": { 3 | "add_self_loop_edges": true 4 | }, 5 | "model_params": { 6 | "max_nodes_in_batch": 50000, 7 | 
"graph_num_layers": 6, 8 | "graph_num_timesteps_per_layer": 1, 9 | "graph_layer_input_dropout_keep_prob": 0.9, 10 | "graph_dense_between_every_num_gnn_layers": 32, 11 | "graph_model_activation_function": "tanh", 12 | "graph_residual_connection_every_num_layers": 2, 13 | "graph_inter_layer_norm": false, 14 | "max_epochs": 10000, 15 | "patience": 25, 16 | "optimizer": "RMSProp", 17 | "learning_rate": 0.000700776770702023, 18 | "learning_rate_decay": 0.98, 19 | "lr_for_num_graphs_per_batch": null, 20 | "momentum": 0.85, 21 | "clamp_gradient_norm": 1.0, 22 | "hidden_size": 128, 23 | "graph_activation_function": "elu", 24 | "message_aggregation_function": "sum", 25 | "use_target_state_as_input": false, 26 | "graph_num_edge_MLP_hidden_layers": 1, 27 | "graph_num_aggr_MLP_hidden_layers": null 28 | } 29 | } -------------------------------------------------------------------------------- /tasks/default_hypers/VarMisuse_GGNN.json: -------------------------------------------------------------------------------- 1 | { 2 | "task_params": { 3 | "add_self_loop_edges": false 4 | }, 5 | "model_params": { 6 | "max_nodes_in_batch": 100000, 7 | "graph_num_layers": 6, 8 | "graph_num_timesteps_per_layer": 1, 9 | "graph_layer_input_dropout_keep_prob": 0.8, 10 | "graph_message_weights_dropout_ratio": 0.0, 11 | "graph_dense_between_every_num_gnn_layers": 10000, 12 | "graph_model_activation_function": "tanh", 13 | "graph_residual_connection_every_num_layers": 10000, 14 | "graph_inter_layer_norm": false, 15 | "max_epochs": 10000, 16 | "patience": 5, 17 | "optimizer": "Adam", 18 | "learning_rate": 0.00015, 19 | "learning_rate_decay": 0.98, 20 | "momentum": 0.85, 21 | "clamp_gradient_norm": 1.0, 22 | "random_seed": 0, 23 | "hidden_size": 96, 24 | "graph_rnn_cell": "GRU", 25 | "graph_activation_function": "tanh", 26 | "message_aggregation_function": "sum" 27 | } 28 | } -------------------------------------------------------------------------------- /tasks/default_hypers/VarMisuse_GNN-Edge-MLP0.json: -------------------------------------------------------------------------------- 1 | { 2 | "task_params": { 3 | "add_self_loop_edges": false 4 | }, 5 | "model_params": { 6 | "max_nodes_in_batch": 50000, 7 | "graph_num_layers": 8, 8 | "graph_num_timesteps_per_layer": 1, 9 | "graph_layer_input_dropout_keep_prob": 0.9, 10 | "graph_dense_between_every_num_gnn_layers": 1, 11 | "graph_model_activation_function": "tanh", 12 | "graph_residual_connection_every_num_layers": 2, 13 | "graph_inter_layer_norm": true, 14 | "max_epochs": 10000, 15 | "patience": 5, 16 | "optimizer": "Adam", 17 | "learning_rate": 0.00015, 18 | "learning_rate_decay": 0.98, 19 | "lr_for_num_graphs_per_batch": 30, 20 | "momentum": 0.85, 21 | "clamp_gradient_norm": 1.0, 22 | "random_seed": 0, 23 | "hidden_size": 128, 24 | "graph_activation_function": "gelu", 25 | "message_aggregation_function": "sum", 26 | "graph_message_weights_dropout_ratio": 0.0, 27 | "use_target_state_as_input": true, 28 | "num_edge_hidden_layers": 0 29 | } 30 | } -------------------------------------------------------------------------------- /tasks/default_hypers/VarMisuse_GNN-Edge-MLP1.json: -------------------------------------------------------------------------------- 1 | { 2 | "task_params": { 3 | "add_self_loop_edges": false 4 | }, 5 | "model_params": { 6 | "max_nodes_in_batch": 45000, 7 | "graph_num_layers": 10, 8 | "graph_num_timesteps_per_layer": 1, 9 | "graph_layer_input_dropout_keep_prob": 0.9, 10 | "graph_dense_between_every_num_gnn_layers": 1, 11 | 
"graph_model_activation_function": "tanh", 12 | "graph_residual_connection_every_num_layers": 2, 13 | "graph_inter_layer_norm": true, 14 | "max_epochs": 10000, 15 | "patience": 5, 16 | "optimizer": "Adam", 17 | "learning_rate": 0.00015, 18 | "learning_rate_decay": 0.98, 19 | "lr_for_num_graphs_per_batch": 30, 20 | "momentum": 0.85, 21 | "clamp_gradient_norm": 1.0, 22 | "random_seed": 0, 23 | "hidden_size": 128, 24 | "graph_activation_function": "gelu", 25 | "message_aggregation_function": "sum", 26 | "graph_message_weights_dropout_ratio": 0.0, 27 | "use_target_state_as_input": true, 28 | "num_edge_hidden_layers": 1 29 | } 30 | } -------------------------------------------------------------------------------- /tasks/default_hypers/VarMisuse_GNN-FiLM.json: -------------------------------------------------------------------------------- 1 | { 2 | "task_params": { 3 | "add_self_loop_edges": true 4 | }, 5 | "model_params": { 6 | "max_nodes_in_batch": 60000, 7 | "graph_num_layers": 10, 8 | "graph_num_timesteps_per_layer": 1, 9 | "graph_layer_input_dropout_keep_prob": 0.8, 10 | "graph_message_weights_dropout_ratio": 0.0, 11 | "graph_dense_between_every_num_gnn_layers": 1, 12 | "graph_model_activation_function": "tanh", 13 | "graph_residual_connection_every_num_layers": 2, 14 | "graph_inter_layer_norm": false, 15 | "max_epochs": 10000, 16 | "patience": 5, 17 | "optimizer": "Adam", 18 | "learning_rate": 0.00015, 19 | "learning_rate_decay": 0.98, 20 | "momentum": 0.85, 21 | "clamp_gradient_norm": 1.0, 22 | "random_seed": 0, 23 | "hidden_size": 128, 24 | "graph_activation_function": "ReLU", 25 | "message_aggregation_function": "sum", 26 | "normalize_messages_by_num_incoming": false 27 | } 28 | } 29 | -------------------------------------------------------------------------------- /tasks/default_hypers/VarMisuse_RGAT.json: -------------------------------------------------------------------------------- 1 | { 2 | "task_params": { 3 | "add_self_loop_edges": true 4 | }, 5 | "model_params": { 6 | "max_nodes_in_batch": 50000, 7 | "graph_num_layers": 8, 8 | "graph_num_timesteps_per_layer": 1, 9 | "graph_layer_input_dropout_keep_prob": 0.9, 10 | "graph_dense_between_every_num_gnn_layers": 10000, 11 | "graph_model_activation_function": "tanh", 12 | "graph_residual_connection_every_num_layers": 10000, 13 | "graph_inter_layer_norm": false, 14 | "max_epochs": 10000, 15 | "patience": 5, 16 | "optimizer": "Adam", 17 | "learning_rate": 0.00015, 18 | "learning_rate_decay": 0.98, 19 | "lr_for_num_graphs_per_batch": 30, 20 | "momentum": 0.85, 21 | "clamp_gradient_norm": 1.0, 22 | "random_seed": 0, 23 | "hidden_size": 96, 24 | "num_heads": 8, 25 | "graph_activation_function": "tanh" 26 | } 27 | } -------------------------------------------------------------------------------- /tasks/default_hypers/VarMisuse_RGCN.json: -------------------------------------------------------------------------------- 1 | { 2 | "task_params": { 3 | "add_self_loop_edges": true 4 | }, 5 | "model_params": { 6 | "max_nodes_in_batch": 75000, 7 | "graph_num_layers": 10, 8 | "graph_num_timesteps_per_layer": 1, 9 | "graph_layer_input_dropout_keep_prob": 0.9, 10 | "graph_dense_between_every_num_gnn_layers": 10000, 11 | "graph_model_activation_function": "tanh", 12 | "graph_residual_connection_every_num_layers": 10000, 13 | "graph_inter_layer_norm": false, 14 | "max_epochs": 10000, 15 | "patience": 5, 16 | "optimizer": "Adam", 17 | "learning_rate": 0.00015, 18 | "learning_rate_decay": 0.98, 19 | "momentum": 0.85, 20 | "clamp_gradient_norm": 1.0, 21 
| "random_seed": 0, 22 | "hidden_size": 128, 23 | "graph_activation_function": "ReLU", 24 | "message_aggregation_function": "sum" 25 | } 26 | } -------------------------------------------------------------------------------- /tasks/default_hypers/VarMisuse_RGIN.json: -------------------------------------------------------------------------------- 1 | { 2 | "task_params": { 3 | "add_self_loop_edges": true 4 | }, 5 | "model_params": { 6 | "max_nodes_in_batch": 50000, 7 | "graph_num_layers": 6, 8 | "graph_num_timesteps_per_layer": 1, 9 | "graph_layer_input_dropout_keep_prob": 0.8, 10 | "graph_dense_between_every_num_gnn_layers": 1, 11 | "graph_model_activation_function": "tanh", 12 | "graph_residual_connection_every_num_layers": 2, 13 | "graph_inter_layer_norm": true, 14 | "max_epochs": 10000, 15 | "patience": 5, 16 | "optimizer": "Adam", 17 | "learning_rate": 0.00015, 18 | "learning_rate_decay": 0.98, 19 | "lr_for_num_graphs_per_batch": 30, 20 | "momentum": 0.85, 21 | "clamp_gradient_norm": 1.0, 22 | "hidden_size": 128, 23 | "graph_activation_function": "ReLU", 24 | "message_aggregation_function": "sum", 25 | "use_target_state_as_input": false, 26 | "graph_num_edge_MLP_hidden_layers": 1, 27 | "graph_num_aggr_MLP_hidden_layers": null 28 | } 29 | } -------------------------------------------------------------------------------- /tasks/ppi_task.py: -------------------------------------------------------------------------------- 1 | from collections import namedtuple 2 | from typing import Any, Dict, Iterator, List, Iterable 3 | 4 | import tensorflow as tf 5 | import numpy as np 6 | from dpu_utils.utils import RichPath 7 | 8 | from .sparse_graph_task import Sparse_Graph_Task, DataFold, MinibatchData 9 | from utils import micro_f1 10 | 11 | 12 | GraphSample = namedtuple('GraphSample', ['adjacency_lists', 13 | 'type_to_node_to_num_incoming_edges', 14 | 'node_features', 15 | 'node_labels', 16 | ]) 17 | 18 | 19 | class PPI_Task(Sparse_Graph_Task): 20 | @classmethod 21 | def default_params(cls): 22 | params = super().default_params() 23 | params.update({ 24 | 'add_self_loop_edges': True, 25 | 'tie_fwd_bkwd_edges': False, 26 | 'out_layer_dropout_keep_prob': 1.0, 27 | }) 28 | return params 29 | 30 | @staticmethod 31 | def name() -> str: 32 | return "PPI" 33 | 34 | @staticmethod 35 | def default_data_path() -> str: 36 | return "data/ppi" 37 | 38 | def __init__(self, params: Dict[str, Any]): 39 | super().__init__(params) 40 | 41 | # Things that will be filled once we load data: 42 | self.__num_edge_types = 0 43 | self.__initial_node_feature_size = 0 44 | self.__num_labels = 0 45 | 46 | def get_metadata(self) -> Dict[str, Any]: 47 | metadata = super().get_metadata() 48 | metadata['num_edge_types'] = self.__num_edge_types 49 | metadata['initial_node_feature_size'] = self.__initial_node_feature_size 50 | metadata['num_labels'] = self.__num_labels 51 | return metadata 52 | 53 | def restore_from_metadata(self, metadata: Dict[str, Any]) -> None: 54 | super().restore_from_metadata(metadata) 55 | self.__num_edge_types = metadata['num_edge_types'] 56 | self.__initial_node_feature_size = metadata['initial_node_feature_size'] 57 | self.__num_labels = metadata['num_labels'] 58 | 59 | @property 60 | def num_edge_types(self) -> int: 61 | return self.__num_edge_types 62 | 63 | @property 64 | def initial_node_feature_size(self) -> int: 65 | return self.__initial_node_feature_size 66 | 67 | # -------------------- Data Loading -------------------- 68 | def load_data(self, path: RichPath) -> None: 69 | # Data in format as 
downloaded from https://s3.us-east-2.amazonaws.com/dgl.ai/dataset/ppi.zip 70 | self._loaded_data[DataFold.TRAIN] = self.__load_data(path, DataFold.TRAIN) 71 | self._loaded_data[DataFold.VALIDATION] = self.__load_data(path, DataFold.VALIDATION) 72 | 73 | def load_eval_data_from_path(self, path: RichPath) -> Iterable[Any]: 74 | return self.__load_data(path, DataFold.TEST) 75 | 76 | def __load_data(self, data_dir: RichPath, data_fold: DataFold) -> List[GraphSample]: 77 | if data_fold == DataFold.TRAIN: 78 | data_name = "train" 79 | elif data_fold == DataFold.VALIDATION: 80 | data_name = "valid" 81 | elif data_fold == DataFold.TEST: 82 | data_name = "test" 83 | else: 84 | raise ValueError("Unknown data fold '%s'" % str(data_fold)) 85 | print(" Loading PPI %s data from %s." % (data_name, data_dir)) 86 | 87 | graph_json_data = data_dir.join("%s_graph.json" % data_name).read_by_file_suffix() 88 | node_to_features = data_dir.join("%s_feats.npy" % data_name).read_by_file_suffix() 89 | node_to_labels = data_dir.join("%s_labels.npy" % data_name).read_by_file_suffix() 90 | node_to_graph_id = data_dir.join("%s_graph_id.npy" % data_name).read_by_file_suffix() 91 | self.__initial_node_feature_size = node_to_features.shape[-1] 92 | self.__num_labels = node_to_labels.shape[-1] 93 | 94 | # We read in all the data in two steps: 95 | # (1) Read features, labels and insert self-loop edges (edge type 0). 96 | # Implicitly, this gives us the number of nodes per graph. 97 | # (2) Read all edges, and shift them so that each graph starts with node 0. 98 | 99 | fwd_edge_type = 0 100 | self.__num_edge_types = 1 101 | if self.params['add_self_loop_edges']: 102 | self_loop_edge_type = self.__num_edge_types 103 | self.__num_edge_types += 1 104 | if not self.params['tie_fwd_bkwd_edges']: 105 | bkwd_edge_type = self.__num_edge_types 106 | self.__num_edge_types += 1 107 | 108 | graph_id_to_graph_data = {} # type: Dict[int, GraphSample] 109 | graph_id_to_node_offset = {} 110 | num_total_nodes = node_to_features.shape[0] 111 | for node_id in range(num_total_nodes): 112 | graph_id = node_to_graph_id[node_id] 113 | # In case we are entering a new graph, note its ID, so that we can normalise everything to start at 0 114 | if graph_id not in graph_id_to_graph_data: 115 | graph_id_to_graph_data[graph_id] = \ 116 | GraphSample(adjacency_lists=[[] for _ in range(self.__num_edge_types)], 117 | type_to_node_to_num_incoming_edges=[[] for _ in range(self.__num_edge_types)], 118 | node_features=[], 119 | node_labels=[]) 120 | graph_id_to_node_offset[graph_id] = node_id 121 | cur_graph_data = graph_id_to_graph_data[graph_id] 122 | cur_graph_data.node_features.append(node_to_features[node_id]) 123 | cur_graph_data.node_labels.append(node_to_labels[node_id]) 124 | shifted_node_id = node_id - graph_id_to_node_offset[graph_id] 125 | if self.params['add_self_loop_edges']: 126 | cur_graph_data.adjacency_lists[self_loop_edge_type].append((shifted_node_id, shifted_node_id)) 127 | cur_graph_data.type_to_node_to_num_incoming_edges[self_loop_edge_type].append(1) 128 | 129 | # Prepare reading of the edges by setting counters to 0: 130 | for graph_data in graph_id_to_graph_data.values(): 131 | num_graph_nodes = len(graph_data.node_features) 132 | graph_data.type_to_node_to_num_incoming_edges[fwd_edge_type] = np.zeros([num_graph_nodes], np.int32) 133 | if not self.params['tie_fwd_bkwd_edges']: 134 | graph_data.type_to_node_to_num_incoming_edges[bkwd_edge_type] = np.zeros([num_graph_nodes], np.int32) 135 | 136 | for edge_info in 
graph_json_data['links']: 137 | src_node, tgt_node = edge_info['source'], edge_info['target'] 138 | # First, shift node IDs so that each graph starts at node 0: 139 | graph_id = node_to_graph_id[src_node] 140 | graph_node_offset = graph_id_to_node_offset[graph_id] 141 | src_node, tgt_node = src_node - graph_node_offset, tgt_node - graph_node_offset 142 | 143 | cur_graph_data = graph_id_to_graph_data[graph_id] 144 | cur_graph_data.adjacency_lists[fwd_edge_type].append((src_node, tgt_node)) 145 | cur_graph_data.type_to_node_to_num_incoming_edges[fwd_edge_type][tgt_node] += 1 146 | if not self.params['tie_fwd_bkwd_edges']: 147 | cur_graph_data.adjacency_lists[bkwd_edge_type].append((tgt_node, src_node)) 148 | cur_graph_data.type_to_node_to_num_incoming_edges[bkwd_edge_type][src_node] += 1 149 | 150 | final_graphs = [] 151 | for graph_data in graph_id_to_graph_data.values(): 152 | # numpy-ize: 153 | adj_lists = [] 154 | for edge_type_idx in range(self.__num_edge_types): 155 | adj_lists.append(np.array(graph_data.adjacency_lists[edge_type_idx])) 156 | final_graphs.append( 157 | GraphSample(adjacency_lists=adj_lists, 158 | type_to_node_to_num_incoming_edges=np.array(graph_data.type_to_node_to_num_incoming_edges), 159 | node_features=np.array(graph_data.node_features), 160 | node_labels=np.array(graph_data.node_labels))) 161 | 162 | return final_graphs 163 | 164 | # -------------------- Model Construction -------------------- 165 | def make_task_output_model(self, 166 | placeholders: Dict[str, tf.Tensor], 167 | model_ops: Dict[str, tf.Tensor], 168 | ) -> None: 169 | placeholders['graph_nodes_list'] = \ 170 | tf.placeholder(dtype=tf.int32, shape=[None], name='graph_nodes_list') 171 | placeholders['target_labels'] = \ 172 | tf.placeholder(dtype=tf.float32, shape=[None, self.__num_labels], name='target_labels') 173 | placeholders['out_layer_dropout_keep_prob'] = \ 174 | tf.placeholder(dtype=tf.float32, shape=[], name='out_layer_dropout_keep_prob') 175 | 176 | per_node_logits = \ 177 | tf.keras.layers.Dense(units=self.__num_labels, 178 | use_bias=True, 179 | )(model_ops['final_node_representations']) 180 | 181 | losses = tf.nn.sigmoid_cross_entropy_with_logits(logits=per_node_logits, 182 | labels=placeholders['target_labels']) 183 | total_loss = tf.reduce_sum(losses) 184 | 185 | # Compute loss as average per node (to account for changing number of nodes per batch): 186 | num_nodes_in_batch = tf.shape(placeholders['target_labels'])[0] 187 | 188 | f1_score = micro_f1(per_node_logits, placeholders['target_labels']) 189 | tf.summary.scalar("Micro F1", f1_score) 190 | model_ops['task_metrics'] = { 191 | 'loss': total_loss / tf.cast(num_nodes_in_batch, tf.float32), 192 | 'total_loss': total_loss, 193 | 'f1_score': f1_score, 194 | } 195 | 196 | # -------------------- Minibatching and training loop -------------------- 197 | def make_minibatch_iterator(self, 198 | data: Iterable[Any], 199 | data_fold: DataFold, 200 | model_placeholders: Dict[str, tf.Tensor], 201 | max_nodes_per_batch: int) \ 202 | -> Iterator[MinibatchData]: 203 | if data_fold == DataFold.TRAIN: 204 | np.random.shuffle(data) 205 | out_layer_dropout_keep_prob = self.params['out_layer_dropout_keep_prob'] 206 | else: 207 | out_layer_dropout_keep_prob = 1.0 208 | 209 | # Pack until we cannot fit more graphs in the batch 210 | num_graphs = 0 211 | while num_graphs < len(data): 212 | num_graphs_in_batch = 0 213 | batch_node_features = [] # type: List[np.ndarray] 214 | batch_node_labels = [] 215 | batch_adjacency_lists = [[] for _ in 
range(self.num_edge_types)] # type: List[List[np.ndarray]] 216 | batch_type_to_num_incoming_edges = [] 217 | batch_graph_nodes_list = [] 218 | node_offset = 0 219 | 220 | while num_graphs < len(data) and node_offset + len(data[num_graphs].node_features) < max_nodes_per_batch: 221 | cur_graph = data[num_graphs] 222 | num_nodes_in_graph = len(data[num_graphs].node_features) 223 | batch_node_features.extend(cur_graph.node_features) 224 | batch_graph_nodes_list.append(np.full(shape=[num_nodes_in_graph], 225 | fill_value=num_graphs_in_batch, 226 | dtype=np.int32)) 227 | for i in range(self.num_edge_types): 228 | batch_adjacency_lists[i].append(cur_graph.adjacency_lists[i] + node_offset) 229 | batch_type_to_num_incoming_edges.append(cur_graph.type_to_node_to_num_incoming_edges) 230 | batch_node_labels.append(cur_graph.node_labels) 231 | num_graphs += 1 232 | num_graphs_in_batch += 1 233 | node_offset += num_nodes_in_graph 234 | 235 | batch_feed_dict = { 236 | model_placeholders['initial_node_features']: np.array(batch_node_features), 237 | model_placeholders['type_to_num_incoming_edges']: np.concatenate(batch_type_to_num_incoming_edges, axis=1), 238 | model_placeholders['graph_nodes_list']: np.concatenate(batch_graph_nodes_list), 239 | model_placeholders['target_labels']: np.concatenate(batch_node_labels, axis=0), 240 | model_placeholders['out_layer_dropout_keep_prob']: out_layer_dropout_keep_prob, 241 | } 242 | 243 | # Merge adjacency lists: 244 | num_edges = 0 245 | for i in range(self.num_edge_types): 246 | if len(batch_adjacency_lists[i]) > 0: 247 | adj_list = np.concatenate(batch_adjacency_lists[i]) 248 | else: 249 | adj_list = np.zeros((0, 2), dtype=np.int32) 250 | num_edges += adj_list.shape[0] 251 | batch_feed_dict[model_placeholders['adjacency_lists'][i]] = adj_list 252 | 253 | yield MinibatchData(feed_dict=batch_feed_dict, 254 | num_graphs=num_graphs_in_batch, 255 | num_nodes=node_offset, 256 | num_edges=num_edges) 257 | 258 | def early_stopping_metric(self, task_metric_results: List[Dict[str, np.ndarray]], num_graphs: int) -> float: 259 | # Early stopping based on average loss: 260 | return np.sum([m['total_loss'] for m in task_metric_results]) / num_graphs 261 | 262 | def pretty_print_epoch_task_metrics(self, task_metric_results: List[Dict[str, np.ndarray]], num_graphs: int) -> str: 263 | avg_microf1 = np.average([m['f1_score'] for m in task_metric_results]) 264 | return "Avg MicroF1: %.3f" % (avg_microf1,) 265 | -------------------------------------------------------------------------------- /tasks/qm9_task.py: -------------------------------------------------------------------------------- 1 | from collections import namedtuple 2 | from typing import Any, Dict, Tuple, List, Iterable 3 | 4 | import tensorflow as tf 5 | import numpy as np 6 | from dpu_utils.utils import RichPath 7 | 8 | from .sparse_graph_task import Sparse_Graph_Task, DataFold, MinibatchData 9 | from utils import MLP 10 | 11 | 12 | GraphSample = namedtuple('GraphSample', ['adjacency_lists', 13 | 'type_to_node_to_num_incoming_edges', 14 | 'node_features', 15 | 'target_values', 16 | ]) 17 | 18 | 19 | class QM9_Task(Sparse_Graph_Task): 20 | # These magic constants were obtained during dataset generation, as result of normalising 21 | # the values of target properties: 22 | CHEMICAL_ACC_NORMALISING_FACTORS = [0.066513725, 0.012235489, 0.071939046, 23 | 0.033730778, 0.033486113, 0.004278493, 24 | 0.001330901, 0.004165489, 0.004128926, 25 | 0.00409976, 0.004527465, 0.012292586, 26 | 0.037467458] 27 | 28 | @classmethod 29 
| def default_params(cls): 30 | params = super().default_params() 31 | params.update({ 32 | 'task_ids': [0], 33 | 34 | 'add_self_loop_edges': True, 35 | 'tie_fwd_bkwd_edges': True, 36 | 'use_graph': True, 37 | 'activation_function': "tanh", 38 | 'out_layer_dropout_keep_prob': 1.0, 39 | }) 40 | return params 41 | 42 | @staticmethod 43 | def name() -> str: 44 | return "QM9" 45 | 46 | @staticmethod 47 | def default_data_path() -> str: 48 | return "data/qm9" 49 | 50 | def __init__(self, params: Dict[str, Any]): 51 | super().__init__(params) 52 | 53 | # Things that will be filled once we load data: 54 | self.__num_edge_types = 0 55 | self.__annotation_size = 0 56 | 57 | def get_metadata(self) -> Dict[str, Any]: 58 | metadata = super().get_metadata() 59 | metadata['num_edge_types'] = self.__num_edge_types 60 | metadata['annotation_size'] = self.__annotation_size 61 | return metadata 62 | 63 | def restore_from_metadata(self, metadata: Dict[str, Any]) -> None: 64 | super().restore_from_metadata(metadata) 65 | self.__num_edge_types = metadata['num_edge_types'] 66 | self.__annotation_size = metadata['annotation_size'] 67 | 68 | @property 69 | def num_edge_types(self) -> int: 70 | return self.__num_edge_types 71 | 72 | @property 73 | def initial_node_feature_size(self) -> int: 74 | return self.__annotation_size 75 | 76 | # -------------------- Data Loading -------------------- 77 | def load_data(self, path: RichPath) -> None: 78 | self._loaded_data[DataFold.TRAIN] = self.__load_data(path.join("train.jsonl.gz")) 79 | self._loaded_data[DataFold.VALIDATION] = self.__load_data(path.join("valid.jsonl.gz")) 80 | 81 | def load_eval_data_from_path(self, path: RichPath) -> Iterable[Any]: 82 | if path.path == self.default_data_path(): 83 | path = path.join("test.jsonl.gz") 84 | return self.__load_data(path) 85 | 86 | def __load_data(self, data_file: RichPath) -> List[GraphSample]: 87 | print(" Loading QM9 data from %s." 
% (data_file,)) 88 | data = list(data_file.read_by_file_suffix()) # list() needed for .jsonl case, where .read*() is just a generator 89 | 90 | # Get some common data out: 91 | num_fwd_edge_types = 0 92 | for g in data: 93 | num_fwd_edge_types = max(num_fwd_edge_types, max([e[1] for e in g['graph']])) 94 | if self.params['add_self_loop_edges']: 95 | num_fwd_edge_types += 1 96 | self.__num_edge_types = max(self.num_edge_types, 97 | num_fwd_edge_types * (1 if self.params['tie_fwd_bkwd_edges'] else 2)) 98 | self.__annotation_size = max(self.__annotation_size, len(data[0]["node_features"][0])) 99 | return self.__process_raw_graphs(data) 100 | 101 | def __process_raw_graphs(self, raw_data: Iterable[Any]) -> List[GraphSample]: 102 | processed_graphs = [] 103 | for d in raw_data: 104 | (type_to_adjacency_list, type_to_num_incoming_edges) = \ 105 | self.__graph_to_adjacency_lists(d['graph'], num_nodes=len(d["node_features"])) 106 | processed_graphs.append( 107 | GraphSample(adjacency_lists=type_to_adjacency_list, 108 | type_to_node_to_num_incoming_edges=type_to_num_incoming_edges, 109 | node_features=d["node_features"], 110 | target_values=[d["targets"][task_id][0] for task_id in self.params['task_ids']], 111 | )) 112 | return processed_graphs 113 | 114 | def __graph_to_adjacency_lists(self, graph: Iterable[Tuple[int, int, int]], num_nodes: int) \ 115 | -> Tuple[List[np.ndarray], np.ndarray]: 116 | type_to_adj_list = [[] for _ in range(self.num_edge_types)] # type: List[List[Tuple[int, int]]] 117 | type_to_num_incoming_edges = np.zeros(shape=(self.num_edge_types, num_nodes,)) 118 | for src, e, dest in graph: 119 | if self.params['add_self_loop_edges']: 120 | fwd_edge_type = e # 0 will be the self-loop type 121 | else: 122 | fwd_edge_type = e - 1 # Make edges start from 0 123 | type_to_adj_list[fwd_edge_type].append((src, dest)) 124 | type_to_num_incoming_edges[fwd_edge_type, dest] += 1 125 | if self.params['tie_fwd_bkwd_edges']: 126 | type_to_adj_list[fwd_edge_type].append((dest, src)) 127 | type_to_num_incoming_edges[fwd_edge_type, src] += 1 128 | 129 | if self.params['add_self_loop_edges']: 130 | # Add self-loop edges (idx 0, which isn't used in the data): 131 | for node in range(num_nodes): 132 | type_to_num_incoming_edges[0, node] = 1 133 | type_to_adj_list[0].append((node, node)) 134 | 135 | type_to_adj_list = [np.array(sorted(adj_list), dtype=np.int32) if len(adj_list) > 0 else np.zeros(shape=(0, 2), dtype=np.int32) 136 | for adj_list in type_to_adj_list] 137 | 138 | # Add backward edges as an additional edge type that goes backwards: 139 | if not (self.params['tie_fwd_bkwd_edges']): 140 | type_to_adj_list = type_to_adj_list[:self.num_edge_types // 2] # We allocated too much earlier... 
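# For every remaining forward edge type, build a mirrored backward edge type by reversing each (src, dst) pair and counting its incoming edges: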
141 | for (edge_type, adj_list) in enumerate(type_to_adj_list): 142 | bwd_edge_type = self.num_edge_types // 2 + edge_type 143 | type_to_adj_list.append(np.array(sorted((y, x) for (x, y) in adj_list), dtype=np.int32)) 144 | for (x, y) in adj_list: 145 | type_to_num_incoming_edges[bwd_edge_type][y] += 1 146 | 147 | return type_to_adj_list, type_to_num_incoming_edges 148 | 149 | # -------------------- Model Construction -------------------- 150 | def make_task_output_model(self, 151 | placeholders: Dict[str, tf.Tensor], 152 | model_ops: Dict[str, tf.Tensor], 153 | ) -> None: 154 | placeholders['graph_nodes_list'] = \ 155 | tf.placeholder(dtype=tf.int32, shape=[None], name='graph_nodes_list') 156 | placeholders['target_values'] = \ 157 | tf.placeholder(dtype=tf.float32, shape=[len(self.params['task_ids']), None], name='target_values') 158 | placeholders['out_layer_dropout_keep_prob'] = \ 159 | tf.placeholder(dtype=tf.float32, shape=[], name='out_layer_dropout_keep_prob') 160 | 161 | task_metrics = {} 162 | losses = [] 163 | for (internal_id, task_id) in enumerate(self.params['task_ids']): 164 | with tf.variable_scope("out_layer_task%i" % task_id): 165 | regression_gate = \ 166 | MLP(out_size=1, 167 | hidden_layers=[], 168 | use_biases=True, 169 | dropout_rate=1.0 - placeholders['out_layer_dropout_keep_prob'], 170 | name="regression_gate") 171 | regression_transform = \ 172 | MLP(out_size=1, 173 | hidden_layers=[], 174 | use_biases=True, 175 | dropout_rate=1.0 - placeholders['out_layer_dropout_keep_prob'], 176 | name="regression") 177 | 178 | per_node_outputs = regression_transform(model_ops['final_node_representations']) 179 | gate_input = tf.concat([model_ops['final_node_representations'], 180 | model_ops['initial_node_features']], 181 | axis=-1) 182 | per_node_gated_outputs = tf.nn.sigmoid(regression_gate(gate_input)) * per_node_outputs 183 | 184 | # Sum up all nodes per-graph 185 | per_graph_outputs = tf.unsorted_segment_sum(data=per_node_gated_outputs, 186 | segment_ids=placeholders['graph_nodes_list'], 187 | num_segments=placeholders['num_graphs']) 188 | per_graph_outputs = tf.squeeze(per_graph_outputs) # [g] 189 | 190 | per_graph_errors = per_graph_outputs - placeholders['target_values'][internal_id, :] 191 | task_metrics['abs_err_task%i' % task_id] = tf.reduce_sum(tf.abs(per_graph_errors)) 192 | tf.summary.scalar('mae_task%i' % task_id, 193 | task_metrics['abs_err_task%i' % task_id] / tf.cast(placeholders['num_graphs'], tf.float32)) 194 | losses.append(tf.reduce_mean(0.5 * tf.square(per_graph_errors))) 195 | model_ops['task_metrics'] = task_metrics 196 | model_ops['task_metrics']['loss'] = tf.reduce_sum(losses) 197 | model_ops['task_metrics']['total_loss'] = model_ops['task_metrics']['loss'] * tf.cast(placeholders['num_graphs'], tf.float32) 198 | 199 | # -------------------- Minibatching and training loop -------------------- 200 | def make_minibatch_iterator(self, 201 | data: Iterable[Any], 202 | data_fold: DataFold, 203 | model_placeholders: Dict[str, tf.Tensor], 204 | max_nodes_per_batch: int) \ 205 | -> Iterable[MinibatchData]: 206 | if data_fold == DataFold.TRAIN: 207 | np.random.shuffle(data) 208 | out_layer_dropout_keep_prob = self.params['out_layer_dropout_keep_prob'] 209 | else: 210 | out_layer_dropout_keep_prob = 1.0 211 | 212 | # Pack until we cannot fit more graphs in the batch 213 | num_graphs = 0 214 | while num_graphs < len(data): 215 | num_graphs_in_batch = 0 216 | batch_node_features = [] # type: List[np.ndarray] 217 | batch_target_task_values = [] 218 | 
batch_adjacency_lists = [[] for _ in range(self.num_edge_types)] # type: List[List[np.ndarray]] 219 | batch_type_to_num_incoming_edges = [] 220 | batch_graph_nodes_list = [] 221 | node_offset = 0 222 | 223 | while num_graphs < len(data) and node_offset + len(data[num_graphs].node_features) < max_nodes_per_batch: 224 | cur_graph = data[num_graphs] 225 | num_nodes_in_graph = len(cur_graph.node_features) 226 | batch_node_features.extend(cur_graph.node_features) 227 | batch_graph_nodes_list.append(np.full(shape=[num_nodes_in_graph], 228 | fill_value=num_graphs_in_batch, 229 | dtype=np.int32)) 230 | for i in range(self.num_edge_types): 231 | batch_adjacency_lists[i].append(cur_graph.adjacency_lists[i] + node_offset) 232 | 233 | # Turn counters for incoming edges into np array: 234 | batch_type_to_num_incoming_edges.append(cur_graph.type_to_node_to_num_incoming_edges) 235 | batch_target_task_values.append(cur_graph.target_values) 236 | num_graphs += 1 237 | num_graphs_in_batch += 1 238 | node_offset += num_nodes_in_graph 239 | 240 | batch_feed_dict = { 241 | model_placeholders['initial_node_features']: np.array(batch_node_features), 242 | model_placeholders['type_to_num_incoming_edges']: np.concatenate(batch_type_to_num_incoming_edges, axis=1), 243 | model_placeholders['graph_nodes_list']: np.concatenate(batch_graph_nodes_list), 244 | model_placeholders['target_values']: np.transpose(batch_target_task_values, axes=[1, 0]), 245 | model_placeholders['out_layer_dropout_keep_prob']: out_layer_dropout_keep_prob, 246 | } 247 | 248 | # Merge adjacency lists: 249 | num_edges = 0 250 | for i in range(self.num_edge_types): 251 | if len(batch_adjacency_lists[i]) > 0: 252 | adj_list = np.concatenate(batch_adjacency_lists[i]) 253 | else: 254 | adj_list = np.zeros((0, 2), dtype=np.int32) 255 | num_edges += adj_list.shape[0] 256 | batch_feed_dict[model_placeholders['adjacency_lists'][i]] = adj_list 257 | 258 | yield MinibatchData(feed_dict=batch_feed_dict, 259 | num_graphs=num_graphs_in_batch, 260 | num_nodes=node_offset, 261 | num_edges=num_edges) 262 | 263 | def early_stopping_metric(self, task_metric_results: List[Dict[str, np.ndarray]], num_graphs: int) -> float: 264 | # Early stopping based on average loss: 265 | return np.sum([m['total_loss'] for m in task_metric_results]) / num_graphs 266 | 267 | def pretty_print_epoch_task_metrics(self, task_metric_results: List[Dict[str, np.ndarray]], num_graphs: int) -> str: 268 | maes = {} 269 | for task_id in self.params['task_ids']: 270 | maes['mae_task%i' % task_id] = 0. 
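# Sum the per-batch absolute errors and divide by the total number of graphs in the epoch to obtain one MAE per task: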
271 | fnum_graphs = float(num_graphs) 272 | for batch_task_metric_results in task_metric_results: 273 | for task_id in self.params['task_ids']: 274 | maes['mae_task%i' % task_id] += batch_task_metric_results['abs_err_task%i' % task_id] / fnum_graphs 275 | 276 | maes_str = " ".join("%i:%.5f" % (task_id, maes['mae_task%i' % task_id]) 277 | for task_id in self.params['task_ids']) 278 | # The following translates back from MAE on the property values normalised to the [0,1] range to the original scale: 279 | err_str = " ".join("%i:%.5f" % (task_id, maes['mae_task%i' % task_id] / self.CHEMICAL_ACC_NORMALISING_FACTORS[task_id]) 280 | for task_id in self.params['task_ids']) 281 | 282 | return "MAEs: %s | Error Ratios: %s" % (maes_str, err_str) 283 | -------------------------------------------------------------------------------- /tasks/sparse_graph_task.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | from enum import Enum 3 | from typing import Any, Dict, Iterable, List, NamedTuple, Iterator, Optional 4 | 5 | import tensorflow as tf 6 | import numpy as np 7 | from dpu_utils.utils import RichPath 8 | 9 | 10 | class DataFold(Enum): 11 | TRAIN = 0 12 | VALIDATION = 1 13 | TEST = 2 14 | 15 | 16 | class MinibatchData(NamedTuple): 17 | feed_dict: Dict[str, tf.Tensor] 18 | num_graphs: int 19 | num_nodes: int 20 | num_edges: int 21 | 22 | 23 | class Sparse_Graph_Task(ABC): 24 | """ 25 | Abstract superclass of all graph tasks, defining the interface used by the 26 | remainder of the code to interact with a task. 27 | """ 28 | @classmethod 29 | def default_params(cls): 30 | return {} 31 | 32 | @staticmethod 33 | @abstractmethod 34 | def default_data_path() -> str: 35 | raise NotImplementedError() 36 | 37 | @staticmethod 38 | @abstractmethod 39 | def name() -> str: 40 | raise NotImplementedError() 41 | 42 | def __init__(self, params: Dict[str, Any]): 43 | self.params = params 44 | self._loaded_data = {} # type: Dict[DataFold, Any] 45 | 46 | def get_metadata(self) -> Dict[str, Any]: 47 | """ 48 | Returns: 49 | Dictionary with all metadata that defines this task, for example parameters 50 | or vocabularies. 51 | """ 52 | return {"params": self.params} 53 | 54 | def restore_from_metadata(self, metadata: Dict[str, Any]) -> None: 55 | """ 56 | Set up task to match passed metadata, e.g., by using the passed vocabulary. 57 | The input can be expected to be an output of get_metadata from another run. 58 | """ 59 | self.params = metadata["params"] 60 | 61 | @property 62 | @abstractmethod 63 | def num_edge_types(self) -> int: 64 | """ 65 | Returns: 66 | Number of edge types used in the dataset. 67 | """ 68 | raise NotImplementedError() 69 | 70 | @property 71 | @abstractmethod 72 | def initial_node_feature_size(self) -> int: 73 | """ 74 | Return: 75 | Size of the initial node representation. 76 | """ 77 | raise NotImplementedError() 78 | 79 | @property 80 | def has_test_data(self) -> bool: 81 | return DataFold.TEST in self._loaded_data 82 | 83 | @abstractmethod 84 | def load_data(self, path: Optional[RichPath]) -> None: 85 | """ 86 | Load data required to train on this task into memory. 87 | 88 | Arguments: 89 | path: Optional path to load from, if not specified, will use task-specific 90 | default under "./data/". 91 | """ 92 | raise NotImplementedError() 93 | 94 | def load_eval_data_from_path(self, path: RichPath) -> Iterable[Any]: 95 | """ 96 | Load data from a given path for evaluation purposes. 
97 | 98 | Arguments: 99 | path: Depending on the task a file or directory containing data to load. 100 | 101 | Returns: 102 | An iterator over graph samples, suitable for being passed into 103 | task.make_minibatch_iterator(). 104 | """ 105 | raise NotImplementedError() 106 | 107 | def make_task_input_model(self, 108 | placeholders: Dict[str, tf.Tensor], 109 | model_ops: Dict[str, tf.Tensor], 110 | ) -> None: 111 | """ 112 | Create a task-specific input model. The default implementation 113 | simply creates placeholders to feed the input in, but more advanced 114 | variants could include sub-networks determining node features, 115 | for example. 116 | 117 | This method cannot assume the placeholders or model_ops dictionaries 118 | to be pre-populated, and needs to add at least the following 119 | entries to model_ops: 120 | * 'initial_node_features': float32 tensor of shape [V, D], where V 121 | is the number of nodes and D is the initial hidden dimension 122 | (needs to match the value of task.initial_node_feature_size). 123 | * 'adjacency_lists': list of L int32 tensors of shape [E, 2], where 124 | L is the number of edge types and E the number of edges of that 125 | type. 126 | Hence, adjacency_lists[l][e,:] == [u, v] means that u has an edge 127 | of type l to v. 128 | * 'type_to_num_incoming_edges': int32 tensor of shape [L, V], where 129 | L is the number of edge types and V the number of nodes. 130 | type_to_num_incoming_edges[l, v] = k indicates that node v has k 131 | incoming edges of type l. 132 | 133 | Arguments: 134 | placeholders: Dictionary of placeholders used by the model, to 135 | be extended with task-specific placeholders. 136 | model_ops: Dictionary of named operations in the model, to 137 | be extended with task-specific operations. 138 | """ 139 | placeholders['initial_node_features'] = \ 140 | tf.placeholder(dtype=tf.float32, shape=[None, self.initial_node_feature_size], name='initial_node_features') 141 | placeholders['adjacency_lists'] = \ 142 | [tf.placeholder(dtype=tf.int32, shape=[None, 2], name='adjacency_e%s' % e) 143 | for e in range(self.num_edge_types)] 144 | placeholders['type_to_num_incoming_edges'] = \ 145 | tf.placeholder(dtype=tf.float32, shape=[self.num_edge_types, None], name='type_to_num_incoming_edges') 146 | 147 | model_ops['initial_node_features'] = placeholders['initial_node_features'] 148 | model_ops['adjacency_lists'] = placeholders['adjacency_lists'] 149 | model_ops['type_to_num_incoming_edges'] = placeholders['type_to_num_incoming_edges'] 150 | 151 | @abstractmethod 152 | def make_task_output_model(self, 153 | placeholders: Dict[str, tf.Tensor], 154 | model_ops: Dict[str, tf.Tensor], 155 | ) -> None: 156 | """ 157 | Create task-specific output model. For this, additional placeholders 158 | can be created, but will need to be filled in the 159 | make_minibatch_iterator implementation. 160 | 161 | This method may assume existence of the placeholders and ops created in 162 | make_task_input_model and of the following: 163 | model_ops['final_node_representations']: a float32 tensor of shape 164 | [V, D], which holds the final node representations after the 165 | GNN layers. 166 | placeholders['num_graphs']: a int32 scalar holding the number of 167 | graphs in this batch. 168 | Order of nodes is preserved across all tensors. 169 | 170 | This method has to define model_ops['task_metrics'] to a dictionary, 171 | from which model_ops['task_metrics']['loss'] will be used for 172 | optimization. 
Other entries may hold additional metrics (accuracy, 173 | MAE, ...). 174 | 175 | Arguments: 176 | placeholders: Dictionary of placeholders used by the model, 177 | pre-populated by the generic graph model values, and to 178 | be extended with task-specific placeholders. 179 | model_ops: Dictionary of named operations in the model, 180 | pre-populated by the generic graph model values, and to 181 | be extended with task-specific operations. 182 | """ 183 | raise NotImplementedError() 184 | 185 | @abstractmethod 186 | def make_minibatch_iterator(self, 187 | data: Iterable[Any], 188 | data_fold: DataFold, 189 | model_placeholders: Dict[str, tf.Tensor], 190 | max_nodes_per_batch: int, 191 | ) -> Iterator[MinibatchData]: 192 | """ 193 | Create minibatches for a sparse graph model, usually by flattening 194 | many smaller graphs into one large graph of disconnected components. 195 | This should produce one epoch's worth of minibatches. 196 | 197 | Arguments: 198 | data: Data to iterate over, created by either load_data or 199 | load_eval_data_from_path. 200 | data_fold: Fold of the loaded data to iterate over. 201 | model_placeholders: The placeholders of the model that need to be 202 | filled with data, including the placeholders introduced by the 203 | task in make_task_input_model and make_task_output_model. 204 | max_nodes_per_batch: Maximal number of nodes that can be packed 205 | into one batch. 206 | 207 | Returns: 208 | Iterator over MinibatchData values, which provide feed dicts 209 | as well as some batch statistics. 210 | """ 211 | raise NotImplementedError() 212 | 213 | @abstractmethod 214 | def early_stopping_metric(self, 215 | task_metric_results: List[Dict[str, np.ndarray]], 216 | num_graphs: int, 217 | ) -> float: 218 | """ 219 | Given the results of the task's metric for all minibatches of an 220 | epoch, produce a metric that should go down (e.g., loss). This is used 221 | for early stopping of training. 222 | 223 | Arguments: 224 | task_metric_results: List of the values of model_ops['task_metrics'] 225 | (defined in make_task_output_model) for each of the minibatches produced 226 | by make_minibatch_iterator. 227 | num_graphs: Number of graphs processed in this epoch. 228 | 229 | Returns: 230 | Numeric value, where a lower value indicates more desirable results. 231 | """ 232 | raise NotImplementedError() 233 | 234 | @abstractmethod 235 | def pretty_print_epoch_task_metrics(self, 236 | task_metric_results: List[Dict[str, np.ndarray]], 237 | num_graphs: int, 238 | ) -> str: 239 | """ 240 | Given the results of the task's metric for all minibatches of an 241 | epoch, produce a human-readable result for the epoch (e.g., average 242 | accuracy). 243 | 244 | Arguments: 245 | task_metric_results: List of the values of model_ops['task_metrics'] 246 | (defined in make_task_output_model) for each of the minibatches produced 247 | by make_minibatch_iterator. 248 | num_graphs: Number of graphs processed in this epoch. 249 | 250 | Returns: 251 | String representation of the task-specific metrics for this epoch, 252 | e.g., mean absolute error for a regression task. 253 | """ 254 | raise NotImplementedError() 255 | -------------------------------------------------------------------------------- /test.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """ 3 | Usage: 4 | test.py [options] STORED_MODEL_PATH [DATA_PATH] 5 | 6 | STORED_MODEL_PATH is the path of a model snapshot created by train.py.
7 | DATA_PATH is the location of the data to test on. 8 | 9 | Options: 10 | -h --help Show this screen. 11 | --result-dir DIR Directory to store logfiles and trained models. [default: trained_models] 12 | --azure-info PATH Azure authentication information file (JSON). [default: azure_auth.json] 13 | --quiet Show less output. 14 | --debug Turn on debugger. 15 | """ 16 | import json 17 | from typing import Optional 18 | 19 | from docopt import docopt 20 | from dpu_utils.utils import run_and_debug, RichPath 21 | 22 | from utils.model_utils import restore 23 | 24 | 25 | def test(model_path: str, test_data_path: Optional[RichPath], result_dir: str, quiet: bool = False, run_id: Optional[str] = None): 26 | model = restore(model_path, result_dir, run_id) 27 | model.params['max_nodes_in_batch'] = 2 * model.params['max_nodes_in_batch'] # We can process larger batches if we don't do training 28 | test_data_path = test_data_path or RichPath.create(model.task.default_data_path()) 29 | model.log_line(" Using the following task params: %s" % json.dumps(model.task.params)) 30 | model.log_line(" Using the following model params: %s" % json.dumps(model.params)) 31 | model.test(test_data_path) 32 | 33 | 34 | def run(args): 35 | azure_info_path = args.get('--azure-info', None) 36 | model_path = args['STORED_MODEL_PATH'] 37 | test_data_path = args.get('DATA_PATH') 38 | if test_data_path is not None: 39 | test_data_path = RichPath.create(test_data_path, azure_info_path) 40 | result_dir = args.get('--result-dir', 'trained_models') 41 | test(model_path, test_data_path, result_dir, quiet=args.get('--quiet')) 42 | 43 | 44 | if __name__ == "__main__": 45 | args = docopt(__doc__) 46 | run_and_debug(lambda: run(args), enable_debugging=args['--debug']) 47 | -------------------------------------------------------------------------------- /train.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """ 3 | Usage: 4 | train.py [options] MODEL_NAME TASK_NAME 5 | 6 | MODEL_NAME has to be one of the supported models, which currently are 7 | GGNN, GNN-Edge-MLP, GNN-FiLM, RGAT, RGCN, RGDCN, RGIN 8 | 9 | Options: 10 | -h --help Show this screen. 11 | --data-path PATH Path to load data from, has task-specific defaults under data/. 12 | --result-dir DIR Directory to store logfiles and trained models. [default: trained_models] 13 | --run-test Indicate if the task's test should be run. 14 | --model-param-overrides PARAMS Parameter settings overriding model defaults (in JSON format). 15 | --task-param-overrides PARAMS Parameter settings overriding task defaults (in JSON format). 16 | --quiet Show less output. 17 | --tensorboard DIR Dump tensorboard event files to DIR. 18 | --azure-info= Azure authentication information file (JSON). [default: azure_auth.json] 19 | --debug Turn on debugger.
20 | """ 21 | import json 22 | import os 23 | import sys 24 | import time 25 | 26 | from docopt import docopt 27 | from dpu_utils.utils import run_and_debug, RichPath, git_tag_run 28 | 29 | from utils.model_utils import name_to_model_class, name_to_task_class 30 | from test import test 31 | 32 | 33 | def run(args): 34 | azure_info_path = args.get('--azure-info', None) 35 | model_cls, additional_model_params = name_to_model_class(args['MODEL_NAME']) 36 | task_cls, additional_task_params = name_to_task_class(args['TASK_NAME']) 37 | 38 | # Collect parameters, first from the class defaults, then potential task defaults, and finally from the CLI: 39 | task_params = task_cls.default_params() 40 | task_params.update(additional_task_params) 41 | model_params = model_cls.default_params() 42 | model_params.update(additional_model_params) 43 | 44 | # Load potential task-specific defaults: 45 | task_model_default_hypers_file = \ 46 | os.path.join(os.path.dirname(__file__), 47 | "tasks", 48 | "default_hypers", 49 | "%s_%s.json" % (task_cls.name(), model_cls.name(model_params))) 50 | if os.path.exists(task_model_default_hypers_file): 51 | print("Loading task/model-specific default parameters from %s." % task_model_default_hypers_file) 52 | with open(task_model_default_hypers_file, "rt") as f: 53 | default_task_model_hypers = json.load(f) 54 | task_params.update(default_task_model_hypers['task_params']) 55 | model_params.update(default_task_model_hypers['model_params']) 56 | 57 | # Load overrides from command line: 58 | task_params.update(json.loads(args.get('--task-param-overrides') or '{}')) 59 | model_params.update(json.loads(args.get('--model-param-overrides') or '{}')) 60 | 61 | # Finally, upgrade every parameter that is a path to a RichPath: 62 | task_params_orig = dict(task_params) 63 | for (param_name, param_value) in task_params.items(): 64 | if param_name.endswith("_path"): 65 | task_params[param_name] = RichPath.create(param_value, azure_info_path) 66 | 67 | # Now prepare to actually run by setting up directories, creating object instances and running: 68 | result_dir = args.get('--result-dir', 'trained_models') 69 | os.makedirs(result_dir, exist_ok=True) 70 | task = task_cls(task_params) 71 | data_path = args.get('--data-path') or task.default_data_path() 72 | data_path = RichPath.create(data_path, azure_info_path) 73 | task.load_data(data_path) 74 | 75 | random_seeds = model_params['random_seed'] 76 | if not isinstance(random_seeds, list): 77 | random_seeds = [random_seeds] 78 | 79 | for random_seed in random_seeds: 80 | model_params['random_seed'] = random_seed 81 | run_id = "_".join([task_cls.name(), model_cls.name(model_params), time.strftime("%Y-%m-%d-%H-%M-%S"), str(os.getpid())]) 82 | 83 | model = model_cls(model_params, task, run_id, result_dir) 84 | model.log_line("Run %s starting."
% run_id) 85 | model.log_line(" Using the following task params: %s" % json.dumps(task_params_orig)) 86 | model.log_line(" Using the following model params: %s" % json.dumps(model_params)) 87 | 88 | if sys.stdin.isatty(): 89 | try: 90 | git_sha = git_tag_run(run_id) 91 | model.log_line(" git tagged as %s" % git_sha) 92 | except: 93 | print(" Tried tagging run in git, but failed.") 94 | pass 95 | 96 | model.initialize_model() 97 | model.train(quiet=args.get('--quiet'), tf_summary_path=args.get('--tensorboard')) 98 | 99 | if args.get('--run-test'): 100 | test(model.best_model_file, data_path, result_dir, quiet=args.get('--quiet'), run_id=run_id) 101 | 102 | 103 | if __name__ == "__main__": 104 | args = docopt(__doc__) 105 | run_and_debug(lambda: run(args), enable_debugging=args['--debug']) 106 | -------------------------------------------------------------------------------- /utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .utils import SMALL_NUMBER, BIG_NUMBER, get_gated_unit, get_aggregation_function, get_activation, MLP, micro_f1 2 | -------------------------------------------------------------------------------- /utils/citation_network_utils.py: -------------------------------------------------------------------------------- 1 | # This is largely copied from https://raw.githubusercontent.com/tkipf/gcn/master/gcn/utils.py 2 | # It is Copyright (c) 2016 Thomas Kipf, under the MIT license (see LICENSE for a copy) 3 | 4 | import numpy as np 5 | import pickle as pkl 6 | import scipy.sparse as sp 7 | import sys 8 | 9 | 10 | def parse_index_file(filename): 11 | """Parse index file.""" 12 | index = [] 13 | for line in open(filename): 14 | index.append(int(line.strip())) 15 | return index 16 | 17 | 18 | def sample_mask(idx, l): 19 | """Create mask.""" 20 | mask = np.zeros(l) 21 | mask[idx] = 1 22 | return np.array(mask, dtype=np.bool) 23 | 24 | 25 | def load_data(directory: str, dataset_str: str): 26 | """ 27 | Loads input data from gcn/data directory 28 | 29 | ind.dataset_str.x => the feature vectors of the training instances as scipy.sparse.csr.csr_matrix object; 30 | ind.dataset_str.tx => the feature vectors of the test instances as scipy.sparse.csr.csr_matrix object; 31 | ind.dataset_str.allx => the feature vectors of both labeled and unlabeled training instances 32 | (a superset of ind.dataset_str.x) as scipy.sparse.csr.csr_matrix object; 33 | ind.dataset_str.y => the one-hot labels of the labeled training instances as numpy.ndarray object; 34 | ind.dataset_str.ty => the one-hot labels of the test instances as numpy.ndarray object; 35 | ind.dataset_str.ally => the labels for instances in ind.dataset_str.allx as numpy.ndarray object; 36 | ind.dataset_str.graph => a dict in the format {index: [index_of_neighbor_nodes]} as collections.defaultdict 37 | object; 38 | ind.dataset_str.test.index => the indices of test instances in graph, for the inductive setting as list object. 39 | 40 | All objects above must be saved using python pickle module. 41 | 42 | :param dataset_str: Dataset name 43 | :return: All data input files loaded (as well the training/test data). 
44 | """ 45 | names = ['x', 'y', 'tx', 'ty', 'allx', 'ally', 'graph'] 46 | objects = [] 47 | for i in range(len(names)): 48 | with open("{}/ind.{}.{}".format(directory, dataset_str, names[i]), 'rb') as f: 49 | if sys.version_info > (3, 0): 50 | objects.append(pkl.load(f, encoding='latin1')) 51 | else: 52 | objects.append(pkl.load(f)) 53 | 54 | x, y, tx, ty, allx, ally, graph = tuple(objects) 55 | test_idx_reorder = parse_index_file("{}/ind.{}.test.index".format(directory, dataset_str)) 56 | test_idx_range = np.sort(test_idx_reorder) 57 | 58 | if dataset_str == 'citeseer': 59 | # Fix citeseer dataset (there are some isolated nodes in the graph) 60 | # Find isolated nodes, add them as zero-vecs into the right position 61 | test_idx_range_full = range(min(test_idx_reorder), max(test_idx_reorder)+1) 62 | tx_extended = sp.lil_matrix((len(test_idx_range_full), x.shape[1])) 63 | tx_extended[test_idx_range-min(test_idx_range), :] = tx 64 | tx = tx_extended 65 | ty_extended = np.zeros((len(test_idx_range_full), y.shape[1])) 66 | ty_extended[test_idx_range-min(test_idx_range), :] = ty 67 | ty = ty_extended 68 | 69 | features = sp.vstack((allx, tx)).tolil() 70 | features[test_idx_reorder, :] = features[test_idx_range, :] 71 | 72 | labels = np.vstack((ally, ty)) 73 | labels[test_idx_reorder, :] = labels[test_idx_range, :] 74 | 75 | idx_test = test_idx_range.tolist() 76 | idx_train = range(len(y)) 77 | idx_val = range(len(y), len(y)+500) 78 | 79 | train_mask = sample_mask(idx_train, labels.shape[0]) 80 | val_mask = sample_mask(idx_val, labels.shape[0]) 81 | test_mask = sample_mask(idx_test, labels.shape[0]) 82 | 83 | y_train = np.zeros(labels.shape) 84 | y_val = np.zeros(labels.shape) 85 | y_test = np.zeros(labels.shape) 86 | y_train[train_mask, :] = labels[train_mask, :] 87 | y_val[val_mask, :] = labels[val_mask, :] 88 | y_test[test_mask, :] = labels[test_mask, :] 89 | 90 | return graph, features, y_train, y_val, y_test, train_mask, val_mask, test_mask 91 | 92 | 93 | def sparse_to_tuple(sparse_mx): 94 | """Convert sparse matrix to tuple representation.""" 95 | def to_tuple(mx): 96 | if not sp.isspmatrix_coo(mx): 97 | mx = mx.tocoo() 98 | coords = np.vstack((mx.row, mx.col)).transpose() 99 | values = mx.data 100 | shape = mx.shape 101 | # All of these will need to be sorted: 102 | sort_indices = np.lexsort(np.rot90(coords)) 103 | return coords[sort_indices], values[sort_indices], shape 104 | 105 | if isinstance(sparse_mx, list): 106 | for i in range(len(sparse_mx)): 107 | sparse_mx[i] = to_tuple(sparse_mx[i]) 108 | else: 109 | sparse_mx = to_tuple(sparse_mx) 110 | 111 | return sparse_mx 112 | 113 | 114 | def preprocess_features(features): 115 | """Row-normalize feature matrix and convert to tuple representation""" 116 | rowsum = np.array(features.sum(1)) 117 | r_inv = np.power(rowsum, -1).flatten() 118 | r_inv[np.isinf(r_inv)] = 0. 119 | r_mat_inv = sp.diags(r_inv) 120 | features = r_mat_inv.dot(features) 121 | return features.toarray() # densify -- these are tiny and we don't care 122 | 123 | 124 | def normalize_adj(adj): 125 | """Symmetrically normalize adjacency matrix.""" 126 | adj = sp.coo_matrix(adj) 127 | rowsum = np.array(adj.sum(1)) 128 | d_inv_sqrt = np.power(rowsum, -0.5).flatten() 129 | d_inv_sqrt[np.isinf(d_inv_sqrt)] = 0. 
130 | d_mat_inv_sqrt = sp.diags(d_inv_sqrt) 131 | return adj.dot(d_mat_inv_sqrt).transpose().dot(d_mat_inv_sqrt).tocoo() 132 | 133 | 134 | def preprocess_adj(adj): 135 | """Preprocessing of adjacency matrix for simple GCN model and conversion to tuple representation.""" 136 | adj_normalized = normalize_adj(adj + sp.eye(adj.shape[0])) 137 | return sparse_to_tuple(adj_normalized) 138 | 139 | 140 | -------------------------------------------------------------------------------- /utils/model_utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | from typing import Tuple, Type, Dict, Any 4 | 5 | import pickle 6 | 7 | from models import (Sparse_Graph_Model, GGNN_Model, GNN_FiLM_Model, GNN_Edge_MLP_Model, 8 | RGAT_Model, RGCN_Model, RGDCN_Model, RGIN_Model) 9 | from tasks import Sparse_Graph_Task, QM9_Task, Citation_Network_Task, PPI_Task, VarMisuse_Task 10 | 11 | 12 | def name_to_task_class(name: str) -> Tuple[Type[Sparse_Graph_Task], Dict[str, Any]]: 13 | name = name.lower() 14 | if name == "qm9": 15 | return QM9_Task, {} 16 | if name == "cora": 17 | return Citation_Network_Task, {"data_kind": "cora"} 18 | if name == "citeseer": 19 | return Citation_Network_Task, {"data_kind": "citeseer"} 20 | if name == "pubmed": 21 | return Citation_Network_Task, {"data_kind": "pubmed"} 22 | if name == "citationnetwork": 23 | return Citation_Network_Task, {} 24 | if name == "ppi": 25 | return PPI_Task, {} 26 | if name == "varmisuse": 27 | return VarMisuse_Task, {} 28 | 29 | raise ValueError("Unknown task type '%s'" % name) 30 | 31 | 32 | def name_to_model_class(name: str) -> Tuple[Type[Sparse_Graph_Model], Dict[str, Any]]: 33 | name = name.lower() 34 | if name in ["ggnn", "ggnn_model"]: 35 | return GGNN_Model, {} 36 | if name in ["gnn_edge_mlp", "gnn-edge-mlp", "gnn_edge_mlp_model"]: 37 | return GNN_Edge_MLP_Model, {} 38 | if name in ["gnn_edge_mlp0", "gnn-edge-mlp0", "gnn_edge_mlp0_model"]: 39 | return GNN_Edge_MLP_Model, {'num_edge_hidden_layers': 0} 40 | if name in ["gnn_edge_mlp1", "gnn-edge-mlp1", "gnn_edge_mlp1_model"]: 41 | return GNN_Edge_MLP_Model, {'num_edge_hidden_layers': 1} 42 | if name in ["gnn_edge_mlp", "gnn-edge-mlp"]: 43 | return GNN_Edge_MLP_Model, {} 44 | if name in ["gnn_film", "gnn-film", "gnn_film_model"]: 45 | return GNN_FiLM_Model, {} 46 | if name in ["rgat", "rgat_model"]: 47 | return RGAT_Model, {} 48 | if name in ["rgcn", "rgcn_model"]: 49 | return RGCN_Model, {} 50 | if name in ["rgdcn", "rgdcn_model"]: 51 | return RGDCN_Model, {} 52 | if name in ["rgin", "rgin_model"]: 53 | return RGIN_Model, {} 54 | 55 | raise ValueError("Unknown model type '%s'" % name) 56 | 57 | 58 | def restore(saved_model_path: str, result_dir: str, run_id: str = None) -> Sparse_Graph_Model: 59 | print("Loading model from file %s." 
% saved_model_path) 60 | with open(saved_model_path, 'rb') as in_file: 61 | data_to_load = pickle.load(in_file) 62 | 63 | model_cls, _ = name_to_model_class(data_to_load['model_class']) 64 | task_cls, additional_task_params = name_to_task_class(data_to_load['task_class']) 65 | 66 | if run_id is None: 67 | run_id = "_".join([task_cls.name(), model_cls.name(data_to_load['model_params']), time.strftime("%Y-%m-%d-%H-%M-%S"), str(os.getpid())]) 68 | 69 | task = task_cls(data_to_load['task_params']) 70 | task.restore_from_metadata(data_to_load['task_metadata']) 71 | 72 | model = model_cls(data_to_load['model_params'], task, run_id, result_dir) 73 | model.load_weights(data_to_load['weights']) 74 | 75 | model.log_line("Loaded model from snapshot %s." % saved_model_path) 76 | 77 | return model 78 | -------------------------------------------------------------------------------- /utils/utils.py: -------------------------------------------------------------------------------- 1 | from typing import Optional, Callable, Union, List 2 | 3 | import tensorflow as tf 4 | 5 | 6 | BIG_NUMBER = 1e7 7 | SMALL_NUMBER = 1e-7 8 | 9 | 10 | def get_gated_unit(units: int, gated_unit: str, activation_function: str): 11 | activation_fn = get_activation(activation_function) 12 | gated_unit_name = gated_unit.lower() 13 | if gated_unit_name == 'rnn': 14 | return tf.keras.layers.SimpleRNNCell(units, activation=activation_fn) 15 | if gated_unit_name == 'gru': 16 | return tf.keras.layers.GRUCell(units, activation=activation_fn) 17 | if gated_unit_name == 'lstm': 18 | return tf.keras.layers.LSTMCell(units, activation=activation_fn) 19 | else: 20 | raise Exception("Unknown RNN cell type '%s'." % gated_unit) 21 | 22 | 23 | def get_aggregation_function(aggregation_fun: Optional[str]): 24 | if aggregation_fun in ['sum', 'unsorted_segment_sum']: 25 | return tf.unsorted_segment_sum 26 | if aggregation_fun in ['max', 'unsorted_segment_max']: 27 | return tf.unsorted_segment_max 28 | if aggregation_fun in ['mean', 'unsorted_segment_mean']: 29 | return tf.unsorted_segment_mean 30 | if aggregation_fun in ['sqrt_n', 'unsorted_segment_sqrt_n']: 31 | return tf.unsorted_segment_sqrt_n 32 | else: 33 | raise ValueError("Unknown aggregation function '%s'!" % aggregation_fun) 34 | 35 | 36 | def get_activation(activation_fun: Optional[str]): 37 | if activation_fun is None: 38 | return None 39 | activation_fun = activation_fun.lower() 40 | if activation_fun == 'linear': 41 | return None 42 | if activation_fun == 'tanh': 43 | return tf.tanh 44 | if activation_fun == 'relu': 45 | return tf.nn.relu 46 | if activation_fun == 'leaky_relu': 47 | return tf.nn.leaky_relu 48 | if activation_fun == 'elu': 49 | return tf.nn.elu 50 | if activation_fun == 'selu': 51 | return tf.nn.selu 52 | if activation_fun == 'gelu': 53 | def gelu(input_tensor): 54 | cdf = 0.5 * (1.0 + tf.erf(input_tensor / tf.sqrt(2.0))) 55 | return input_tensor * cdf 56 | return gelu 57 | else: 58 | raise ValueError("Unknown activation function '%s'!" % activation_fun) 59 | 60 | 61 | def micro_f1(logits, labels): 62 | # Everything on int, because who trusts float anyway? 
63 | predicted = tf.round(tf.nn.sigmoid(logits)) 64 | predicted = tf.cast(predicted, dtype=tf.int32) 65 | labels = tf.cast(labels, dtype=tf.int32) 66 | 67 | true_pos = tf.count_nonzero(predicted * labels) 68 | false_pos = tf.count_nonzero(predicted * (labels - 1)) 69 | false_neg = tf.count_nonzero((predicted - 1) * labels) 70 | 71 | precision = true_pos / (true_pos + false_pos) 72 | recall = true_pos / (true_pos + false_neg) 73 | fmeasure = (2 * precision * recall) / (precision + recall) 74 | return tf.cast(fmeasure, tf.float32) 75 | 76 | 77 | class MLP(object): 78 | def __init__(self, 79 | out_size: int, 80 | hidden_layers: Union[List[int], int] = 1, 81 | use_biases: bool = False, 82 | activation_fun: Optional[Callable[[tf.Tensor], tf.Tensor]] = tf.nn.relu, 83 | dropout_rate: Union[float, tf.Tensor] = 0.0, 84 | name: Optional[str] = "MLP", 85 | ): 86 | """ 87 | Create new MLP with given number of hidden layers. 88 | 89 | Arguments: 90 | out_size: Dimensionality of output. 91 | hidden_layers: Either an integer determining number of hidden layers, who will have out_size units each; 92 | or list of integers whose lengths determines the number of hidden layers and whose contents the 93 | number of units in each layer. 94 | use_biases: Flag indicating use of bias in fully connected layers. 95 | activation_fun: Activation function applied between hidden layers (NB: the output of the MLP 96 | is always the direct result of a linear transformation) 97 | dropout_rate: Dropout applied to inputs of each MLP layer. 98 | """ 99 | if isinstance(hidden_layers, int): 100 | hidden_layer_sizes = [out_size] * hidden_layers 101 | else: 102 | hidden_layer_sizes = hidden_layers 103 | 104 | if len(hidden_layer_sizes) > 1: 105 | assert activation_fun is not None, "Multiple linear layers without an activation" 106 | 107 | self.__dropout_rate = dropout_rate 108 | self.__name = name 109 | with tf.variable_scope(self.__name): 110 | self.__layers = [] # type: List[tf.layers.Dense] 111 | for hidden_layer_size in hidden_layer_sizes: 112 | self.__layers.append(tf.layers.Dense(units=hidden_layer_size, 113 | use_bias=use_biases, 114 | activation=activation_fun)) 115 | # Output layer: 116 | self.__layers.append(tf.layers.Dense(units=out_size, 117 | use_bias=use_biases, 118 | activation=None)) 119 | 120 | def __call__(self, input: tf.Tensor) -> tf.Tensor: 121 | with tf.variable_scope(self.__name): 122 | activations = input 123 | for layer in self.__layers[:-1]: 124 | activations = tf.nn.dropout(activations, rate=self.__dropout_rate) 125 | activations = layer(activations) 126 | return self.__layers[-1](activations) 127 | -------------------------------------------------------------------------------- /utils/varmisuse_data_splitter.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """ 3 | Usage: 4 | varmisuse_data_splitter.py [options] RAW_DATA_DIR OUT_DIR 5 | 6 | Reads in datapoints from a set of files and creates smaller files mixing these, in a format 7 | suitable for streaming them into the training process. 8 | 9 | Options: 10 | -h --help Show this screen. 11 | --chunk-size NUM Number of samples per output file. [default: 100] 12 | --num-workers NUM Number of worker processes. Defaults to number of CPU cores. 13 | --window-size NUM Number of samples to load before mixing and writing things out. [default: 5000] 14 | --azure-info= Azure authentication information file (JSON). [default: azure_auth.json] 15 | --debug Turn on debugger. 
16 | """ 17 | from typing import List, Any 18 | 19 | import numpy as np 20 | from more_itertools import chunked 21 | from docopt import docopt 22 | from dpu_utils.utils import run_and_debug, RichPath 23 | from multiprocessing import Process, Queue, cpu_count 24 | 25 | 26 | def _data_loading_worker(file_queue: Queue, result_queue: Queue) -> None: 27 | while True: 28 | next_path = file_queue.get() 29 | if next_path is None: # Our signal that all files have been processed 30 | file_queue.put(None) # Signal to the other workers 31 | result_queue.put(None) # Signal to the controller that we are done 32 | break 33 | 34 | # Read the file and push examples out as soon as we get them: 35 | for raw_sample in next_path.read_by_file_suffix(): 36 | result_queue.put(raw_sample) 37 | 38 | 39 | def _write_data(out_dir: RichPath, window_idx: int, chunk_size: int, data_window: List[Any]): 40 | np.random.shuffle(data_window) 41 | for chunk_idx, data_chunk in enumerate(chunked(data_window, chunk_size)): 42 | out_file = out_dir.join('chunk_%i-%i.jsonl.gz' % (window_idx, chunk_idx)) 43 | out_file.save_as_compressed_file(data_chunk) 44 | 45 | 46 | def run(args): 47 | azure_info_path = args.get('--azure-info', None) 48 | in_dir = RichPath.create(args['RAW_DATA_DIR'], azure_info_path) 49 | out_dir = RichPath.create(args['OUT_DIR'], azure_info_path) 50 | out_dir.make_as_dir() 51 | 52 | num_workers = int(args.get('--num-workers') or cpu_count()) 53 | chunk_size = int(args['--chunk-size']) 54 | window_size = int(args['--window-size']) 55 | 56 | files_to_load = list(in_dir.iterate_filtered_files_in_dir("*.gz")) 57 | path_queue = Queue(maxsize=len(files_to_load) + 1) 58 | result_queue = Queue(1000) 59 | 60 | # Set up list of work to do: 61 | for path in files_to_load: 62 | path_queue.put(path) 63 | path_queue.put(None) # Signal for the end of the queue 64 | 65 | # Set up workers: 66 | workers = [] 67 | for _ in range(num_workers): 68 | workers.append(Process(target=_data_loading_worker, 69 | args=(path_queue, result_queue,))) 70 | workers[-1].start() 71 | 72 | # Consume the data: 73 | num_workers_terminated = 0 74 | data_window = [] 75 | window_idx = 0 76 | while num_workers_terminated < len(workers): 77 | parsed_sample = result_queue.get() 78 | if parsed_sample is None: 79 | num_workers_terminated += 1 # Worker signaled that it's done 80 | else: 81 | data_window.append(parsed_sample) 82 | if len(data_window) >= window_size: 83 | _write_data(out_dir, window_idx, chunk_size, data_window) 84 | data_window = [] 85 | window_idx += 1 86 | 87 | # Write out the remainder of the data: 88 | _write_data(out_dir, window_idx, chunk_size, data_window) 89 | 90 | # Clean up the workers: 91 | for worker in workers: 92 | worker.join() 93 | 94 | 95 | if __name__ == "__main__": 96 | args = docopt(__doc__) 97 | run_and_debug(lambda: run(args), enable_debugging=args['--debug']) 98 | --------------------------------------------------------------------------------
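To make the Sparse_Graph_Task interface from tasks/sparse_graph_task.py more concrete, the following is a minimal sketch of what a new task implementation could look like. Everything here is illustrative and not part of the repository: the class name Toy_Regression_Task, the ring-graph data, the 'node_to_graph_map' and 'target_values' placeholders, and the data path are invented for the example, and it assumes that the generic model only needs the placeholders documented above plus the 'num_graphs' placeholder mentioned in the interface docstrings. A complete, real implementation of the same interface is tasks/qm9_task.py.

from typing import Any, Dict, Iterable, Iterator, List, Optional

import numpy as np
import tensorflow as tf
from dpu_utils.utils import RichPath

from tasks.sparse_graph_task import DataFold, MinibatchData, Sparse_Graph_Task


class Toy_Regression_Task(Sparse_Graph_Task):
    """Hypothetical task regressing one target value per graph; illustrative sketch only."""
    @staticmethod
    def default_data_path() -> str:
        return "data/toy"  # hypothetical location; not part of the repository

    @staticmethod
    def name() -> str:
        return "Toy"

    @property
    def num_edge_types(self) -> int:
        return 1

    @property
    def initial_node_feature_size(self) -> int:
        return 16

    def load_data(self, path: Optional[RichPath]) -> None:
        # A real task parses graphs from disk; this sketch builds small ring graphs
        # as (node_features, adjacency_list, per-graph target) triples.
        def ring(n):
            feats = np.random.randn(n, self.initial_node_feature_size).astype(np.float32)
            edges = np.array([[i, (i + 1) % n] for i in range(n)], dtype=np.int32)
            return feats, edges, float(n)
        for fold in (DataFold.TRAIN, DataFold.VALIDATION):
            self._loaded_data[fold] = [ring(np.random.randint(3, 8)) for _ in range(100)]

    def make_task_output_model(self,
                               placeholders: Dict[str, tf.Tensor],
                               model_ops: Dict[str, tf.Tensor],
                               ) -> None:
        # Placeholders created here must be filled in make_minibatch_iterator:
        placeholders['node_to_graph_map'] = tf.placeholder(tf.int32, [None], name='node_to_graph_map')
        placeholders['target_values'] = tf.placeholder(tf.float32, [None], name='target_values')
        per_node_scores = tf.layers.dense(model_ops['final_node_representations'], units=1, use_bias=False)
        per_graph_scores = tf.unsorted_segment_sum(data=tf.squeeze(per_node_scores, axis=-1),
                                                   segment_ids=placeholders['node_to_graph_map'],
                                                   num_segments=placeholders['num_graphs'])
        diff = per_graph_scores - placeholders['target_values']
        model_ops['task_metrics'] = {'loss': tf.reduce_mean(tf.square(diff)),
                                     'absolute_error': tf.reduce_sum(tf.abs(diff))}

    def make_minibatch_iterator(self,
                                data: Iterable[Any],
                                data_fold: DataFold,
                                model_placeholders: Dict[str, tf.Tensor],
                                max_nodes_per_batch: int,
                                ) -> Iterator[MinibatchData]:
        # Flatten many small graphs into one large graph of disconnected components:
        data = list(data)
        if data_fold == DataFold.TRAIN:
            np.random.shuffle(data)
        feats, edges, node_to_graph, targets, num_nodes = [], [], [], [], 0

        def finish_batch() -> MinibatchData:
            all_edges = np.concatenate(edges, axis=0)
            num_incoming = np.zeros((self.num_edge_types, num_nodes), dtype=np.float32)
            for (_, v) in all_edges:
                num_incoming[0, v] += 1
            feed_dict = {model_placeholders['initial_node_features']: np.concatenate(feats, axis=0),
                         model_placeholders['adjacency_lists'][0]: all_edges,
                         model_placeholders['type_to_num_incoming_edges']: num_incoming,
                         model_placeholders['node_to_graph_map']: np.array(node_to_graph, dtype=np.int32),
                         model_placeholders['target_values']: np.array(targets, dtype=np.float32),
                         model_placeholders['num_graphs']: len(targets)}
            return MinibatchData(feed_dict=feed_dict, num_graphs=len(targets),
                                 num_nodes=num_nodes, num_edges=len(all_edges))

        for node_features, adjacency_list, target in data:
            if targets and num_nodes + len(node_features) > max_nodes_per_batch:
                yield finish_batch()
                feats, edges, node_to_graph, targets, num_nodes = [], [], [], [], 0
            feats.append(node_features)
            edges.append(adjacency_list + num_nodes)  # shift node indices into the batch graph
            node_to_graph.extend([len(targets)] * len(node_features))
            targets.append(target)
            num_nodes += len(node_features)
        if targets:
            yield finish_batch()

    def early_stopping_metric(self,
                              task_metric_results: List[Dict[str, np.ndarray]],
                              num_graphs: int,
                              ) -> float:
        return float(sum(r['absolute_error'] for r in task_metric_results)) / num_graphs

    def pretty_print_epoch_task_metrics(self, task_metric_results, num_graphs) -> str:
        return "MAE: %.5f" % self.early_stopping_metric(task_metric_results, num_graphs)

Hooking such a task into the training driver would additionally require exporting it from tasks/__init__.py and adding a branch for it in name_to_task_class in utils/model_utils.py; after that, train.py would pick it up through the same default-hypers and parameter-override machinery shown above.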
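On a smaller scale, the MLP class from utils/utils.py (exported through utils/__init__.py alongside the activation helpers) is a reusable building block for feed-forward sub-networks. A minimal usage sketch in TF 1.x graph mode; the tensor shape, layer sizes, and the 'readout_mlp' name below are arbitrary illustrations rather than values taken from the repository:

import tensorflow as tf

from utils import MLP, get_activation

node_representations = tf.placeholder(tf.float32, shape=[None, 64], name='node_representations')
readout_mlp = MLP(out_size=1, hidden_layers=[32, 32], use_biases=True,
                  activation_fun=get_activation('relu'), dropout_rate=0.0, name='readout_mlp')
per_node_scores = readout_mlp(node_representations)  # two ReLU hidden layers followed by a linear output layer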