├── .gitignore
├── LICENSE
├── README.md
├── data
│   ├── external
│   │   └── .gitkeep
│   ├── interim
│   │   └── .gitkeep
│   ├── log
│   │   └── .gitkeep
│   ├── processed
│   │   └── .gitkeep
│   └── raw
│       └── .gitkeep
├── experiments
│   ├── __init__.py
│   ├── classification
│   │   ├── baseline_evaluation.ipynb
│   │   ├── baseline_lstm.py
│   │   ├── baseline_merge.py
│   │   ├── baseline_tfidf.py
│   │   ├── graph_based_experiment.py
│   │   ├── model_analysis_for_attention.ipynb
│   │   └── model_analysis_for_graph_structure.ipynb
│   ├── language_model
│   │   ├── __init__.py
│   │   ├── baseline.py
│   │   └── baseline_test.py
│   └── layers
│       ├── __init__.py
│       ├── gat_experiment.py
│       ├── gat_experiment_base.py
│       ├── gat_experiment_original.py
│       ├── gat_experiment_original_without_attention.py
│       └── gat_experiment_without_attention.py
├── gcn
│   ├── __init__.py
│   ├── base_trainer.py
│   ├── classification
│   │   ├── __init__.py
│   │   ├── baseline.py
│   │   ├── baseline_trainer.py
│   │   ├── graph_based_classifier.py
│   │   └── trainer.py
│   ├── data
│   │   ├── __init__.py
│   │   ├── graph_dataset.py
│   │   └── multi_nli_dataset.py
│   ├── graph
│   │   ├── __init__.py
│   │   ├── dependency_graph.py
│   │   ├── similarity_graph.py
│   │   └── static_graph.py
│   ├── language_model
│   │   ├── baseline.py
│   │   ├── similarity_graph_lm.py
│   │   └── trainer.py
│   ├── layers
│   │   ├── __init__.py
│   │   ├── attention_layer.py
│   │   ├── graph_attention_layer.py
│   │   ├── graph_attention_layer_before.py
│   │   ├── graph_attention_layer_multi.py
│   │   ├── graph_attention_layer_original.py
│   │   └── projection_layer.py
│   ├── metrics.py
│   ├── util.py
│   └── visualize
│       ├── __init__.py
│       └── draw.py
├── requirements.txt
└── tests
    ├── __init__.py
    ├── classification
    │   ├── __init__.py
    │   ├── test_baseline_tfidf.py
    │   ├── test_baseline_trainer.py
    │   └── test_trainer.py
    ├── data
    │   ├── __init__.py
    │   ├── test_graph_dataset.py
    │   └── test_multi_nli_dataset.py
    ├── graph
    │   ├── __init__.py
    │   ├── test_dependency_graph.py
    │   ├── test_similarity_graph.py
    │   └── test_static_graph.py
    ├── language_model
    │   ├── __init__.py
    │   ├── test_similarity_graph_lm.py
    │   └── test_trainer.py
    ├── layers
    │   ├── __init__.py
    │   ├── simple_attention_layer.py
    │   ├── simple_attention_layer_multi.py
    │   ├── test_attention_layer.py
    │   ├── test_attention_on_graph.py
    │   └── test_graph_attention_layer.py
    └── visualize
        ├── __init__.py
        └── test_draw.py
/.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | .pytest_cache/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | db.sqlite3 58 | 59 | # Flask stuff: 60 | instance/ 61 | .webassets-cache 62 | 63 | # Scrapy stuff: 64 | .scrapy 65 | 66 | # Sphinx documentation 67 | docs/_build/ 68 | 69 | # PyBuilder 70 | target/ 71 | 72 | # Jupyter Notebook 73 | .ipynb_checkpoints 74 | 75 | # pyenv 76 | .python-version 77 | 78 | # celery beat schedule file 79 | celerybeat-schedule 80 | 81 | # SageMath parsed files 82 | *.sage.py 83 | 84 | # Environments 85 | .env 86 | .venv 87 | env/ 88 | venv/ 89 | ENV/ 90 | env.bak/ 91 | venv.bak/ 92 | 93 | # Spyder project settings 94 | .spyderproject 95 | .spyproject 96 | 97 | # Rope project settings 98 | .ropeproject 99 | 100 | # mkdocs documentation 101 | /site 102 | 103 | # mypy 104 | .mypy_cache/ 105 | 106 | .vscode/ 107 | data/ 108 | !gcn/data/ 109 | !tests/data/ 110 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 Takahiro Kubo 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Graph Convolution for NLP 2 | 3 | Research project to apply Graph Convolution to NLP. 4 | 5 | ## Research 6 | 7 | 1. [Why & What is Graph Convolution](https://medium.com/programming-soda/graph-convolution%E3%82%92%E8%87%AA%E7%84%B6%E8%A8%80%E8%AA%9E%E5%87%A6%E7%90%86%E3%81%AB%E5%BF%9C%E7%94%A8%E3%81%99%E3%82%8B-part1-b792d53c4c18) 8 | 2. [Implementation of Graph Convolution: Graph Attention Network](https://medium.com/programming-soda/graph-convolution%E3%82%92%E8%87%AA%E7%84%B6%E8%A8%80%E8%AA%9E%E5%87%A6%E7%90%86%E3%81%AB%E5%BF%9C%E7%94%A8%E3%81%99%E3%82%8B-part2-dd0f9bc25dd3) 9 | 3. [Kinds of task in Graph](https://medium.com/programming-soda/graph-convolution%E3%82%92%E8%87%AA%E7%84%B6%E8%A8%80%E8%AA%9E%E5%87%A6%E7%90%86%E3%81%AB%E5%BF%9C%E7%94%A8%E3%81%99%E3%82%8B-part3-12e7458f31fb) 10 | 4. 
[Case study to use Graph Convolution in NLP](https://medium.com/programming-soda/graph-convolution%E3%82%92%E8%87%AA%E7%84%B6%E8%A8%80%E8%AA%9E%E5%87%A6%E7%90%86%E3%81%AB%E5%BF%9C%E7%94%A8%E3%81%99%E3%82%8B-part4-4b0082ce26da) 11 | 5. [Design of Experiment to verify effectiveness of Graph Convolution](https://medium.com/programming-soda/graph-convolution%E3%82%92%E8%87%AA%E7%84%B6%E8%A8%80%E8%AA%9E%E5%87%A6%E7%90%86%E3%81%AB%E5%BF%9C%E7%94%A8%E3%81%99%E3%82%8B-part5-c833f01fde58) 12 | 6. [Rethink the property of Graph Convolution and find appropriate task in NLP](https://medium.com/programming-soda/graph-convolution%E3%82%92%E8%87%AA%E7%84%B6%E8%A8%80%E8%AA%9E%E5%87%A6%E7%90%86%E3%81%AB%E5%BF%9C%E7%94%A8%E3%81%99%E3%82%8B-part6-f4596b2bcc93) 13 | 7. [Research summary of Graph Convolution in NLP](https://medium.com/programming-soda/graph-convolution%E3%82%92%E8%87%AA%E7%84%B6%E8%A8%80%E8%AA%9E%E5%87%A6%E7%90%86%E3%81%AB%E5%BF%9C%E7%94%A8%E3%81%99%E3%82%8B-part7-end-3f6812ca08cf) 14 | 15 | ## Implementation 16 | 17 | * [Language Modeling](https://medium.com/programming-soda/%E8%A8%80%E8%AA%9E%E3%83%A2%E3%83%87%E3%83%AB%E3%81%AE%E6%80%A7%E8%83%BD%E3%81%8C-%E5%AE%9F%E8%A3%85%E3%81%AB%E3%82%88%E3%82%8A%E7%95%B0%E3%81%AA%E3%82%8B%E4%BB%B6%E3%82%92%E8%A7%A3%E6%B1%BA%E3%81%99%E3%82%8B-5d36c841fcac) 18 | * Graph Attention Network Layer 19 | * [Fix to support batch data](https://medium.com/programming-soda/graph-attention-network-layer%E3%82%92%E5%AE%9F%E8%A3%85%E3%81%99%E3%82%8B-part1-4a199372b3de) 20 | * [Experiment by existing dataset](https://medium.com/programming-soda/graph-attention-network-layer%E3%82%92%E5%AE%9F%E8%A3%85%E3%81%99%E3%82%8B-part1-4a199372b3de) 21 | * [What is really effective attention method on Graph?](https://medium.com/programming-soda/graph-attention-network-layer%E3%82%92%E5%AE%9F%E8%A3%85%E3%81%99%E3%82%8B-part3-ce3548c3aa5c) 22 | * Graph Convolution for Text Classification 23 | * [Prepare the Dataset](https://medium.com/programming-soda/graph-convolution%E3%81%A7%E8%87%AA%E7%84%B6%E8%A8%80%E8%AA%9E%E5%87%A6%E7%90%86%E3%82%92%E8%A1%8C%E3%81%86-%E3%83%86%E3%82%AD%E3%82%B9%E3%83%88%E5%88%86%E9%A1%9E%E7%B7%A8-part1-3eacc11eb622) 24 | * [Make baseline model and analysis](https://medium.com/programming-soda/graph-convolution%E3%81%A7%E8%87%AA%E7%84%B6%E8%A8%80%E8%AA%9E%E5%87%A6%E7%90%86%E3%82%92%E8%A1%8C%E3%81%86-%E3%83%86%E3%82%AD%E3%82%B9%E3%83%88%E5%88%86%E9%A1%9E%E7%B7%A8-part2-b0f1f0a67b17) 25 | * [Make graph convolution model](https://medium.com/programming-soda/graph-convolution%E3%81%A7%E8%87%AA%E7%84%B6%E8%A8%80%E8%AA%9E%E5%87%A6%E7%90%86%E3%82%92%E8%A1%8C%E3%81%86-%E3%83%86%E3%82%AD%E3%82%B9%E3%83%88%E5%88%86%E9%A1%9E%E7%B7%A8-part3-b85acee1a3e8) 26 | * [Analyze graph convolution model](https://medium.com/programming-soda/graph-convolution%E3%81%A7%E8%87%AA%E7%84%B6%E8%A8%80%E8%AA%9E%E5%87%A6%E7%90%86%E3%82%92%E8%A1%8C%E3%81%86-%E3%83%86%E3%82%AD%E3%82%B9%E3%83%88%E5%88%86%E9%A1%9E%E7%B7%A8-part4-caee203b86af) 27 | * [Enhance graph convolution model](https://medium.com/programming-soda/graph-convolution%E3%81%A7%E8%87%AA%E7%84%B6%E8%A8%80%E8%AA%9E%E5%87%A6%E7%90%86%E3%82%92%E8%A1%8C%E3%81%86-%E3%83%86%E3%82%AD%E3%82%B9%E3%83%88%E5%88%86%E9%A1%9E%E7%B7%A8-part5-end-cc9b0b4aac06) 28 | 29 | ## Paper Reading 30 | 31 | * [How Powerful are Graph Neural Networks?](https://medium.com/programming-soda/graph-neural-network%E3%81%AE%E5%87%A6%E7%90%86%E3%81%A8%E5%8A%B9%E6%9E%9C%E3%82%92%E7%90%86%E8%A7%A3%E3%81%99%E3%82%8B-how-powerful-are-graph-neural-networks-a26ee9245cce) 32 
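
## Usage

The scripts under `experiments/` are the entry points for reproducing the experiments above (for example, `python experiments/classification/graph_based_experiment.py`). The snippet below is a minimal sketch, distilled from that script, of how the pieces fit together for graph-based text classification; the `root` path and the hyperparameter values are illustrative, and it assumes the code is run from the repository root with the packages in `requirements.txt` installed.

```python
import os
from gcn.data.multi_nli_dataset import MultiNLIDataset
from gcn.graph.similarity_graph import SimilarityGraph
from gcn.classification.trainer import Trainer
from gcn.classification.graph_based_classifier import GraphBasedClassifier

root = os.getcwd()  # assumed to be the repository root
dataset = MultiNLIDataset(root)

# Build word graphs from word-vector similarity
# (DependencyGraph and StaticGraph are the other available builders).
graph_builder = SimilarityGraph(lang="en")
trainer = Trainer(graph_builder, root, log_dir="classifier")
trainer.build()  # downloads the dataset and fits the preprocessor

sequence_length = 25
vocab_size = len(trainer.preprocessor.vocabulary.get())

def preprocessor(x):
    processed = trainer.preprocess(x, sequence_length)
    return processed["text"], processed["graph"]

model = GraphBasedClassifier(vocab_size, sequence_length)
model.build(trainer.num_classes, preprocessor)
trainer.train(model.model, epochs=25)

predictions = model.predict(dataset.test_data()["text"])
```
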
| -------------------------------------------------------------------------------- /data/external/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/icoxfog417/graph-convolution-nlp/2f15da072e401528d9faf76985d05afce336798f/data/external/.gitkeep -------------------------------------------------------------------------------- /data/interim/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/icoxfog417/graph-convolution-nlp/2f15da072e401528d9faf76985d05afce336798f/data/interim/.gitkeep -------------------------------------------------------------------------------- /data/log/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/icoxfog417/graph-convolution-nlp/2f15da072e401528d9faf76985d05afce336798f/data/log/.gitkeep -------------------------------------------------------------------------------- /data/processed/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/icoxfog417/graph-convolution-nlp/2f15da072e401528d9faf76985d05afce336798f/data/processed/.gitkeep -------------------------------------------------------------------------------- /data/raw/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/icoxfog417/graph-convolution-nlp/2f15da072e401528d9faf76985d05afce336798f/data/raw/.gitkeep -------------------------------------------------------------------------------- /experiments/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/icoxfog417/graph-convolution-nlp/2f15da072e401528d9faf76985d05afce336798f/experiments/__init__.py -------------------------------------------------------------------------------- /experiments/classification/baseline_lstm.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import numpy as np 4 | from sklearn.metrics import classification_report 5 | sys.path.append(os.path.join(os.path.dirname(__file__), "../../")) 6 | from gcn.data.multi_nli_dataset import MultiNLIDataset 7 | from gcn.classification.baseline import LSTMClassifier 8 | from gcn.classification.baseline_trainer import BaselineTrainer 9 | 10 | 11 | def main(): 12 | root = os.path.join(os.path.dirname(__file__), "../../") 13 | dataset = MultiNLIDataset(root) 14 | trainer = BaselineTrainer(root, log_dir="classifier_baseline") 15 | trainer.build() 16 | sequence_length = 25 17 | 18 | vocab_size = len(trainer.preprocessor.vocabulary.get()) 19 | 20 | def preprocessor(x): 21 | _x = trainer.preprocess(x, sequence_length) 22 | return _x["text"] 23 | 24 | model = LSTMClassifier(vocab_size) 25 | model.build(trainer.num_classes, preprocessor) 26 | 27 | metrics = trainer.train(model.model, epochs=25, 28 | sequence_length=sequence_length, 29 | representation="GloVe.6B.100d") 30 | 31 | test_data = dataset.test_data() 32 | y_pred = model.predict(test_data["text"]) 33 | 34 | print(classification_report(test_data["label"], y_pred, 35 | target_names=dataset.labels())) 36 | 37 | 38 | if __name__ == "__main__": 39 | main() 40 | -------------------------------------------------------------------------------- /experiments/classification/baseline_merge.py: 
-------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import numpy as np 4 | from sklearn.metrics import classification_report 5 | sys.path.append(os.path.join(os.path.dirname(__file__), "../../")) 6 | from gcn.data.multi_nli_dataset import MultiNLIDataset 7 | from gcn.classification.baseline import MergeClassifier 8 | from gcn.classification.baseline_trainer import BaselineTrainer 9 | 10 | 11 | def main(): 12 | root = os.path.join(os.path.dirname(__file__), "../../") 13 | dataset = MultiNLIDataset(root) 14 | trainer = BaselineTrainer(root, log_dir="classifier_baseline") 15 | trainer.build() 16 | sequence_length = 25 17 | 18 | vocab_size = len(trainer.preprocessor.vocabulary.get()) 19 | 20 | def preprocessor(x): 21 | _x = trainer.preprocess(x, sequence_length) 22 | return _x["text"] 23 | 24 | model = MergeClassifier(vocab_size) 25 | model.build(trainer.num_classes, preprocessor) 26 | 27 | metrics = trainer.train(model.model, epochs=25, 28 | sequence_length=sequence_length, 29 | representation="GloVe.6B.100d") 30 | 31 | test_data = dataset.test_data() 32 | y_pred = model.predict(test_data["text"]) 33 | 34 | print(classification_report(test_data["label"], y_pred, 35 | target_names=dataset.labels())) 36 | 37 | 38 | if __name__ == "__main__": 39 | main() 40 | -------------------------------------------------------------------------------- /experiments/classification/baseline_tfidf.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | from sklearn.metrics import classification_report 4 | sys.path.append(os.path.join(os.path.dirname(__file__), "../../")) 5 | from gcn.data.multi_nli_dataset import MultiNLIDataset 6 | from gcn.classification.baseline import TfidfClassifier 7 | 8 | 9 | def main(): 10 | root = os.path.join(os.path.dirname(__file__), "../../") 11 | dataset = MultiNLIDataset(root) 12 | classifier = TfidfClassifier() 13 | 14 | train_data = dataset.train_data() 15 | scores = classifier.fit(train_data["text"], train_data["label"]) 16 | 17 | test_data = dataset.test_data() 18 | y_pred = classifier.predict(test_data["text"]) 19 | 20 | print(classification_report(test_data["label"], y_pred, 21 | target_names=dataset.labels())) 22 | 23 | 24 | if __name__ == "__main__": 25 | main() 26 | -------------------------------------------------------------------------------- /experiments/classification/graph_based_experiment.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | from sklearn.metrics import classification_report 4 | sys.path.append(os.path.join(os.path.dirname(__file__), "../../")) 5 | from gcn.data.multi_nli_dataset import MultiNLIDataset 6 | from gcn.classification.trainer import Trainer 7 | from gcn.graph.dependency_graph import DependencyGraph 8 | from gcn.graph.similarity_graph import SimilarityGraph 9 | from gcn.graph.static_graph import StaticGraph 10 | from gcn.classification.graph_based_classifier import GraphBasedClassifier 11 | 12 | 13 | def main(graph_type="dependency", epochs=25): 14 | root = os.path.join(os.path.dirname(__file__), "../../") 15 | dataset = MultiNLIDataset(root) 16 | 17 | if graph_type == "dependency": 18 | graph_builder = DependencyGraph(lang="en") 19 | elif graph_type == "similarity": 20 | graph_builder = SimilarityGraph(lang="en") 21 | else: 22 | graph_builder = StaticGraph(lang="en") 23 | 24 | trainer = Trainer(graph_builder, root, log_dir="classifier") 25 | 
trainer.build() 26 | 27 | sequence_length = 25 28 | vocab_size = len(trainer.preprocessor.vocabulary.get()) 29 | 30 | def preprocessor(x): 31 | _x = trainer.preprocess(x, sequence_length) 32 | values = (_x["text"], _x["graph"]) 33 | return values 34 | 35 | model = GraphBasedClassifier(vocab_size, sequence_length, 36 | lstm=None) 37 | model.build(trainer.num_classes, preprocessor) 38 | 39 | metrics = trainer.train(model.model, epochs=epochs) 40 | 41 | test_data = dataset.test_data() 42 | y_pred = model.predict(test_data["text"]) 43 | 44 | print(classification_report(test_data["label"], y_pred, 45 | target_names=dataset.labels())) 46 | 47 | 48 | if __name__ == "__main__": 49 | main() 50 | -------------------------------------------------------------------------------- /experiments/language_model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/icoxfog417/graph-convolution-nlp/2f15da072e401528d9faf76985d05afce336798f/experiments/language_model/__init__.py -------------------------------------------------------------------------------- /experiments/language_model/baseline.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | sys.path.append(os.path.join(os.path.dirname(__file__), "../../")) 4 | from gcn.language_model.trainer import Trainer 5 | from gcn.language_model.baseline import LSTMLM 6 | 7 | 8 | def main(): 9 | root = os.path.join(os.path.dirname(__file__), "../../") 10 | trainer = Trainer(root, log_dir="language_model_baseline") 11 | trainer.build() 12 | vocab_size = len(trainer.preprocessor.vocabulary.get()) 13 | print("vocab size: {}".format(vocab_size)) 14 | model = LSTMLM(vocab_size) 15 | trainer.train(model, epochs=10) 16 | 17 | 18 | if __name__ == "__main__": 19 | main() 20 | -------------------------------------------------------------------------------- /experiments/language_model/baseline_test.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | sys.path.append(os.path.join(os.path.dirname(__file__), "../../")) 4 | from gcn.language_model.trainer import Trainer 5 | from gcn.language_model.baseline import LSTMLM 6 | 7 | 8 | def main(): 9 | root = os.path.join(os.path.dirname(__file__), "../../") 10 | trainer = Trainer(root, preprocessor_name="baseline_preprocessor_test") 11 | trainer.build(data_kind="valid") 12 | vocab_size = len(trainer.preprocessor.vocabulary.get()) 13 | print("vocab size: {}".format(vocab_size)) 14 | model = LSTMLM(vocab_size) 15 | trainer.train(model, data_kind="valid", epochs=10) 16 | 17 | 18 | if __name__ == "__main__": 19 | main() 20 | -------------------------------------------------------------------------------- /experiments/layers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/icoxfog417/graph-convolution-nlp/2f15da072e401528d9faf76985d05afce336798f/experiments/layers/__init__.py -------------------------------------------------------------------------------- /experiments/layers/gat_experiment.py: -------------------------------------------------------------------------------- 1 | from gat_experiment_base import run_experiment 2 | 3 | 4 | run_experiment(original=False, attention=True) 5 | -------------------------------------------------------------------------------- /experiments/layers/gat_experiment_base.py: 
-------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | sys.path.append(os.path.join(os.path.dirname(__file__), "../../")) 4 | import numpy as np 5 | import scipy.sparse as sp 6 | import tensorflow as tf 7 | from tensorflow.python import keras as K 8 | from chariot.storage import Storage 9 | from gcn.data.graph_dataset import GraphDataset 10 | 11 | 12 | """ 13 | Evaluation script is ported from 14 | https://github.com/danielegrattarola/keras-gat/blob/master/examples/gat.py 15 | """ 16 | 17 | 18 | def preprocess_features(features): 19 | """Row-normalize feature matrix and convert to tuple representation""" 20 | rowsum = np.array(features.sum(1)) 21 | r_inv = np.power(rowsum, -1).flatten() 22 | r_inv[np.isinf(r_inv)] = 0. 23 | r_mat_inv = sp.diags(r_inv) 24 | features = r_mat_inv.dot(features) 25 | return features.todense() 26 | 27 | 28 | def run_experiment(original=True, attention=True): 29 | # Read data 30 | root = os.path.join(os.path.dirname(__file__), "../../") 31 | storage = Storage(root) 32 | gd = GraphDataset(root, kind="cora") 33 | data = gd.download(return_mask=original) 34 | A, X, Y_train, Y_val, Y_test, idx_train, idx_val, idx_test = data 35 | 36 | # Parameters 37 | N = X.shape[0] # Number of nodes in the graph 38 | F = X.shape[1] # Original feature dimension 39 | n_classes = Y_train.shape[1] # Number of classes 40 | F_ = 8 # Output size of first GraphAttention layer 41 | n_attn_heads = 8 # Number of attention heads in first GAT layer 42 | dropout_rate = 0.6 # Dropout rate (between and inside GAT layers) 43 | l2_reg = 5e-4/2 # Factor for l2 regularization 44 | learning_rate = 5e-3 # Learning rate for Adam 45 | epochs = 120 # Number of training epochs 46 | es_patience = 100 # Patience fot early stopping 47 | l2 = K.regularizers.l2 48 | node_size = 32 49 | 50 | # Preprocessing operations 51 | X = preprocess_features(X) 52 | A = A + np.eye(A.shape[0]) # Add self-loops 53 | 54 | # Model definition (as per Section 3.3 of the paper) 55 | if original: 56 | from gcn.layers.graph_attention_layer_original import GraphAttentionLayer 57 | X_in = K.layers.Input(shape=(F,)) 58 | A_in = K.layers.Input(shape=(N,)) 59 | else: 60 | from gcn.layers.graph_attention_layer import GraphAttentionLayer 61 | X_in = K.layers.Input(shape=(N, F)) 62 | A_in = K.layers.Input(shape=(N, N)) 63 | 64 | I_in = K.layers.Input(shape=(node_size,), dtype="int32") 65 | 66 | dropout1 = K.layers.Dropout(dropout_rate)(X_in) 67 | 68 | graph_attention_1 = GraphAttentionLayer( 69 | feature_units=F_, 70 | attn_heads=n_attn_heads, 71 | attn_heads_reduction="concat", 72 | dropout_rate=dropout_rate, 73 | activation="elu", 74 | kernel_regularizer=l2(l2_reg), 75 | attention=attention, 76 | attn_kernel_regularizer=l2(l2_reg))([dropout1, A_in]) 77 | 78 | dropout2 = K.layers.Dropout(dropout_rate)(graph_attention_1) 79 | graph_attention_2 = GraphAttentionLayer( 80 | n_classes, 81 | attn_heads=1, 82 | attn_heads_reduction="average", 83 | dropout_rate=dropout_rate, 84 | activation="softmax", 85 | kernel_regularizer=l2(l2_reg), 86 | attention=attention, 87 | attn_kernel_regularizer=l2(l2_reg))([dropout2, A_in]) 88 | 89 | # Build model 90 | optimizer = K.optimizers.Adam(lr=learning_rate) 91 | 92 | if original: 93 | model = K.models.Model(inputs=[X_in, A_in], outputs=graph_attention_2) 94 | model.compile(optimizer=optimizer, 95 | loss="categorical_crossentropy", 96 | weighted_metrics=["acc"]) 97 | else: 98 | output = K.layers.Lambda( 99 | lambda x: tf.reshape(tf.batch_gather(x, 
I_in), 100 | (-1, node_size, n_classes)))(graph_attention_2) 101 | model = K.models.Model(inputs=[X_in, A_in, I_in], outputs=output) 102 | model.compile(optimizer=optimizer, 103 | loss="categorical_crossentropy", 104 | metrics=["acc"]) 105 | 106 | model.summary() 107 | 108 | # Callbacks 109 | experiment_dir = "log/gan_experiment" 110 | monitor = "val_acc" 111 | if original: 112 | experiment_dir += "_o" 113 | monitor = "val_weighted_acc" 114 | if not attention: 115 | experiment_dir += "_na" 116 | 117 | experiment_dir = storage.data_path(experiment_dir) 118 | model_path = os.path.join(experiment_dir, "best_model.h5") 119 | es_callback = K.callbacks.EarlyStopping( 120 | monitor=monitor, patience=es_patience) 121 | tb_callback = K.callbacks.TensorBoard(log_dir=experiment_dir) 122 | mc_callback = K.callbacks.ModelCheckpoint( 123 | model_path, 124 | monitor=monitor, 125 | save_best_only=True, 126 | save_weights_only=True) 127 | 128 | def batch_generator(indices, label): 129 | if len(indices) != len(label): 130 | raise Exception("Does not match length") 131 | batch_size = len(indices) 132 | batch_size = batch_size // node_size 133 | 134 | def generator(): 135 | while True: 136 | for i in range(batch_size): 137 | _X = np.array([X]) 138 | _A = np.array([A]) 139 | samples = np.random.randint(len(indices), size=node_size) 140 | _i = np.array([indices[samples]]) 141 | _label = np.array([label[samples]]) 142 | yield [_X, _A, _i], _label 143 | return generator(), batch_size 144 | 145 | if original: 146 | validation_data = ([X, A], Y_val, idx_val) 147 | model.fit([X, A], 148 | Y_train, 149 | sample_weight=idx_train, 150 | epochs=epochs, 151 | batch_size=N, 152 | validation_data=validation_data, 153 | shuffle=False, # Shuffling data means shuffling the whole graph 154 | callbacks=[es_callback, tb_callback, mc_callback]) 155 | 156 | # Load best model 157 | model.load_weights(model_path) 158 | 159 | # Evaluate model 160 | eval_results = model.evaluate([X, A], 161 | Y_test, 162 | sample_weight=idx_test, 163 | batch_size=N, 164 | verbose=0) 165 | else: 166 | val_generator, val_steps = batch_generator(idx_val, Y_val) 167 | train_generator, train_steps = batch_generator(idx_train, Y_train) 168 | 169 | model.fit_generator( 170 | train_generator, train_steps, 171 | validation_data=val_generator, validation_steps=val_steps, 172 | epochs=epochs, 173 | callbacks=[es_callback, tb_callback, mc_callback]) 174 | 175 | # Load best model 176 | model.load_weights(model_path) 177 | 178 | # Evaluate model 179 | test_generator, test_steps = batch_generator(idx_test, Y_test) 180 | eval_results = model.evaluate_generator( 181 | test_generator, test_steps, 182 | verbose=0) 183 | 184 | print("Done.\n" 185 | "Test loss: {}\n" 186 | "Test accuracy: {}".format(*eval_results)) 187 | -------------------------------------------------------------------------------- /experiments/layers/gat_experiment_original.py: -------------------------------------------------------------------------------- 1 | from gat_experiment_base import run_experiment 2 | 3 | 4 | run_experiment(original=True, attention=True) 5 | -------------------------------------------------------------------------------- /experiments/layers/gat_experiment_original_without_attention.py: -------------------------------------------------------------------------------- 1 | from gat_experiment_base import run_experiment 2 | 3 | 4 | run_experiment(original=True, attention=False) 5 | -------------------------------------------------------------------------------- 
/experiments/layers/gat_experiment_without_attention.py: -------------------------------------------------------------------------------- 1 | from gat_experiment_base import run_experiment 2 | 3 | 4 | run_experiment(original=False, attention=False) 5 | -------------------------------------------------------------------------------- /gcn/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/icoxfog417/graph-convolution-nlp/2f15da072e401528d9faf76985d05afce336798f/gcn/__init__.py -------------------------------------------------------------------------------- /gcn/base_trainer.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | from sklearn.externals import joblib 4 | from chariot.storage import Storage 5 | import chariot.transformer as ct 6 | from chariot.preprocessor import Preprocessor 7 | 8 | 9 | class BaseTrainer(): 10 | 11 | def __init__(self, root="", lang=None, min_df=5, max_df=sys.maxsize, 12 | unknown="", preprocessor_name="preprocessor", log_dir=""): 13 | default_root = os.path.join(os.path.dirname(__file__), "../../") 14 | _root = root if root else default_root 15 | 16 | self.storage = Storage(_root) 17 | self.preprocessor_name = preprocessor_name 18 | self._base_log_dir = log_dir 19 | self._built = False 20 | self.preprocessor = Preprocessor( 21 | text_transformers=[ 22 | ct.text.UnicodeNormalizer(), 23 | ct.text.LowerNormalizer() 24 | ], 25 | tokenizer=ct.Tokenizer(lang=lang), 26 | vocabulary=ct.Vocabulary( 27 | min_df=min_df, max_df=max_df, 28 | unknown=unknown)) 29 | 30 | def load_preprocessor(self): 31 | if os.path.exists(self.preprocessor_path): 32 | self._built = True 33 | self.preprocessor = joblib.load(self.preprocessor_path) 34 | 35 | @property 36 | def preprocessor_path(self): 37 | if self._base_log_dir: 38 | path = self._log_dir + "/{}.pkl".format(self.preprocessor_name) 39 | return self.storage.data_path(path) 40 | else: 41 | path = "interim/{}.pkl".format(self.preprocessor_name) 42 | return self.storage.data_path(path) 43 | 44 | @property 45 | def _log_dir(self): 46 | folder = "/" + self._base_log_dir if self._base_log_dir else "" 47 | log_dir = "log{}".format(folder) 48 | if not os.path.exists(self.storage.data_path(log_dir)): 49 | os.mkdir(self.storage.data_path(log_dir)) 50 | 51 | return log_dir 52 | 53 | @property 54 | def log_dir(self): 55 | return self.storage.data_path(self._log_dir) 56 | 57 | @property 58 | def model_path(self): 59 | return self.storage.data_path(self._log_dir + "/model.h5") 60 | 61 | @property 62 | def tensorboard_dir(self): 63 | return self.storage.data_path(self._log_dir) 64 | 65 | def download(self): 66 | raise Exception("You have to specify what kinds of data you use.") 67 | 68 | def build(self, data_kind="train", field="", save=True): 69 | if not self._built: 70 | self.load_preprocessor() 71 | if self._built: 72 | print("Load existing preprocessor {}.".format( 73 | os.path.basename(self.preprocessor_path))) 74 | return 0 75 | 76 | r = self.download() 77 | if data_kind == "test": 78 | data = r.test_data() 79 | elif data_kind == "valid": 80 | data = r.valid_data() 81 | else: 82 | data = r.train_data() 83 | 84 | print("Building Dictionary from {} data...".format(data_kind)) 85 | if not field: 86 | self.preprocessor.fit(data) 87 | else: 88 | self.preprocessor.fit(data[field]) 89 | 90 | if save: 91 | joblib.dump(self.preprocessor, self.preprocessor_path) 92 | self._built = True 93 | print("Done!") 94 | 
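# Illustrative usage sketch (not part of the library): a concrete trainer only
# has to override `download` so that `build` can fit the preprocessor on the
# returned dataset, which must expose train_data()/valid_data()/test_data().
# The class and argument values below are hypothetical, e.g.:
#
#   class MyTrainer(BaseTrainer):
#       def download(self):
#           return MultiNLIDataset(self.storage.root).download()
#
#   trainer = MyTrainer(root=".", log_dir="my_experiment")
#   trainer.build(data_kind="train", field="text")
#   vocab_size = len(trainer.preprocessor.vocabulary.get())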
-------------------------------------------------------------------------------- /gcn/classification/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/icoxfog417/graph-convolution-nlp/2f15da072e401528d9faf76985d05afce336798f/gcn/classification/__init__.py -------------------------------------------------------------------------------- /gcn/classification/baseline.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from sklearn.feature_extraction.text import TfidfVectorizer 3 | from sklearn.linear_model import LogisticRegression 4 | from sklearn.pipeline import Pipeline 5 | from sklearn.model_selection import cross_val_score 6 | from tensorflow.python import keras as K 7 | import tensorflow as tf 8 | from gcn.util import gpu_enable 9 | 10 | 11 | class TfidfClassifier(): 12 | 13 | def __init__(self, max_df=1.0, min_df=1, vocabulary=None): 14 | self.vectorizer = TfidfVectorizer(max_df=max_df, min_df=min_df, 15 | vocabulary=vocabulary) 16 | self.classifier = LogisticRegression(penalty="l1", solver="liblinear", 17 | multi_class="ovr") 18 | self.model = Pipeline([("vectorizer", self.vectorizer), 19 | ("classifier", self.classifier)]) 20 | 21 | def fit(self, x, y, cv=5): 22 | scores = cross_val_score(self.model, x, y, cv=cv, scoring="f1_micro") 23 | self.model.fit(x, y) 24 | return scores 25 | 26 | def predict(self, x): 27 | return self.model.predict(x) 28 | 29 | def predict_proba(self, x): 30 | return self.model.predict_proba(x) 31 | 32 | 33 | class MergeClassifier(): 34 | 35 | def __init__(self, vocab_size, embedding_size=100, 36 | merge_method="add"): 37 | self.vocab_size = vocab_size 38 | self.embedding_size = embedding_size 39 | self.merge_method = merge_method 40 | self.model = None 41 | 42 | def build(self, num_classes, preprocessor=None): 43 | self.preprocessor = preprocessor 44 | model = K.Sequential() 45 | embedding = K.layers.Embedding(input_dim=self.vocab_size, 46 | output_dim=self.embedding_size, 47 | embeddings_regularizer=K.regularizers.l2(), 48 | name="embedding", 49 | mask_zero=True) 50 | model.add(embedding) 51 | 52 | if self.merge_method == "mean": 53 | def mask_mean(x, mask): 54 | sum = K.backend.sum(x, axis=1) 55 | total = K.backend.sum(tf.to_float(mask), axis=1, keepdims=True) 56 | return tf.divide(sum, total) 57 | 58 | model.add(K.layers.Lambda(mask_mean)) 59 | else: 60 | model.add(K.layers.Lambda(lambda x: K.backend.sum(x, axis=1))) 61 | 62 | model.add(K.layers.Dense(num_classes, activation="softmax")) 63 | 64 | self.model = model 65 | 66 | def predict(self, x): 67 | preds = self.predict_proba(x) 68 | return np.argmax(preds, axis=1) 69 | 70 | def predict_proba(self, x): 71 | _x = x if self.preprocessor is None else self.preprocessor(x) 72 | return self.model.predict(_x) 73 | 74 | 75 | class LSTMClassifier(): 76 | 77 | def __init__(self, vocab_size, embedding_size=100, hidden_size=100, 78 | layers=1, dropout=0.5, bidirectional=False): 79 | 80 | self.vocab_size = vocab_size 81 | self.embedding_size = embedding_size 82 | self.hidden_size = hidden_size 83 | self.layers = layers 84 | self.dropout = dropout 85 | self.bidirectional = bidirectional 86 | self.model = None 87 | 88 | def build(self, num_classes, preprocessor=None): 89 | self.preprocessor = preprocessor 90 | model = K.Sequential() 91 | embedding = K.layers.Embedding(input_dim=self.vocab_size, 92 | output_dim=self.embedding_size, 93 | embeddings_regularizer=K.regularizers.l2(), 94 | 
name="embedding", 95 | mask_zero=True) 96 | model.add(embedding) 97 | model.add(K.layers.Dropout(self.dropout)) 98 | for layer in range(self.layers): 99 | lstm_layer = K.layers.CuDNNLSTM if gpu_enable() else K.layers.LSTM 100 | lstm = lstm_layer(self.hidden_size) 101 | if self.bidirectional: 102 | lstm = K.layers.Bidirectional(lstm, merge_mode="concat") 103 | model.add(lstm) 104 | 105 | model.add(K.layers.Dropout(self.dropout)) 106 | model.add(K.layers.Dense(num_classes, activation="softmax")) 107 | 108 | self.model = model 109 | 110 | def predict(self, x): 111 | preds = self.predict_proba(x) 112 | return np.argmax(preds, axis=1) 113 | 114 | def predict_proba(self, x): 115 | _x = x if self.preprocessor is None else self.preprocessor(x) 116 | return self.model.predict(_x) 117 | -------------------------------------------------------------------------------- /gcn/classification/baseline_trainer.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | from tensorflow.python import keras as K 4 | import chariot.transformer as ct 5 | from chariot.preprocess import Preprocess 6 | from chariot.feeder import Feeder 7 | from chariot.transformer.formatter import Padding 8 | from gcn.base_trainer import BaseTrainer 9 | from gcn.data.multi_nli_dataset import MultiNLIDataset 10 | 11 | 12 | class BaselineTrainer(BaseTrainer): 13 | 14 | def __init__(self, root="", lang=None, min_df=1, max_df=1.0, 15 | unknown="", preprocessor_name="preprocessor", 16 | log_dir=""): 17 | super().__init__(root, lang, min_df, max_df, unknown, 18 | preprocessor_name, log_dir) 19 | 20 | def download(self): 21 | r = MultiNLIDataset(self.storage.root).download() 22 | return r 23 | 24 | @property 25 | def num_classes(self): 26 | return len(MultiNLIDataset.labels()) 27 | 28 | def build(self, data_kind="train", save=True): 29 | super().build(data_kind, "text", save) 30 | if self.preprocessor.vocabulary.pad != 0: 31 | raise Exception("Padding is not executed by zero.") 32 | 33 | def train(self, model, data_kind="train", lr=1e-3, 34 | batch_size=20, sequence_length=25, 35 | representation="GloVe.6B.100d", 36 | epochs=40, verbose=2): 37 | 38 | if not self._built: 39 | raise Exception("Trainer's preprocessor is not built.") 40 | 41 | if representation is not None: 42 | print("Load word embedding...") 43 | self.storage.chakin(name=representation) 44 | file_path = "external/{}.txt".format(representation.lower()) 45 | weights = [self.preprocessor.vocabulary.make_embedding( 46 | self.storage.data_path(file_path))] 47 | model.get_layer("embedding").set_weights(weights) 48 | 49 | r = self.download() 50 | 51 | train_data = self.preprocess(r.train_data(), sequence_length) 52 | test_data = self.preprocess(r.test_data(), sequence_length) 53 | 54 | # Set optimizer 55 | model.compile(loss="sparse_categorical_crossentropy", 56 | optimizer=K.optimizers.Adam(lr=lr), 57 | metrics=["accuracy"]) 58 | 59 | metrics = model.fit(train_data["text"], train_data["label"], 60 | validation_data=(test_data["text"], test_data["label"]), 61 | batch_size=batch_size, 62 | epochs=epochs, verbose=verbose) 63 | 64 | return metrics 65 | 66 | def preprocess(self, data, length): 67 | _data = data 68 | if isinstance(data, (list, tuple)): 69 | _data = pd.Series(data, name="text").to_frame() 70 | elif isinstance(data, pd.Series): 71 | _data = data.to_frame() 72 | 73 | preprocess = Preprocess({ 74 | "text": self.preprocessor 75 | }) 76 | feeder = Feeder({"text": Padding.from_(self.preprocessor, 77 | 
length=length)}) 78 | 79 | _data = preprocess.transform(_data) 80 | _data = feeder.transform(_data) 81 | 82 | return _data 83 | -------------------------------------------------------------------------------- /gcn/classification/graph_based_classifier.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from tensorflow.python import keras as K 3 | import tensorflow as tf 4 | from gcn.layers.graph_attention_layer import GraphAttentionLayer 5 | from gcn.util import gpu_enable 6 | 7 | 8 | class GraphBasedClassifier(): 9 | 10 | def __init__(self, vocab_size, graph_size, 11 | embedding_size=100, hidden_size=100, 12 | head_types=("concat",), heads=1, dropout=0.6, 13 | node_level_bias=False, with_attention=True, 14 | lstm=None, bidirectional=False): 15 | 16 | self.vocab_size = vocab_size 17 | self.graph_size = graph_size 18 | self.embedding_size = embedding_size 19 | self.hidden_size = hidden_size 20 | self.head_types = head_types 21 | self.heads = heads 22 | self.dropout = dropout 23 | self.node_level_bias = node_level_bias 24 | self.with_attention = with_attention 25 | self.lstm = lstm 26 | self.bidirectional = bidirectional 27 | self.model = None 28 | self._attention = None 29 | self.preprocessor = None 30 | 31 | def build(self, num_classes, preprocessor=None): 32 | X_in = K.layers.Input(shape=(self.graph_size,)) 33 | A_in = K.layers.Input(shape=(self.graph_size, self.graph_size)) 34 | self.preprocessor = preprocessor 35 | 36 | embedding = K.layers.Embedding(input_dim=self.vocab_size, 37 | output_dim=self.embedding_size, 38 | input_length=self.graph_size, 39 | embeddings_regularizer=K.regularizers.l2(), 40 | name="embedding", 41 | mask_zero=True) 42 | vectors = embedding(X_in) 43 | _vectors = K.layers.Dropout(self.dropout)(vectors) 44 | 45 | def lstm(return_sequences): 46 | # CuDNNLSTM does not support mask. 
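            # (The Embedding layer above is created with mask_zero=True, and the
            # cuDNN-backed LSTM cannot consume that mask, so the plain Keras LSTM
            # is used here even when a GPU is available.)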
47 | # layer = K.layers.CuDNNLSTM if gpu_enable() else K.layers.LSTM 48 | layer = K.layers.LSTM 49 | _lstm = layer(self.hidden_size, return_sequences=return_sequences, 50 | dropout=self.dropout, recurrent_dropout=self.dropout) 51 | if self.bidirectional: 52 | _lstm = K.layers.Bidirectional(_lstm, merge_mode="concat") 53 | return _lstm 54 | 55 | if self.lstm is not None and self.lstm == "before": 56 | _vectors = lstm(return_sequences=True)(_vectors) 57 | 58 | attentions = [] 59 | for ht in self.head_types: 60 | gh = GraphAttentionLayer( 61 | feature_units=self.hidden_size, 62 | attn_heads=self.heads, 63 | attn_heads_reduction=ht, 64 | dropout_rate=self.dropout, 65 | kernel_regularizer=K.regularizers.l2(), 66 | attention=self.with_attention, 67 | attn_kernel_regularizer=K.regularizers.l2(), 68 | return_attention=True, 69 | node_level_bias=self.node_level_bias) 70 | _vectors, attention = gh([_vectors, A_in]) 71 | attentions.append(attention) 72 | 73 | if self.lstm is not None and self.lstm == "after": 74 | merged = lstm(return_sequences=False)(_vectors) 75 | else: 76 | merged = K.layers.Lambda(lambda x: K.backend.sum(x, axis=1))(_vectors) 77 | 78 | probs = K.layers.Dense(num_classes, activation="softmax")(merged) 79 | 80 | self.model = K.models.Model(inputs=[X_in, A_in], outputs=probs) 81 | self._attention = K.models.Model(inputs=[X_in, A_in], 82 | outputs=attentions) 83 | 84 | def predict(self, x): 85 | preds = self.predict_proba(x) 86 | return np.argmax(preds, axis=1) 87 | 88 | def predict_proba(self, x): 89 | _x = x if self.preprocessor is None else self.preprocessor(x) 90 | return self.model.predict(_x) 91 | 92 | def show_attention(self, x): 93 | _x = x if self.preprocessor is None else self.preprocessor(x) 94 | attentions = self._attention.predict(_x) 95 | if len(self.head_types) == 1: 96 | attentions = [attentions] 97 | 98 | # batch, layer, head, node_size, node_size 99 | attentions = np.array(attentions) 100 | attentions = np.transpose(attentions, (1, 0, 2, 3, 4)) 101 | return attentions 102 | -------------------------------------------------------------------------------- /gcn/classification/trainer.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | from tensorflow.python import keras as K 3 | from chariot.preprocess import Preprocess 4 | from chariot.feeder import Feeder 5 | from chariot.transformer.formatter import Padding 6 | from gcn.base_trainer import BaseTrainer 7 | from gcn.data.multi_nli_dataset import MultiNLIDataset 8 | from gcn.graph.dependency_graph import DependencyGraph 9 | from gcn.graph.similarity_graph import SimilarityGraph 10 | from gcn.graph.static_graph import StaticGraph 11 | 12 | 13 | class Trainer(BaseTrainer): 14 | 15 | def __init__(self, graph_builder, root="", min_df=1, max_df=1.0, 16 | unknown="", preprocessor_name="preprocessor", 17 | log_dir=""): 18 | super().__init__(root, graph_builder.lang, min_df, max_df, unknown, 19 | preprocessor_name, log_dir) 20 | self.graph_builder = graph_builder 21 | 22 | def download(self): 23 | r = MultiNLIDataset(self.storage.root).download() 24 | return r 25 | 26 | @property 27 | def num_classes(self): 28 | return len(MultiNLIDataset.labels()) 29 | 30 | def build(self, data_kind="train", save=True): 31 | super().build(data_kind, "text", save) 32 | if self.preprocessor.vocabulary.pad != 0: 33 | raise Exception("Padding is not executed by zero.") 34 | 35 | def train(self, model, data_kind="train", 36 | lr=1e-3, batch_size=20, sequence_length=25, 37 | 
representation="GloVe.6B.100d", 38 | epochs=40, verbose=2): 39 | 40 | if not self._built: 41 | raise Exception("Trainer's preprocessor is not built.") 42 | 43 | if representation is not None: 44 | self.storage.chakin(name=representation) 45 | file_path = "external/{}.txt".format(representation.lower()) 46 | weights = [self.preprocessor.vocabulary.make_embedding( 47 | self.storage.data_path(file_path))] 48 | model.get_layer("embedding").set_weights(weights) 49 | 50 | r = self.download() 51 | 52 | train_data = self.preprocess(r.train_data(), sequence_length) 53 | test_data = self.preprocess(r.test_data(), sequence_length) 54 | 55 | # Set optimizer 56 | model.compile(loss="sparse_categorical_crossentropy", 57 | optimizer=K.optimizers.Adam(lr=lr), 58 | metrics=["accuracy"]) 59 | 60 | validation_data = ((test_data["text"], test_data["graph"]), test_data["label"]) 61 | metrics = model.fit((train_data["text"], train_data["graph"]), 62 | train_data["label"], 63 | validation_data=validation_data, 64 | batch_size=batch_size, 65 | epochs=epochs, verbose=verbose) 66 | 67 | return metrics 68 | 69 | def preprocess(self, data, length): 70 | _data = data 71 | if isinstance(data, (list, tuple)): 72 | _data = pd.Series(data, name="text").to_frame() 73 | elif isinstance(data, pd.Series): 74 | _data = data.to_frame() 75 | 76 | graph = self.graph_builder.batch_build(_data["text"], length) 77 | 78 | preprocess = Preprocess({ 79 | "text": self.preprocessor 80 | }) 81 | feeder = Feeder({"text": Padding.from_(self.preprocessor, 82 | length=length)}) 83 | 84 | _data = preprocess.transform(_data) 85 | _data = feeder.transform(_data) 86 | _data["graph"] = graph 87 | 88 | return _data 89 | -------------------------------------------------------------------------------- /gcn/data/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/icoxfog417/graph-convolution-nlp/2f15da072e401528d9faf76985d05afce336798f/gcn/data/__init__.py -------------------------------------------------------------------------------- /gcn/data/graph_dataset.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pickle as pkl 3 | import zipfile 4 | import requests 5 | import numpy as np 6 | import scipy.sparse as sp 7 | import networkx as nx 8 | from chariot.storage import Storage 9 | 10 | 11 | class GraphDataset(): 12 | 13 | def __init__(self, root, kind="cora"): 14 | self.storage = Storage(root) 15 | self.kind = kind 16 | self.download_url = "https://s3-ap-northeast-1.amazonaws.com/dev.tech-sketch.jp/chakki/public/graph/" # noqa 17 | if kind == "cora": 18 | self.download_url += "cora.zip" 19 | elif kind == "citeseer": 20 | self.download_url += "citeseer.zip" 21 | elif kind == "pubmed": 22 | self.download_url += "pubmed.zip" 23 | else: 24 | raise Exception("Graph dataset {} is not supported.".format(kind)) 25 | 26 | @property 27 | def data_root(self): 28 | return self.storage.data_path("raw/{}".format(self.kind)) 29 | 30 | @property 31 | def download_file_path(self): 32 | return self.storage.data_path("raw/{}.zip".format(self.kind)) 33 | 34 | def download(self, return_mask=True): 35 | # Check downloaded file 36 | if os.path.isdir(self.data_root): 37 | print("{} dataset is already downloaded.".format(self.kind)) 38 | return self.load(return_mask) 39 | 40 | # Download dataset 41 | resp = requests.get(self.download_url, stream=True) 42 | with open(self.download_file_path, "wb") as f: 43 | chunk_size = 1024 44 | for data in 
resp.iter_content(chunk_size=chunk_size): 45 | f.write(data) 46 | 47 | # Expand file 48 | with zipfile.ZipFile(self.download_file_path) as z: 49 | z.extractall(path=self.data_root) 50 | os.remove(self.download_file_path) 51 | 52 | return self.load(return_mask) 53 | 54 | def load(self, return_mask): 55 | """ 56 | Loads input data (reference from: https://github.com/tkipf/gcn/blob/master/gcn/utils.py) 57 | ind.dataset_str.x => the feature vectors of the training instances as scipy.sparse.csr.csr_matrix object; 58 | ind.dataset_str.tx => the feature vectors of the test instances as scipy.sparse.csr.csr_matrix object; 59 | ind.dataset_str.allx => the feature vectors of both labeled and unlabeled training instances 60 | (a superset of ind.dataset_str.x) as scipy.sparse.csr.csr_matrix object; 61 | ind.dataset_str.y => the one-hot labels of the labeled training instances as numpy.ndarray object; 62 | ind.dataset_str.ty => the one-hot labels of the test instances as numpy.ndarray object; 63 | ind.dataset_str.ally => the labels for instances in ind.dataset_str.allx as numpy.ndarray object; 64 | ind.dataset_str.graph => a dict in the format {index: [index_of_neighbor_nodes]} as collections.defaultdict 65 | object; 66 | ind.dataset_str.test.index => the indices of test instances in graph, for the inductive setting as list object. 67 | All objects above must be saved using python pickle module. 68 | :param dataset_str: Dataset name 69 | :return: All data input files loaded (as well the training/test data). 70 | """ 71 | 72 | names = ["x", "y", "tx", "ty", "allx", "ally", "graph", "test.index"] 73 | objects = [] 74 | for n in names: 75 | file_path = os.path.join(self.data_root, 76 | "ind.{}.{}".format(self.kind, n)) 77 | 78 | if n != "test.index": 79 | with open(file_path, "rb") as f: 80 | objects.append(pkl.load(f, encoding="latin1")) 81 | else: 82 | with open(file_path, encoding="latin1") as f: 83 | lines = f.readlines() 84 | indices = [int(ln.strip()) for ln in lines] 85 | objects.append(indices) 86 | 87 | x, y, tx, ty, allx, ally, graph, test_idx = tuple(objects) 88 | test_idx_range = np.sort(test_idx) 89 | 90 | if self.kind == "citeseer": 91 | # Fix citeseer dataset (there are some isolated nodes in the graph) 92 | # Find isolated nodes, add them as zero-vecs into the right position 93 | test_idx_range_full = range(min(test_idx), max(test_idx)+1) 94 | tx_extended = sp.lil_matrix((len(test_idx_range_full), x.shape[1])) 95 | tx_extended[test_idx_range-min(test_idx_range), :] = tx 96 | tx = tx_extended 97 | ty_extended = np.zeros((len(test_idx_range_full), y.shape[1])) 98 | ty_extended[test_idx_range-min(test_idx_range), :] = ty 99 | ty = ty_extended 100 | 101 | features = sp.vstack((allx, tx)).tolil() 102 | features[test_idx, :] = features[test_idx_range, :] 103 | adj = nx.adjacency_matrix(nx.from_dict_of_lists(graph)) 104 | 105 | labels = np.vstack((ally, ty)) 106 | labels[test_idx, :] = labels[test_idx_range, :] 107 | 108 | idx_test = test_idx_range 109 | idx_train = np.array(range(len(y))) 110 | idx_val = np.array(range(len(y), len(y)+500)) 111 | 112 | if return_mask: 113 | train_mask = self.sample_mask(idx_train, labels.shape[0]) 114 | val_mask = self.sample_mask(idx_val, labels.shape[0]) 115 | test_mask = self.sample_mask(idx_test, labels.shape[0]) 116 | 117 | y_train = np.zeros(labels.shape) 118 | y_val = np.zeros(labels.shape) 119 | y_test = np.zeros(labels.shape) 120 | y_train[train_mask, :] = labels[train_mask, :] 121 | y_val[val_mask, :] = labels[val_mask, :] 122 | y_test[test_mask, :] = 
labels[test_mask, :] 123 | 124 | return adj, features, y_train, y_val, y_test, train_mask, val_mask, test_mask 125 | else: 126 | 127 | y_train = labels[idx_train, :] 128 | y_val = labels[idx_val, :] 129 | y_test = labels[idx_test, :] 130 | return adj, features, y_train, y_val, y_test, idx_train, idx_val, idx_test 131 | 132 | def sample_mask(self, idx, length): 133 | """Create mask.""" 134 | mask = np.zeros(length) 135 | mask[idx] = 1 136 | return np.array(mask, dtype=np.bool) 137 | -------------------------------------------------------------------------------- /gcn/data/multi_nli_dataset.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import spacy 3 | import chazutsu 4 | from chariot.storage import Storage 5 | 6 | 7 | class MultiNLIDataset(): 8 | 9 | def __init__(self, root, min_word_count=3, max_word_count=25, 10 | prefix=""): 11 | self.storage = Storage(root) 12 | self.nlp = spacy.load("en", parser=False, entity=False) 13 | self.min_word_count = min_word_count 14 | self.max_word_count = max_word_count 15 | self.prefix = prefix 16 | 17 | def train_data(self): 18 | return pd.read_csv(self.processed_file("train")) 19 | 20 | def test_data(self): 21 | return pd.read_csv(self.processed_file("test")) 22 | 23 | @classmethod 24 | def labels(self): 25 | return ["fiction", "government", "slate", "telephone", "travel", 26 | "nineeleven", "facetoface", "letters", "oup", "verbatim"] 27 | 28 | def download(self): 29 | download_dir = self.storage.data_path("raw") 30 | matched = chazutsu.datasets.MultiNLI.matched().download(download_dir) 31 | mismatched = chazutsu.datasets.MultiNLI.mismatched().download(download_dir) 32 | 33 | for kind in ["train", "test"]: 34 | data = self._merge_data(matched, mismatched, kind) 35 | data.to_csv(self.interim_file(kind)) 36 | preprocessed = self.preprocess(data) 37 | preprocessed = pd.concat([preprocessed["text"], 38 | preprocessed["label"]], axis=1) 39 | preprocessed.to_csv(self.processed_file(kind), index=False) 40 | return self 41 | 42 | def interim_file(self, kind): 43 | if self.prefix: 44 | p = "interim/{}_multi_nli_{}.csv".format(self.prefix, kind) 45 | else: 46 | p = "interim/multi_nli_{}.csv".format(kind) 47 | 48 | return self.storage.data_path(p) 49 | 50 | def processed_file(self, kind): 51 | if self.prefix: 52 | p = "processed/{}_multi_nli_{}.csv".format(self.prefix, kind) 53 | else: 54 | p = "processed/multi_nli_{}.csv".format(kind) 55 | 56 | return self.storage.data_path(p) 57 | 58 | def preprocess(self, df): 59 | # Drop duplicates 60 | except_d = df.drop_duplicates(["text"]) 61 | 62 | # Count words 63 | word_count = except_d["text"].apply(lambda x: len(self.nlp(x))) 64 | except_d = except_d.assign(word_count=pd.Series(word_count).values) 65 | 66 | limited = except_d[(self.min_word_count <= except_d["word_count"]) & 67 | (except_d["word_count"] <= self.max_word_count)] 68 | 69 | # Equalize data count 70 | min_count = limited["label"].value_counts().min() 71 | selected = limited.groupby("label").apply(lambda x: x.sample(n=min_count)) 72 | selected = selected.drop(columns=["label", "index"]).reset_index() 73 | 74 | # Convert label to index 75 | selected["label"] = selected["label"].apply( 76 | lambda x: self.labels().index(x)) 77 | 78 | return selected 79 | 80 | def _merge_data(self, matched, mismatched, kind="train"): 81 | dataset = [] 82 | for d in [matched, mismatched]: 83 | if kind == "train": 84 | _d = d.dev_data() 85 | else: 86 | _d = d.test_data() 87 | 88 | _d = pd.concat([_d["genre"], 
_d["sentence1"]], axis=1) 89 | dataset.append(_d) 90 | merged = pd.concat(dataset).reset_index() 91 | merged.rename(columns={"sentence1": "text", "genre": "label"}, 92 | inplace=True) 93 | return merged 94 | -------------------------------------------------------------------------------- /gcn/graph/__init__.py: -------------------------------------------------------------------------------- 1 | from .similarity_graph import SimilarityGraph 2 | from .dependency_graph import DependencyGraph 3 | from .static_graph import StaticGraph 4 | -------------------------------------------------------------------------------- /gcn/graph/dependency_graph.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import spacy 3 | 4 | 5 | class DependencyGraph(): 6 | 7 | def __init__(self, lang): 8 | self.lang = lang 9 | self._parser = spacy.load(self.lang, disable=["ner", "textcat"]) 10 | 11 | def get_nodes(self, sentence): 12 | return [t.text for t in self._parser(sentence)] 13 | 14 | def build(self, sentence, size=-1, return_label=False): 15 | tokens = self._parser(sentence) 16 | _size = size if size > 0 else len(tokens) 17 | matrix = np.zeros((_size, _size)) 18 | if return_label: 19 | matrix = [[""] * matrix.shape[1] for r in range(matrix.shape[0])] 20 | for token in tokens: 21 | # print("{} =({})=> {}".format(token.text, token.dep_, token.head.text)) 22 | if not token.dep_: 23 | raise Exception("Dependency Parse does not work well.") 24 | 25 | if token.i < _size and token.head.i < _size: 26 | v = token.dep_ if return_label else 1 27 | matrix[token.i][token.head.i] = v 28 | 29 | return matrix 30 | 31 | def batch_build(self, sentences, size=-1): 32 | matrices = [self.build(s, size) for s in sentences] 33 | return np.array(matrices) 34 | -------------------------------------------------------------------------------- /gcn/graph/similarity_graph.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | from sklearn.metrics.pairwise import cosine_similarity 4 | import spacy 5 | from chariot.storage import Storage 6 | from chariot.resource.word_vector import WordVector 7 | 8 | 9 | class SimilarityGraph(): 10 | 11 | def __init__(self, lang, nearest_neighbor=4, threshold=0.3, 12 | mode="similarity", representation="GloVe.6B.100d", root=""): 13 | self.lang = lang 14 | self._parser = spacy.load(self.lang, disable=["ner", "textcat"]) 15 | self.nearest_neighbor = nearest_neighbor 16 | self.threshold = threshold 17 | self.mode = mode 18 | self.representation = representation 19 | default_root = os.path.join(os.path.dirname(__file__), "../../") 20 | _root = root if root else default_root 21 | 22 | self.storage = Storage(_root) 23 | self.key_vector = {} 24 | self._unknown = None 25 | 26 | def get_nodes(self, sentence): 27 | return [t.text for t in self._parser(sentence)] 28 | 29 | def build(self, sentence, size=-1): 30 | if 0 < size < self.nearest_neighbor: 31 | raise Exception("Matrix size is not enough for neighbors.") 32 | 33 | if len(self.key_vector) == 0: 34 | # download representation 35 | self.storage.chakin(name=self.representation) 36 | 37 | # Make embedding matrix 38 | file_path = "external/{}.txt".format(self.representation.lower()) 39 | wv = WordVector(self.storage.data_path(file_path)) 40 | self.key_vector = wv.load() 41 | 42 | for k in self.key_vector: 43 | self._unknown = np.zeros(len(self.key_vector[k])) 44 | break 45 | 46 | tokens = self._parser(sentence) 47 | vectors = [] 48 | 
for t in tokens: 49 | if t.text in self.key_vector: 50 | vectors.append(self.key_vector[t.text]) 51 | else: 52 | vectors.append(self._unknown) 53 | 54 | vectors = np.vstack(vectors) 55 | matrix = self._build(vectors, size) 56 | return matrix 57 | 58 | def _build(self, vectors, size=-1): 59 | _size = size if size > 0 else len(vectors) 60 | similarity = cosine_similarity(vectors[:_size]) 61 | similarity -= np.eye(similarity.shape[0]) # exclude similarity to self 62 | top_k = np.argsort(-similarity, axis=1)[:, :self.nearest_neighbor] 63 | 64 | matrix = np.zeros((_size, _size)) 65 | for i, top in enumerate(top_k): 66 | _top = np.array([t for t in top 67 | if np.abs(similarity[i, t]) >= self.threshold]) 68 | 69 | if len(_top) == 0: 70 | continue 71 | 72 | if self.mode == "connectivity": 73 | matrix[i, _top] = 1 74 | else: 75 | matrix[i, _top] = similarity[i, _top] 76 | 77 | return matrix 78 | 79 | def batch_build(self, sentences, size=-1): 80 | matrices = [self.build(s, size) for s in sentences] 81 | return np.array(matrices) 82 | -------------------------------------------------------------------------------- /gcn/graph/static_graph.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import spacy 3 | 4 | 5 | class StaticGraph(): 6 | 7 | def __init__(self, lang, kind="previous", fill=True): 8 | self.lang = lang 9 | self._parser = spacy.load(self.lang, disable=["ner", "textcat"]) 10 | self.kind = kind 11 | self.fill = fill 12 | 13 | def get_nodes(self, sentence): 14 | return [t.text for t in self._parser(sentence)] 15 | 16 | def build(self, sentence, size=-1): 17 | nodes = self.get_nodes(sentence) 18 | _size = size if size > 0 else len(nodes) 19 | if self.fill: 20 | func = lambda s, k=0: np.tril(np.ones((s, s)), k) 21 | else: 22 | func = np.eye 23 | 24 | if self.kind == "self": 25 | return func(_size) 26 | elif self.kind == "previous": 27 | return func(_size, k=-1) 28 | 29 | def batch_build(self, sentences, size=-1): 30 | matrices = [self.build(s, size) for s in sentences] 31 | return np.array(matrices) 32 | -------------------------------------------------------------------------------- /gcn/language_model/baseline.py: -------------------------------------------------------------------------------- 1 | from tensorflow.python import keras as K 2 | from gcn.layers import ProjectionLayer 3 | from gcn.util import gpu_enable 4 | 5 | 6 | def LSTMLM(vocab_size, embedding_size=100, hidden_size=100, 7 | layers=1, dropout=0.5): 8 | # Prepare initializer 9 | initializer = K.initializers.RandomUniform(minval=-0.1, maxval=0.1) 10 | 11 | # Build the model 12 | model = K.Sequential() 13 | embedding = K.layers.Embedding(input_dim=vocab_size, 14 | output_dim=embedding_size, 15 | embeddings_initializer=initializer) 16 | model.add(embedding) 17 | model.add(K.layers.Dropout(dropout)) 18 | rnn_layer = K.layers.CuDNNLSTM if gpu_enable() else K.layers.LSTM 19 | for layer in range(layers): 20 | model.add(rnn_layer(hidden_size, return_sequences=True)) 21 | model.add(K.layers.Dropout(dropout)) 22 | if hidden_size != embedding_size: 23 | model.add(K.layers.TimeDistributed( 24 | K.layers.Dense(embedding_size, 25 | kernel_initializer=initializer) 26 | )) 27 | # Tying encoder/decoder 28 | #model.add(K.layers.TimeDistributed(ProjectionLayer(embedding))) 29 | model.add(K.layers.TimeDistributed( 30 | K.layers.Dense(vocab_size, 31 | kernel_initializer=initializer, activation="softmax") 32 | )) 33 | 34 | #model.add(K.layers.Activation(activation="softmax")) 35 | 36 | 
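    # Note: the commented-out ProjectionLayer above would tie the output
    # projection to the embedding weights ("weight tying"); with it disabled,
    # the untied TimeDistributed Dense with activation="softmax" already
    # yields the per-step vocabulary distribution, which is why the extra
    # Activation("softmax") line is left commented out as well.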
return model 37 | -------------------------------------------------------------------------------- /gcn/language_model/similarity_graph_lm.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from tensorflow.python import keras as K 3 | from gcn.layers import GraphAttentionLayer 4 | from gcn.metrics import perplexity 5 | 6 | 7 | def SimilarityGraphLM(vocab_size, sequence_length, 8 | embedding_size, dropout=0.7, num_graph_conv=2): 9 | 10 | words = K.layers.Input(shape=(sequence_length,)) 11 | matrix = K.layers.Input(shape=(sequence_length,)) 12 | 13 | embeddings = K.layers.Embedding(output_dim=embedding_size, 14 | input_dim=vocab_size, 15 | input_length=sequence_length)(words) 16 | 17 | # context feature 18 | context = K.layers.LSTM(embedding_size, dropout=dropout, 19 | return_sequences=True, return_state=True)(embeddings) 20 | 21 | # graph feature 22 | features = tf.transpose(embeddings, [1, 0, 2]) 23 | for layer in range(num_graph_conv): 24 | features = K.layers.TimeDistributed( 25 | GraphAttentionLayer( 26 | embedding_size, 27 | attn_heads_reduction="average"))([features, matrix]) 28 | return None 29 | 30 | features = K.backend.transpose(features) 31 | merged = K.layers.concatenate([context, features]) 32 | output = K.layers.Dense(vocab_size, activation="softmax")(merged) 33 | 34 | model = K.models.Model(inputs=[words, matrix], outputs=output) 35 | 36 | # Set optimizer 37 | model.compile(loss="sparse_categorical_crossentropy", 38 | optimizer="adam", 39 | metrics=["accuracy", perplexity]) 40 | 41 | return model 42 | -------------------------------------------------------------------------------- /gcn/language_model/trainer.py: -------------------------------------------------------------------------------- 1 | import sys 2 | from tensorflow.python import keras as K 3 | import numpy as np 4 | import chazutsu 5 | import chariot.transformer as ct 6 | from chariot.feeder import LanguageModelFeeder 7 | from gcn.base_trainer import BaseTrainer 8 | from gcn.metrics import perplexity 9 | 10 | 11 | class Trainer(BaseTrainer): 12 | 13 | def __init__(self, root="", lang=None, min_df=5, max_df=sys.maxsize, 14 | unknown="", preprocessor_name="preprocessor", 15 | log_dir=""): 16 | super().__init__(root, lang, min_df, max_df, unknown, 17 | preprocessor_name, log_dir) 18 | 19 | def download(self): 20 | download_dir = self.storage.data_path("raw") 21 | r = chazutsu.datasets.WikiText2().download(download_dir) 22 | return r 23 | 24 | def train(self, model, data_kind="train", lr=1e-3, 25 | batch_size=20, sequence_length=35, epochs=40): 26 | if not self._built: 27 | raise Exception("Trainer's preprocessor is not built.") 28 | 29 | r = self.download() 30 | step_generators = {"train": {}, "valid": {}} 31 | 32 | # Set optimizer 33 | model.compile(loss="sparse_categorical_crossentropy", 34 | optimizer=K.optimizers.Adam(lr=lr), 35 | metrics=["accuracy", perplexity]) 36 | 37 | for k in step_generators: 38 | if k == "train": 39 | if data_kind == "train": 40 | data = r.train_data() 41 | else: 42 | data = r.valid_data() 43 | else: 44 | data = r.test_data() 45 | 46 | spec = {"sentence": ct.formatter.ShiftGenerator()} 47 | feeder = LanguageModelFeeder(spec) 48 | data = self.preprocessor.transform(data) 49 | step, generator = feeder.make_generator( 50 | data, batch_size=batch_size, 51 | sequence_length=sequence_length, 52 | sequencial=False) 53 | 54 | step_generators[k]["g"] = generator 55 | step_generators[k]["s"] = step 56 | 57 | callbacks = 
[K.callbacks.ModelCheckpoint(self.model_path, 58 | save_best_only=True), 59 | K.callbacks.TensorBoard(self.tensorboard_dir)] 60 | 61 | metrics = model.fit_generator( 62 | step_generators["train"]["g"](), 63 | step_generators["train"]["s"], 64 | validation_data=step_generators["valid"]["g"](), 65 | validation_steps=step_generators["valid"]["s"], 66 | epochs=epochs, 67 | callbacks=callbacks) 68 | 69 | return metrics 70 | 71 | def generate_text(self, model, seed_text, 72 | sequence_length=10, iteration=20): 73 | preprocessed = self.preprocessor.transform([seed_text])[0] 74 | 75 | def pad_sequence(tokens, length): 76 | if len(tokens) < length: 77 | pad_size = length - len(tokens) 78 | return tokens + [self.preprocessor.vocabulary.pad] * pad_size 79 | elif len(tokens) > length: 80 | return tokens[-length:] 81 | else: 82 | return tokens 83 | 84 | for _ in range(iteration): 85 | x = pad_sequence(preprocessed, sequence_length) 86 | y = model.predict([x]) 87 | index = min(len(preprocessed) - 1, sequence_length - 1) 88 | target_word_probs = y[index][0] 89 | w = np.random.choice(np.arange(len(target_word_probs)), 90 | 1, p=target_word_probs)[0] 91 | preprocessed.append(w) 92 | 93 | decoded = self.preprocessor.inverse_transform([preprocessed]) 94 | text = " ".join(decoded[0]) 95 | 96 | return text 97 | -------------------------------------------------------------------------------- /gcn/layers/__init__.py: -------------------------------------------------------------------------------- 1 | from .attention_layer import AttentionLayer 2 | from .graph_attention_layer import GraphAttentionLayer 3 | from .projection_layer import ProjectionLayer 4 | -------------------------------------------------------------------------------- /gcn/layers/attention_layer.py: -------------------------------------------------------------------------------- 1 | from tensorflow.python.keras import backend as K 2 | from tensorflow.python.keras.layers import Layer 3 | from tensorflow.python.keras import initializers, regularizers, constraints 4 | 5 | 6 | class AttentionLayer(Layer): 7 | """ 8 | import from Bidirectional LSTM and Attention 9 | https://www.kaggle.com/takuok/bidirectional-lstm-and-attention-lb-0-043 10 | """ 11 | 12 | def __init__(self, sequence_length, 13 | W_regularizer=None, b_regularizer=None, 14 | W_constraint=None, b_constraint=None, 15 | bias=True, return_attentions=False, **kwargs): 16 | self.sequence_length = sequence_length 17 | self.supports_masking = True 18 | self.return_attentions = return_attentions 19 | self.init = initializers.get("glorot_uniform") 20 | 21 | self.W_regularizer = regularizers.get(W_regularizer) 22 | self.b_regularizer = regularizers.get(b_regularizer) 23 | 24 | self.W_constraint = constraints.get(W_constraint) 25 | self.b_constraint = constraints.get(b_constraint) 26 | 27 | self.bias = bias 28 | self.embedding_dim = 0 29 | super(AttentionLayer, self).__init__(**kwargs) 30 | 31 | def build(self, input_shape): 32 | assert len(input_shape) == 3 33 | 34 | _input_shape = input_shape.as_list() 35 | self.embedding_dim = _input_shape[-1] 36 | self.W = self.add_weight(name="{}_W".format(self.name), 37 | shape=(self.embedding_dim,), 38 | initializer=self.init, 39 | regularizer=self.W_regularizer, 40 | constraint=self.W_constraint) 41 | 42 | if self.bias: 43 | self.b = self.add_weight(name="{}_b".format(self.name), 44 | shape=(_input_shape[1],), 45 | initializer="zero", 46 | regularizer=self.b_regularizer, 47 | constraint=self.b_constraint) 48 | else: 49 | self.b = None 50 | 51 | self.built = 
True 52 | 53 | def compute_mask(self, input, input_mask=None): 54 | return None 55 | 56 | def call(self, x, mask=None): 57 | embedding_dim = self.embedding_dim 58 | sequence_length = self.sequence_length 59 | 60 | eij = K.reshape(K.dot(K.reshape(x, (-1, embedding_dim)), 61 | K.reshape(self.W, (embedding_dim, 1))), 62 | (-1, sequence_length)) 63 | 64 | if self.bias: 65 | eij += self.b 66 | 67 | eij = K.tanh(eij) 68 | 69 | a = K.exp(eij) 70 | 71 | if mask is not None: 72 | a *= K.cast(mask, K.floatx()) 73 | 74 | a /= K.cast(K.sum(a, axis=1, keepdims=True) + K.epsilon(), K.floatx()) 75 | 76 | weighted_input = x * K.expand_dims(a) 77 | output = K.sum(weighted_input, axis=1) 78 | if self.return_attentions: 79 | return output, a 80 | else: 81 | return output 82 | 83 | def compute_output_shape(self, input_shape): 84 | return input_shape[0], self.embedding_dim 85 | -------------------------------------------------------------------------------- /gcn/layers/graph_attention_layer.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from tensorflow.python.keras import backend as K 3 | from tensorflow.python.keras.engine.base_layer import InputSpec 4 | from tensorflow.python.keras import initializers, regularizers, constraints 5 | from tensorflow.python.keras.layers import Dense, Dropout 6 | 7 | 8 | class GraphAttentionLayer(Dense): 9 | """ 10 | import from danielegrattarola/keras-gat 11 | https://github.com/danielegrattarola/keras-gat/blob/master/keras_gat/graph_attention_layer.py 12 | """ 13 | 14 | def __init__(self, 15 | feature_units, 16 | attn_heads=1, 17 | attn_heads_reduction="concat", # {"concat", "average"} 18 | dropout_rate=0.5, 19 | activation="relu", 20 | attn_kernel_initializer="glorot_uniform", 21 | attn_kernel_regularizer=None, 22 | attn_kernel_constraint=None, 23 | attention=True, 24 | return_attention=False, 25 | node_level_bias=False, 26 | **kwargs): 27 | 28 | if attn_heads_reduction not in {"concat", "average"}: 29 | raise ValueError("Possbile reduction methods: concat, average") 30 | 31 | super().__init__(units=feature_units, 32 | activation=activation, 33 | **kwargs) 34 | 35 | # Number of attention heads (K in the paper) 36 | self.attn_heads = attn_heads 37 | # Eq. 
5 and 6 in the paper 38 | self.attn_heads_reduction = attn_heads_reduction 39 | # Internal dropout rate 40 | self.dropout_rate = dropout_rate 41 | 42 | self.attn_kernel_initializer \ 43 | = initializers.get(attn_kernel_initializer) 44 | self.attn_kernel_regularizer \ 45 | = regularizers.get(attn_kernel_regularizer) 46 | self.attn_kernel_constraint = constraints.get(attn_kernel_constraint) 47 | self.attention = attention 48 | self.return_attention = return_attention 49 | self.node_level_bias = node_level_bias 50 | self.input_spec = [InputSpec(ndim=3), InputSpec(ndim=3)] 51 | self.supports_masking = True 52 | # Populated by build() 53 | self.kernels = [] 54 | self.biases = [] 55 | self.neighbor_kernels = [] 56 | self.attn_kernels = [] 57 | self.attention_biases = [] 58 | 59 | if attn_heads_reduction == "concat": 60 | # Output will have shape (..., K * F") 61 | self.output_dim = self.units * self.attn_heads 62 | else: 63 | # Output will have shape (..., F") 64 | self.output_dim = self.units 65 | 66 | def build(self, input_shape): 67 | X_dims, A_dims = [dims.as_list() for dims in input_shape] 68 | assert len(X_dims) == 3 69 | assert len(A_dims) == 3 and A_dims[1] == A_dims[2] 70 | 71 | _, N, F = X_dims 72 | 73 | # Initialize weights for each attention head 74 | for head in range(self.attn_heads): 75 | # Layer kernel 76 | kernel = self.add_weight(shape=(F, self.units), 77 | initializer=self.kernel_initializer, 78 | regularizer=self.kernel_regularizer, 79 | constraint=self.kernel_constraint, 80 | name="kernel_{}".format(head)) 81 | self.kernels.append(kernel) 82 | 83 | # Layer bias 84 | if self.use_bias: 85 | bias = self.add_weight(shape=(self.units,), 86 | initializer=self.bias_initializer, 87 | regularizer=self.bias_regularizer, 88 | constraint=self.bias_constraint, 89 | name="bias_{}".format(head)) 90 | self.biases.append(bias) 91 | 92 | if not self.attention: 93 | continue 94 | 95 | # Attention kernels 96 | neighbor_kernel = self.add_weight( 97 | shape=(F, self.units), 98 | initializer=self.kernel_initializer, 99 | regularizer=self.kernel_regularizer, 100 | constraint=self.kernel_constraint, 101 | name="kernel_neighbor_{}".format(head)) 102 | 103 | attn_kernel = self.add_weight( 104 | shape=(self.units, 1), 105 | initializer=self.attn_kernel_initializer, 106 | regularizer=self.attn_kernel_regularizer, 107 | constraint=self.attn_kernel_constraint, 108 | name="attn_kernel_{}".format(head)) 109 | 110 | self.neighbor_kernels.append(neighbor_kernel) 111 | self.attn_kernels.append(attn_kernel) 112 | 113 | if self.use_bias: 114 | if self.node_level_bias: 115 | biases = self.add_weight(shape=(N, N), 116 | initializer=self.bias_initializer, 117 | regularizer=self.bias_regularizer, 118 | constraint=self.bias_constraint, 119 | name="attention_bias") 120 | else: 121 | biases = [] 122 | for kind in ["self", "neigbor"]: 123 | name = "bias_attn_{}_{}".format(kind, head) 124 | bias = self.add_weight(shape=(N,), 125 | initializer=self.bias_initializer, 126 | regularizer=self.bias_regularizer, 127 | constraint=self.bias_constraint, 128 | name=name) 129 | biases.append(bias) 130 | self.attention_biases.append(biases) 131 | 132 | self.built = True 133 | 134 | def call(self, inputs): 135 | X = inputs[0] # Node features (B x N x F) 136 | A = inputs[1] # Adjacency matrix (B x N x N) 137 | 138 | X_dims = X.get_shape().as_list() 139 | B, N, F = X_dims 140 | 141 | outputs = [] 142 | attentions = [] 143 | for head in range(self.attn_heads): 144 | # W in the paper (F x F") 145 | kernel = self.kernels[head] 146 | 147 | 
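            # Reading aid for this head: X is (B x N x F) and the kernel
            # projects it to (B x N x F'). With attention enabled, pairwise
            # scores come from the "self" and "neighbor" projections, are
            # squashed with tanh, masked so only pairs connected in A can
            # attend, and softmax-normalized; the aggregated neighbour
            # features are then added back onto the node's own projected
            # features (a residual-style update) before the bias is applied.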
# Compute inputs to attention network 148 | features = K.dot(X, kernel) # (B x N x F") 149 | dropout_feat = Dropout(self.dropout_rate)(features) # (B x N x F") 150 | 151 | if not self.attention: 152 | attention = A 153 | aggregation = tf.matmul(attention, dropout_feat) # (N x F") 154 | else: 155 | # Attention kernel a in the paper (2F" x 1) 156 | neighbor_kernel = self.neighbor_kernels[head] 157 | attn_kernel = self.attn_kernels[head] 158 | 159 | neighbor_features = K.dot(X, neighbor_kernel) 160 | 161 | attn_self = K.dot(features, attn_kernel) 162 | attn_neighbor = K.dot(neighbor_features, attn_kernel) 163 | 164 | if self.use_bias and not self.node_level_bias: 165 | self_attn_bias, neigbor_attn_bias = self.attention_biases[head] 166 | attn_self = K.bias_add(attn_self, self_attn_bias) 167 | attn_neighbor = K.bias_add(attn_neighbor, neigbor_attn_bias) 168 | 169 | attention = attn_neighbor + tf.transpose(attn_self, (0, 2, 1)) 170 | attention = tf.nn.tanh(attention) 171 | attention = K.reshape(attention, (-1, N, N)) 172 | if self.use_bias and self.node_level_bias: 173 | bias = self.attention_biases[head] 174 | attention = K.bias_add(attention, bias) 175 | 176 | has_connection = tf.to_float(tf.greater(A, 0.0)) 177 | 178 | mask = -10e9 * (1.0 - has_connection) 179 | attention += mask 180 | 181 | attention = tf.nn.softmax(attention) * has_connection 182 | 183 | dropout_attn = Dropout(self.dropout_rate)(attention) 184 | aggregation = tf.matmul(dropout_attn, dropout_feat) 185 | 186 | node_features = dropout_feat + aggregation 187 | if self.use_bias: 188 | node_features = K.bias_add(node_features, self.biases[head]) 189 | 190 | # Add output of attention 191 | if self.return_attention: 192 | attentions.append(attention) 193 | 194 | outputs.append(node_features) 195 | 196 | # Aggregate the heads" output according to the reduction method 197 | if self.attn_heads_reduction == "concat": 198 | output = K.concatenate(outputs, axis=-1) # (B x N x KF") 199 | else: 200 | output = K.mean(K.stack(outputs), axis=0) # (B x N x F") 201 | # If "average", compute the activation here (Eq. 
6) 202 | 203 | output = self.activation(output) 204 | 205 | if self.return_attention: 206 | attentions = K.stack(attentions, axis=1) 207 | return (output, attentions) 208 | else: 209 | return output 210 | 211 | def compute_output_shape(self, input_shape): 212 | X_dims, A_dims = [dims.as_list() for dims in input_shape] 213 | assert len(X_dims) == 3 214 | assert len(A_dims) == 3 215 | output_shape = X_dims[0], X_dims[1], self.output_dim 216 | 217 | if self.return_attention: 218 | return (tf.TensorShape(output_shape), 219 | tf.TensorShape(A_dims.insert(1, self.attn_heads))) 220 | else: 221 | return tf.TensorShape(output_shape) 222 | 223 | def compute_mask(self, inputs, mask): 224 | if isinstance(mask, list): 225 | output_mask = mask[0] 226 | else: 227 | output_mask = mask 228 | 229 | if self.return_attention: 230 | return [output_mask] + [None] 231 | else: 232 | return output_mask 233 | -------------------------------------------------------------------------------- /gcn/layers/graph_attention_layer_before.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from tensorflow.python.keras import backend as K 3 | from tensorflow.python.keras.engine.base_layer import InputSpec 4 | from tensorflow.python.keras import initializers, regularizers, constraints 5 | from tensorflow.python.keras.layers import Dense, Dropout, LeakyReLU 6 | 7 | 8 | class GraphAttentionLayer(Dense): 9 | """ 10 | import from danielegrattarola/keras-gat 11 | https://github.com/danielegrattarola/keras-gat/blob/master/keras_gat/graph_attention_layer.py 12 | """ 13 | 14 | def __init__(self, 15 | feature_units, 16 | attn_heads=1, 17 | attn_heads_reduction="concat", # {"concat", "average"} 18 | dropout_rate=0.5, 19 | activation="relu", 20 | attn_kernel_initializer="glorot_uniform", 21 | attn_kernel_regularizer=None, 22 | attn_kernel_constraint=None, 23 | attention=True, 24 | return_attention=False, 25 | **kwargs): 26 | 27 | if attn_heads_reduction not in {"concat", "average"}: 28 | raise ValueError("Possbile reduction methods: concat, average") 29 | 30 | super(GraphAttentionLayer, self).__init__(units=feature_units, 31 | activation=activation, 32 | **kwargs) 33 | 34 | # Number of attention heads (K in the paper) 35 | self.attn_heads = attn_heads 36 | # Eq. 
5 and 6 in the paper 37 | self.attn_heads_reduction = attn_heads_reduction 38 | # Internal dropout rate 39 | self.dropout_rate = dropout_rate 40 | 41 | self.attn_kernel_initializer \ 42 | = initializers.get(attn_kernel_initializer) 43 | self.attn_kernel_regularizer \ 44 | = regularizers.get(attn_kernel_regularizer) 45 | self.attn_kernel_constraint = constraints.get(attn_kernel_constraint) 46 | self.attention = attention 47 | self.return_attention = return_attention 48 | self.input_spec = [InputSpec(ndim=3), InputSpec(ndim=3)] 49 | self.supports_masking = False 50 | 51 | # Populated by build() 52 | self.kernels = [] # Layer kernels for attention heads 53 | self.biases = [] # Layer biases for attention heads 54 | self.attn_kernels = [] # Attention kernels for attention heads 55 | 56 | if attn_heads_reduction == "concat": 57 | # Output will have shape (..., K * F") 58 | self.output_dim = self.units * self.attn_heads 59 | else: 60 | # Output will have shape (..., F") 61 | self.output_dim = self.units 62 | 63 | def build(self, input_shape): 64 | X_dims, A_dims = [dims.as_list() for dims in input_shape] 65 | assert len(X_dims) == 3 66 | assert len(A_dims) == 3 and A_dims[1] == A_dims[2] 67 | 68 | F = X_dims[-1] 69 | 70 | # Initialize weights for each attention head 71 | for head in range(self.attn_heads): 72 | # Layer kernel 73 | kernel = self.add_weight(shape=(F, self.units), 74 | initializer=self.kernel_initializer, 75 | regularizer=self.kernel_regularizer, 76 | constraint=self.kernel_constraint, 77 | name="kernel_{}".format(head)) 78 | self.kernels.append(kernel) 79 | 80 | # Layer bias 81 | if self.use_bias: 82 | bias = self.add_weight(shape=(self.units, ), 83 | initializer=self.bias_initializer, 84 | regularizer=self.bias_regularizer, 85 | constraint=self.bias_constraint, 86 | name="bias_{}".format(head)) 87 | self.biases.append(bias) 88 | 89 | if not self.attention: 90 | continue 91 | 92 | # Attention kernels 93 | attn_kernel_self = self.add_weight( 94 | shape=(self.units, 1), 95 | initializer=self.attn_kernel_initializer, 96 | regularizer=self.attn_kernel_regularizer, 97 | constraint=self.attn_kernel_constraint, 98 | name="attn_kernel_self_{}".format(head),) 99 | attn_kernel_neighs = self.add_weight( 100 | shape=(self.units, 1), 101 | initializer=self.attn_kernel_initializer, 102 | regularizer=self.attn_kernel_regularizer, 103 | constraint=self.attn_kernel_constraint, 104 | name="attn_kernel_neigh_{}".format(head)) 105 | 106 | self.attn_kernels.append([attn_kernel_self, attn_kernel_neighs]) 107 | 108 | self.built = True 109 | 110 | def call(self, inputs): 111 | X = inputs[0] # Node features (B x N x F) 112 | A = inputs[1] # Adjacency matrix (B x N x N) 113 | 114 | outputs = [] 115 | attentions = [] 116 | for head in range(self.attn_heads): 117 | # W in the paper (F x F") 118 | kernel = self.kernels[head] 119 | 120 | # Compute inputs to attention network 121 | features = K.dot(X, kernel) # (B x N x F") 122 | dropout_feat = Dropout(self.dropout_rate)(features) # (B x N x F") 123 | 124 | if not self.attention: 125 | attention = A 126 | node_features = tf.matmul(attention, dropout_feat) # (N x F") 127 | else: 128 | # Attention kernel a in the paper (2F" x 1) 129 | attention_kernel = self.attn_kernels[head] 130 | 131 | # Compute feature combinations 132 | # Note: [[a_1], [a_2]]^T [[Wh_i], [Wh_2]] 133 | # = [a_1]^T [Wh_i] + [a_2]^T [Wh_j] 134 | # Both (B x N x 1) 135 | attn_for_self = K.dot(features, attention_kernel[0]) 136 | attn_for_neighs = K.dot(features, attention_kernel[1]) 137 | 138 | # 
Attention head a(Wh_i, Wh_j) = a^T [[Wh_i], [Wh_j]] 139 | # attention becomes (B x N x N) via broadcasting 140 | attention = attn_for_self + tf.transpose(attn_for_neighs, 141 | (0, 2, 1)) 142 | 143 | # Add nonlinearty (alpha=0.2 is tensorflow default) 144 | attention = LeakyReLU(alpha=0.2)(attention) 145 | 146 | # Mask values before activation (Vaswani et al., 2017) 147 | mask = -10e9 * (1.0 - A) 148 | attention += mask 149 | 150 | # Apply softmax to get attention coefficients 151 | attention = K.softmax(attention) # (B x N x N) 152 | 153 | # Apply dropout to features and attention coefficients 154 | dropout_attn = Dropout(self.dropout_rate)(attention) # (B x N x N) 155 | 156 | # Linear combination with neighbors" features 157 | # (B x N x F") 158 | node_features = tf.matmul(dropout_attn, dropout_feat) # (N x F") 159 | 160 | if self.use_bias: 161 | node_features = K.bias_add(node_features, self.biases[head]) 162 | 163 | if self.attn_heads_reduction == "concat": 164 | # If "concat", compute the activation here (Eq. 5) 165 | node_features = self.activation(node_features) 166 | 167 | if self.return_attention: 168 | attentions.append(attention) 169 | # Add output of attention head to final output 170 | outputs.append(node_features) 171 | 172 | # Aggregate the heads" output according to the reduction method 173 | if self.attn_heads_reduction == "concat": 174 | output = K.concatenate(outputs, axis=-1) # (B x N x KF") 175 | else: 176 | output = K.mean(K.stack(outputs), axis=0) # (B x N x F") 177 | # If "average", compute the activation here (Eq. 6) 178 | 179 | output = self.activation(output) 180 | 181 | if self.return_attention: 182 | attentions = K.stack(attentions, axis=1) 183 | return (output, attentions) 184 | else: 185 | return output 186 | 187 | def compute_output_shape(self, input_shape): 188 | X_dims, A_dims = [dims.as_list() for dims in input_shape] 189 | assert len(X_dims) == 3 190 | assert len(A_dims) == 3 191 | output_shape = X_dims[0], X_dims[1], self.output_dim 192 | 193 | if self.return_attention: 194 | return (tf.TensorShape(output_shape), 195 | tf.TensorShape(A_dims.insert(1, self.attn_heads))) 196 | else: 197 | return tf.TensorShape(output_shape) 198 | -------------------------------------------------------------------------------- /gcn/layers/graph_attention_layer_multi.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from tensorflow.python.keras import backend as K 3 | from tensorflow.python.keras.engine.base_layer import InputSpec 4 | from tensorflow.python.keras import initializers, regularizers, constraints 5 | from tensorflow.python.keras.layers import Dense, Dropout 6 | 7 | 8 | class GraphAttentionLayer(Dense): 9 | """ 10 | import from danielegrattarola/keras-gat 11 | https://github.com/danielegrattarola/keras-gat/blob/master/keras_gat/graph_attention_layer.py 12 | """ 13 | 14 | def __init__(self, 15 | feature_units, 16 | attn_heads=1, 17 | attn_heads_reduction="concat", # {"concat", "average"} 18 | dropout_rate=0.5, 19 | activation="relu", 20 | attn_kernel_initializer="glorot_uniform", 21 | attn_kernel_regularizer=None, 22 | attn_kernel_constraint=None, 23 | attention=True, 24 | return_attention=False, 25 | **kwargs): 26 | 27 | if attn_heads_reduction not in {"concat", "average"}: 28 | raise ValueError("Possbile reduction methods: concat, average") 29 | 30 | super(GraphAttentionLayer, self).__init__(units=feature_units, 31 | activation=activation, 32 | **kwargs) 33 | 34 | # Number of attention heads (K 
in the paper) 35 | self.attn_heads = attn_heads 36 | # Eq. 5 and 6 in the paper 37 | self.attn_heads_reduction = attn_heads_reduction 38 | # Internal dropout rate 39 | self.dropout_rate = dropout_rate 40 | 41 | self.attn_kernel_initializer \ 42 | = initializers.get(attn_kernel_initializer) 43 | self.attn_kernel_regularizer \ 44 | = regularizers.get(attn_kernel_regularizer) 45 | self.attn_kernel_constraint = constraints.get(attn_kernel_constraint) 46 | self.attention = attention 47 | self.return_attention = return_attention 48 | self.input_spec = [InputSpec(ndim=3), InputSpec(ndim=3)] 49 | self.supports_masking = False 50 | 51 | # Populated by build() 52 | self.kernels = [] 53 | self.biases = [] 54 | self.neighbor_kernels = [] 55 | self.attn_kernels = [] 56 | self.attention_biases = [] 57 | 58 | if attn_heads_reduction == "concat": 59 | # Output will have shape (..., K * F") 60 | self.output_dim = self.units * self.attn_heads 61 | else: 62 | # Output will have shape (..., F") 63 | self.output_dim = self.units 64 | 65 | def build(self, input_shape): 66 | X_dims, A_dims = [dims.as_list() for dims in input_shape] 67 | assert len(X_dims) == 3 68 | assert len(A_dims) == 3 and A_dims[1] == A_dims[2] 69 | 70 | _, N, F = X_dims 71 | 72 | # Initialize weights for each attention head 73 | for head in range(self.attn_heads): 74 | # Layer kernel 75 | kernel = self.add_weight(shape=(F, self.units), 76 | initializer=self.kernel_initializer, 77 | regularizer=self.kernel_regularizer, 78 | constraint=self.kernel_constraint, 79 | name="kernel_{}".format(head)) 80 | self.kernels.append(kernel) 81 | 82 | # Layer bias 83 | if self.use_bias: 84 | bias = self.add_weight(shape=(self.units,), 85 | initializer=self.bias_initializer, 86 | regularizer=self.bias_regularizer, 87 | constraint=self.bias_constraint, 88 | name="bias_{}".format(head)) 89 | self.biases.append(bias) 90 | 91 | if not self.attention: 92 | continue 93 | 94 | # Attention kernels 95 | neighbor_kernel = self.add_weight( 96 | shape=(F, self.units), 97 | initializer=self.kernel_initializer, 98 | regularizer=self.kernel_regularizer, 99 | constraint=self.kernel_constraint, 100 | name="kernel_neighbor_{}".format(head)) 101 | 102 | attn_kernel = self.add_weight( 103 | shape=(self.units, self.units), 104 | initializer=self.attn_kernel_initializer, 105 | regularizer=self.attn_kernel_regularizer, 106 | constraint=self.attn_kernel_constraint, 107 | name="attn_kernel_{}".format(head)) 108 | 109 | self.neighbor_kernels.append(neighbor_kernel) 110 | self.attn_kernels.append(attn_kernel) 111 | 112 | self.built = True 113 | 114 | def call(self, inputs): 115 | X = inputs[0] # Node features (B x N x F) 116 | A = inputs[1] # Adjacency matrix (B x N x N) 117 | 118 | X_dims = X.get_shape().as_list() 119 | B, N, F = X_dims 120 | 121 | outputs = [] 122 | attentions = [] 123 | for head in range(self.attn_heads): 124 | # W in the paper (F x F") 125 | kernel = self.kernels[head] 126 | 127 | # Compute inputs to attention network 128 | features = K.dot(X, kernel) # (B x N x F") 129 | dropout_feat = Dropout(self.dropout_rate)(features) # (B x N x F") 130 | 131 | neighbor_kernel = self.neighbor_kernels[head] 132 | attn_kernel = self.attn_kernels[head] 133 | 134 | neighbor_features = K.dot(X, neighbor_kernel) 135 | dropout_neighbor = Dropout(self.dropout_rate)(neighbor_features) 136 | 137 | merged = tf.matmul(K.dot(dropout_feat, attn_kernel), 138 | tf.transpose(dropout_neighbor, (0, 2, 1))) 139 | 140 | attention = tf.nn.tanh(merged) 141 | attention = K.reshape(attention, (-1, N, 
N)) 142 | 143 | mask = -10e9 * (1.0 - A) 144 | attention += mask 145 | 146 | attention = tf.nn.softmax(attention) 147 | dropout_attn = Dropout(self.dropout_rate)(attention) 148 | 149 | node_features = tf.matmul(dropout_attn, dropout_feat) 150 | 151 | if self.use_bias: 152 | node_features = K.bias_add(node_features, self.biases[head]) 153 | 154 | if self.return_attention: 155 | attentions.append(attention) 156 | # Add output of attention head to final output 157 | outputs.append(node_features) 158 | 159 | # Aggregate the heads" output according to the reduction method 160 | if self.attn_heads_reduction == "concat": 161 | output = K.concatenate(outputs, axis=-1) # (B x N x KF") 162 | else: 163 | output = K.mean(K.stack(outputs), axis=0) # (B x N x F") 164 | # If "average", compute the activation here (Eq. 6) 165 | 166 | output = self.activation(output) 167 | 168 | if self.return_attention: 169 | attentions = K.stack(attentions, axis=1) 170 | return (output, attentions) 171 | else: 172 | return output 173 | 174 | def compute_output_shape(self, input_shape): 175 | X_dims, A_dims = [dims.as_list() for dims in input_shape] 176 | assert len(X_dims) == 3 177 | assert len(A_dims) == 3 178 | output_shape = X_dims[0], X_dims[1], self.output_dim 179 | 180 | if self.return_attention: 181 | return (tf.TensorShape(output_shape), 182 | tf.TensorShape(A_dims.insert(1, self.attn_heads))) 183 | else: 184 | return tf.TensorShape(output_shape) 185 | -------------------------------------------------------------------------------- /gcn/layers/graph_attention_layer_original.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from tensorflow.python.keras import backend as K 3 | from tensorflow.python.keras import activations, constraints, initializers, regularizers 4 | from tensorflow.python.keras.layers import Layer, Dropout, LeakyReLU 5 | 6 | 7 | class GraphAttentionLayer(Layer): 8 | """ 9 | import from danielegrattarola/keras-gat 10 | https://github.com/danielegrattarola/keras-gat/blob/master/keras_gat/graph_attention_layer.py 11 | """ 12 | 13 | def __init__(self, 14 | feature_units, 15 | attn_heads=1, 16 | attn_heads_reduction="concat", # {"concat", "average"} 17 | dropout_rate=0.5, 18 | activation="relu", 19 | use_bias=True, 20 | kernel_initializer="glorot_uniform", 21 | bias_initializer="zeros", 22 | attn_kernel_initializer="glorot_uniform", 23 | kernel_regularizer=None, 24 | bias_regularizer=None, 25 | attn_kernel_regularizer=None, 26 | activity_regularizer=None, 27 | kernel_constraint=None, 28 | bias_constraint=None, 29 | attn_kernel_constraint=None, 30 | attention=True, 31 | **kwargs): 32 | 33 | if attn_heads_reduction not in {"concat", "average"}: 34 | raise ValueError("Possbile reduction methods: concat, average") 35 | 36 | self.F_ = feature_units # Number of output features (F" in the paper) 37 | self.attn_heads = attn_heads # Number of attention heads (K in the paper) 38 | self.attn_heads_reduction = attn_heads_reduction # Eq. 5 and 6 in the paper 39 | self.dropout_rate = dropout_rate # Internal dropout rate 40 | self.activation = activations.get(activation) # Eq. 
4 in the paper 41 | self.use_bias = use_bias 42 | 43 | self.kernel_initializer = initializers.get(kernel_initializer) 44 | self.bias_initializer = initializers.get(bias_initializer) 45 | self.attn_kernel_initializer = initializers.get(attn_kernel_initializer) 46 | 47 | self.kernel_regularizer = regularizers.get(kernel_regularizer) 48 | self.bias_regularizer = regularizers.get(bias_regularizer) 49 | self.attn_kernel_regularizer = regularizers.get(attn_kernel_regularizer) 50 | self.activity_regularizer = regularizers.get(activity_regularizer) 51 | 52 | self.kernel_constraint = constraints.get(kernel_constraint) 53 | self.bias_constraint = constraints.get(bias_constraint) 54 | self.attn_kernel_constraint = constraints.get(attn_kernel_constraint) 55 | self.supports_masking = False 56 | self.attention = attention 57 | 58 | # Populated by build() 59 | self.kernels = [] # Layer kernels for attention heads 60 | self.biases = [] # Layer biases for attention heads 61 | self.attn_kernels = [] # Attention kernels for attention heads 62 | 63 | if attn_heads_reduction == "concat": 64 | # Output will have shape (..., K * F") 65 | self.output_dim = self.F_ * self.attn_heads 66 | else: 67 | # Output will have shape (..., F") 68 | self.output_dim = self.F_ 69 | 70 | super(GraphAttentionLayer, self).__init__(**kwargs) 71 | 72 | def build(self, input_shape): 73 | assert len(input_shape) >= 2 74 | F = input_shape[0][-1].value 75 | 76 | # Initialize weights for each attention head 77 | for head in range(self.attn_heads): 78 | # Layer kernel 79 | kernel = self.add_weight(shape=(F, self.F_), 80 | initializer=self.kernel_initializer, 81 | regularizer=self.kernel_regularizer, 82 | constraint=self.kernel_constraint, 83 | name="kernel_{}".format(head)) 84 | self.kernels.append(kernel) 85 | 86 | # # Layer bias 87 | if self.use_bias: 88 | bias = self.add_weight(shape=(self.F_, ), 89 | initializer=self.bias_initializer, 90 | regularizer=self.bias_regularizer, 91 | constraint=self.bias_constraint, 92 | name="bias_{}".format(head)) 93 | self.biases.append(bias) 94 | 95 | if not self.attention: 96 | continue 97 | 98 | # Attention kernels 99 | attn_kernel_self = self.add_weight(shape=(self.F_, 1), 100 | initializer=self.attn_kernel_initializer, 101 | regularizer=self.attn_kernel_regularizer, 102 | constraint=self.attn_kernel_constraint, 103 | name="attn_kernel_self_{}".format(head),) 104 | attn_kernel_neighs = self.add_weight(shape=(self.F_, 1), 105 | initializer=self.attn_kernel_initializer, 106 | regularizer=self.attn_kernel_regularizer, 107 | constraint=self.attn_kernel_constraint, 108 | name="attn_kernel_neigh_{}".format(head)) 109 | self.attn_kernels.append([attn_kernel_self, attn_kernel_neighs]) 110 | 111 | self.built = True 112 | 113 | def call(self, inputs): 114 | X = inputs[0] # Node features (N x F) 115 | A = inputs[1] # Adjacency matrix (N x N) 116 | 117 | outputs = [] 118 | for head in range(self.attn_heads): 119 | kernel = self.kernels[head] # W in the paper (F x F") 120 | 121 | # Compute inputs to attention network 122 | features = K.dot(X, kernel) # (N x F") 123 | dropout_feat = Dropout(self.dropout_rate)(features) # (N x F") 124 | 125 | if not self.attention: 126 | node_features = tf.matmul(A, dropout_feat) # (N x F") 127 | else: 128 | attention_kernel = self.attn_kernels[head] # Attention kernel a in the paper (2F" x 1) 129 | 130 | # Compute feature combinations 131 | # Note: [[a_1], [a_2]]^T [[Wh_i], [Wh_2]] = [a_1]^T [Wh_i] + [a_2]^T [Wh_j] 132 | attn_for_self = K.dot(features, attention_kernel[0]) # (N x 
1), [a_1]^T [Wh_i] 133 | attn_for_neighs = K.dot(features, attention_kernel[1]) # (N x 1), [a_2]^T [Wh_j] 134 | 135 | # Attention head a(Wh_i, Wh_j) = a^T [[Wh_i], [Wh_j]] 136 | dense = attn_for_self + K.transpose(attn_for_neighs) # (N x N) via broadcasting 137 | 138 | # Add nonlinearty 139 | dense = LeakyReLU(alpha=0.2)(dense) 140 | 141 | # Mask values before activation (Vaswani et al., 2017) 142 | mask = -10e9 * (1.0 - A) 143 | dense += mask 144 | 145 | # Apply softmax to get attention coefficients 146 | dense = K.softmax(dense) # (N x N) 147 | 148 | # Apply dropout to features and attention coefficients 149 | dropout_attn = Dropout(self.dropout_rate)(dense) # (N x N) 150 | 151 | # Linear combination with neighbors" features 152 | node_features = K.dot(dropout_attn, dropout_feat) # (N x F") 153 | 154 | if self.use_bias: 155 | node_features = K.bias_add(node_features, self.biases[head]) 156 | 157 | if self.attn_heads_reduction == "concat": 158 | # If "concat", compute the activation here (Eq. 5) 159 | node_features = self.activation(node_features) 160 | 161 | # Add output of attention head to final output 162 | outputs.append(node_features) 163 | 164 | # Aggregate the heads" output according to the reduction method 165 | if self.attn_heads_reduction == "concat": 166 | output = K.concatenate(outputs) # (N x KF") 167 | else: 168 | output = K.mean(K.stack(outputs), axis=0) # N x F") 169 | # If "average", compute the activation here (Eq. 6) 170 | 171 | output = self.activation(output) 172 | return output 173 | 174 | def compute_output_shape(self, input_shape): 175 | output_shape = input_shape[0][0].value, self.output_dim 176 | return tf.TensorShape(output_shape) 177 | -------------------------------------------------------------------------------- /gcn/layers/projection_layer.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from tensorflow.python.keras import backend as K 3 | from tensorflow.python.keras.layers import Layer 4 | 5 | 6 | class ProjectionLayer(Layer): 7 | 8 | def __init__(self, embedding, **kwargs): 9 | super(ProjectionLayer, self).__init__(**kwargs) 10 | self.weight = embedding.embeddings 11 | self.output_dim = self.weight.shape[0] 12 | 13 | def call(self, x): 14 | return K.dot(x, K.transpose(self.weight)) 15 | 16 | def compute_output_shape(self, input_shape): 17 | assert input_shape and len(input_shape) >= 2 18 | assert input_shape[-1] 19 | output_shape = list(input_shape) 20 | output_shape[-1] = self.output_dim 21 | return tf.TensorShape(output_shape) 22 | -------------------------------------------------------------------------------- /gcn/metrics.py: -------------------------------------------------------------------------------- 1 | from tensorflow.python.keras import backend as K 2 | 3 | 4 | def perplexity(y_true, y_pred): 5 | cross_entropy = K.mean(K.sparse_categorical_crossentropy(y_true, y_pred)) 6 | perplexity = K.exp(cross_entropy) 7 | return perplexity 8 | -------------------------------------------------------------------------------- /gcn/util.py: -------------------------------------------------------------------------------- 1 | from tensorflow.python.client import device_lib 2 | 3 | 4 | def gpu_enable(): 5 | local_device_protos = device_lib.list_local_devices() 6 | gpus = [x.name for x in local_device_protos if x.device_type == "GPU"] 7 | if len(gpus) > 0: 8 | return True 9 | else: 10 | return False 11 | -------------------------------------------------------------------------------- 
/gcn/visualize/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/icoxfog417/graph-convolution-nlp/2f15da072e401528d9faf76985d05afce336798f/gcn/visualize/__init__.py -------------------------------------------------------------------------------- /gcn/visualize/draw.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import networkx as nx 3 | import matplotlib.pyplot as plt 4 | from gcn.graph.dependency_graph import DependencyGraph 5 | 6 | 7 | class AttentionDrawer(): 8 | 9 | def __init__(self, graph_builder): 10 | self.graph_builder = graph_builder 11 | 12 | def draw(self, sentence, attention=()): 13 | edge_matrix = () 14 | nodes = self.graph_builder.get_nodes(sentence) 15 | 16 | size = len(attention) if len(attention) > 0 else len(nodes) 17 | if isinstance(self.graph_builder, DependencyGraph): 18 | edge_matrix = self.graph_builder.build( 19 | sentence, size, return_label=True) 20 | matrix = attention 21 | if len(attention) == 0: 22 | matrix = self.graph_builder.build(sentence, size) 23 | graph = self._build(nodes, matrix, edge_matrix) 24 | return graph 25 | 26 | def _build(self, nodes, matrix, edge_matrix=()): 27 | graph = nx.Graph() 28 | _size = min(len(nodes), len(matrix)) 29 | graph.add_nodes_from(nodes[i] for i in range(_size)) 30 | for i in range(_size): 31 | for j in range(_size): 32 | if matrix[i][j] > 0: 33 | if len(edge_matrix) == 0: 34 | graph.add_edge(nodes[i], nodes[j], 35 | weight=matrix[i][j]) 36 | else: 37 | graph.add_edge(nodes[i], nodes[j], 38 | weight=matrix[i][j], 39 | label=edge_matrix[i][j]) 40 | 41 | return graph 42 | 43 | def show(self, graph, figsize=(6, 6), 44 | node_color="skyblue", edge_color="grey", 45 | font_size=15, max_width=5): 46 | plt.figure(figsize=figsize) 47 | pos = nx.spring_layout(graph) 48 | weights = np.array([graph[u][v]["weight"] for u, v in graph.edges()]) 49 | width = 1 + (np.abs(weights) * max_width - 1) 50 | 51 | nx.draw_networkx(graph, pos, 52 | node_color=node_color, 53 | font_size=font_size, edge_color=edge_color, 54 | width=width) 55 | 56 | if isinstance(self.graph_builder, DependencyGraph): 57 | labels = {(u, v): graph[u][v]["label"] for u, v in graph.edges()} 58 | nx.draw_networkx_edge_labels(graph, pos, edge_labels=labels) 59 | 60 | plt.axis("off") 61 | plt.tight_layout() 62 | plt.show() 63 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/icoxfog417/graph-convolution-nlp/2f15da072e401528d9faf76985d05afce336798f/requirements.txt -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/icoxfog417/graph-convolution-nlp/2f15da072e401528d9faf76985d05afce336798f/tests/__init__.py -------------------------------------------------------------------------------- /tests/classification/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/icoxfog417/graph-convolution-nlp/2f15da072e401528d9faf76985d05afce336798f/tests/classification/__init__.py -------------------------------------------------------------------------------- /tests/classification/test_baseline_tfidf.py: 
-------------------------------------------------------------------------------- 1 | import os 2 | import unittest 3 | from gcn.data.multi_nli_dataset import MultiNLIDataset 4 | from gcn.classification.baseline import TfidfClassifier 5 | 6 | 7 | class TestBaseline(unittest.TestCase): 8 | 9 | def test_baseline(self): 10 | root = os.path.join(os.path.dirname(__file__), "../../") 11 | dataset = MultiNLIDataset(root) 12 | data = dataset.test_data() 13 | 14 | classifier = TfidfClassifier() 15 | scores = classifier.fit(data["text"], data["label"]) 16 | self.assertTrue(len(scores) > 0) 17 | -------------------------------------------------------------------------------- /tests/classification/test_baseline_trainer.py: -------------------------------------------------------------------------------- 1 | import os 2 | import unittest 3 | from gcn.classification.baseline_trainer import BaselineTrainer 4 | from gcn.classification.baseline import LSTMClassifier 5 | 6 | 7 | class TestBaselineTrainer(unittest.TestCase): 8 | 9 | def test_build(self): 10 | root = os.path.join(os.path.dirname(__file__), "../../") 11 | trainer = BaselineTrainer(root, preprocessor_name="test_cbt_preprocessor") 12 | 13 | trainer.build() 14 | self.assertTrue(len(trainer.preprocessor.vocabulary.get()) > 1000) 15 | print(trainer.preprocessor.vocabulary.get()[:100]) 16 | print(trainer.preprocessor_path) 17 | os.remove(trainer.preprocessor_path) 18 | 19 | def test_train(self): 20 | root = os.path.join(os.path.dirname(__file__), "../../") 21 | trainer = BaselineTrainer(root, preprocessor_name="test_cbt_preprocessor") 22 | trainer.build() 23 | 24 | vocab_size = len(trainer.preprocessor.vocabulary.get()) 25 | model = LSTMClassifier(vocab_size) 26 | model.build(trainer.num_classes) 27 | 28 | metrics = trainer.train(model.model, epochs=2) 29 | self.assertTrue(metrics.history["acc"][-1] - metrics.history["acc"][0] > 0) 30 | -------------------------------------------------------------------------------- /tests/classification/test_trainer.py: -------------------------------------------------------------------------------- 1 | import os 2 | import shutil 3 | import unittest 4 | import numpy as np 5 | from gcn.classification.trainer import Trainer 6 | from gcn.classification.graph_based_classifier import GraphBasedClassifier 7 | from gcn.data.multi_nli_dataset import MultiNLIDataset 8 | from gcn.graph.dependency_graph import DependencyGraph 9 | from gcn.graph.similarity_graph import SimilarityGraph 10 | from gcn.graph.static_graph import StaticGraph 11 | 12 | 13 | class TestTrainer(unittest.TestCase): 14 | 15 | def test_train_by_dependency_graph(self): 16 | self._test_train("dependency") 17 | 18 | def test_train_by_similarity_graph(self): 19 | self._test_train("similarity") 20 | 21 | def test_train_by_static_graph(self): 22 | self._test_train("static") 23 | 24 | def _test_train(self, graph_type): 25 | root = os.path.join(os.path.dirname(__file__), "../../") 26 | sequence_length = 25 27 | heads = 3 28 | 29 | dataset = MultiNLIDataset(root) 30 | test_data = dataset.test_data() 31 | index = np.random.randint(len(test_data), size=1)[0] 32 | text = test_data["text"].iloc[index] 33 | 34 | graph_builder = None 35 | if graph_type == "dependency": 36 | graph_builder = DependencyGraph(lang="en") 37 | elif graph_type == "similarity": 38 | graph_builder = SimilarityGraph(lang="en") 39 | else: 40 | graph_builder = StaticGraph(lang="en") 41 | 42 | trainer = Trainer(graph_builder, root, 43 | preprocessor_name="test_ct_preprocessor") 44 | 45 | 
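        # Remainder of the test: build the preprocessor on the (small) test
        # split, wrap trainer.preprocess so it returns (word ids, graph)
        # pairs, train GraphBasedClassifier for two epochs, check that
        # accuracy improved, and verify the attention tensor shape
        # (heads x sequence_length x sequence_length) for one sentence.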
trainer.build(data_kind="test") 46 | 47 | def preprocessor(x): 48 | _x = trainer.preprocess(x, sequence_length) 49 | values = (_x["text"], _x["graph"]) 50 | return values 51 | 52 | _, g = preprocessor([text]) 53 | vocab_size = len(trainer.preprocessor.vocabulary.get()) 54 | model = GraphBasedClassifier(vocab_size, sequence_length, heads=heads) 55 | model.build(trainer.num_classes, preprocessor) 56 | 57 | metrics = trainer.train(model.model, epochs=2) 58 | os.remove(trainer.preprocessor_path) 59 | self.assertTrue(metrics.history["acc"][-1] - metrics.history["acc"][0] > 0) 60 | 61 | attention = model.show_attention([text]) 62 | self.assertEqual(len(attention), 1) # batch size 63 | attention = attention[0] 64 | self.assertEqual(len(attention), 2) # layer count 65 | attention = attention[0] 66 | self.assertEqual(attention.shape, (heads, sequence_length, sequence_length)) 67 | -------------------------------------------------------------------------------- /tests/data/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/icoxfog417/graph-convolution-nlp/2f15da072e401528d9faf76985d05afce336798f/tests/data/__init__.py -------------------------------------------------------------------------------- /tests/data/test_graph_dataset.py: -------------------------------------------------------------------------------- 1 | import os 2 | import unittest 3 | from gcn.data.graph_dataset import GraphDataset 4 | 5 | 6 | class TestGraphDataset(unittest.TestCase): 7 | 8 | def test_citeseer(self): 9 | root = os.path.join(os.path.dirname(__file__), "../../") 10 | gd = GraphDataset(root, kind="citeseer") 11 | x, y, tx, ty, allx, ally, graph, test_idx = gd.download() 12 | -------------------------------------------------------------------------------- /tests/data/test_multi_nli_dataset.py: -------------------------------------------------------------------------------- 1 | import os 2 | import unittest 3 | from gcn.data.multi_nli_dataset import MultiNLIDataset 4 | 5 | 6 | class TestMultiNLIDataset(unittest.TestCase): 7 | 8 | def test_download(self): 9 | root = os.path.join(os.path.dirname(__file__), "../../") 10 | dataset = MultiNLIDataset(root, prefix="test") 11 | dataset.download() 12 | 13 | train_data = dataset.train_data() 14 | test_data = dataset.test_data() 15 | 16 | for d in [train_data, test_data]: 17 | self.assertTrue(len(d) > 0) 18 | counts = d["label"].value_counts().values.tolist() 19 | c = counts[0] 20 | for _c in counts: 21 | self.assertEqual(c, _c) 22 | 23 | for k in ["train", "test"]: 24 | self.assertTrue(os.path.exists(dataset.interim_file(k))) 25 | os.remove(dataset.interim_file(k)) 26 | 27 | self.assertTrue(os.path.exists(dataset.processed_file(k))) 28 | os.remove(dataset.processed_file(k)) 29 | -------------------------------------------------------------------------------- /tests/graph/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/icoxfog417/graph-convolution-nlp/2f15da072e401528d9faf76985d05afce336798f/tests/graph/__init__.py -------------------------------------------------------------------------------- /tests/graph/test_dependency_graph.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import numpy as np 3 | from chariot.transformer.vocabulary import Vocabulary 4 | from gcn.graph import DependencyGraph 5 | 6 | 7 | class TestDependencyGraph(unittest.TestCase): 8 | 9 | def 
test_build(self): 10 | graph = DependencyGraph("en") 11 | matrix = graph.build("I am living at house") 12 | 13 | answer = np.array([ 14 | [0, 0, 1, 0, 0], 15 | [0, 0, 1, 0, 0], 16 | [0, 0, 1, 0, 0], 17 | [0, 0, 1, 0, 0], 18 | [0, 0, 0, 1, 0], 19 | ]) 20 | self.assertEqual(tuple(matrix.tolist()), 21 | tuple(answer.tolist())) 22 | 23 | def test_build_label(self): 24 | graph = DependencyGraph("en") 25 | matrix = graph.build("I am living at house", return_label=True) 26 | 27 | answer = [ 28 | ["", "", "nsubj", "", ""], 29 | ["", "", "aux", "", ""], 30 | ["", "", "ROOT", "", ""], 31 | ["", "", "prep", "", ""], 32 | ["", "", "", "pobj", ""], 33 | ] 34 | self.assertEqual(tuple(matrix), 35 | tuple(answer)) 36 | 37 | def test_batch_build(self): 38 | graph = DependencyGraph("en") 39 | 40 | sentences = ["I am living at house", 41 | "You are waiting on the station"] 42 | matrices = graph.batch_build(sentences, size=6) 43 | 44 | self.assertEqual(matrices.shape, (2, 6, 6)) 45 | -------------------------------------------------------------------------------- /tests/graph/test_similarity_graph.py: -------------------------------------------------------------------------------- 1 | import os 2 | import unittest 3 | import numpy as np 4 | from sklearn.metrics.pairwise import cosine_similarity 5 | from chariot.transformer.vocabulary import Vocabulary 6 | from gcn.graph import SimilarityGraph 7 | 8 | 9 | class TestSimilarityGraph(unittest.TestCase): 10 | 11 | def test_build(self): 12 | root = os.path.join(os.path.dirname(__file__), "../../") 13 | nearest_neighbor = 3 14 | node_count = 10 15 | feature_size = 5 16 | 17 | graph = SimilarityGraph("en", nearest_neighbor, root=root) 18 | 19 | vectors = np.random.uniform(size=node_count * feature_size) 20 | vectors = vectors.reshape(node_count, feature_size) 21 | 22 | similarity = cosine_similarity(vectors) 23 | similarity -= np.eye(node_count) 24 | top_k = np.argsort(-similarity, axis=1)[:, :nearest_neighbor] 25 | 26 | for mode in ["connectivity", "distance"]: 27 | graph.mode = mode 28 | matrix = graph._build(vectors) 29 | 30 | for i, top in enumerate(top_k): 31 | if mode == "connectivity": 32 | self.assertEqual(sum(matrix[i, top]), nearest_neighbor) 33 | else: 34 | self.assertEqual(tuple(similarity[i, top]), 35 | tuple(matrix[i, top])) 36 | 37 | def test_build_from_vocab(self): 38 | root = os.path.join(os.path.dirname(__file__), "../../") 39 | graph = SimilarityGraph("en", nearest_neighbor=2, root=root) 40 | matrix = graph.build("you loaded now") 41 | self.assertTrue(matrix.shape, (3, 3)) 42 | 43 | def test_batch_build(self): 44 | root = os.path.join(os.path.dirname(__file__), "../../") 45 | sentences = ["I am living at house", 46 | "You are waiting on the station"] 47 | graph = SimilarityGraph("en", nearest_neighbor=2, root=root) 48 | matrices = graph.batch_build(sentences, size=6) 49 | 50 | self.assertEqual(matrices.shape, (2, 6, 6)) 51 | -------------------------------------------------------------------------------- /tests/graph/test_static_graph.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import numpy as np 3 | from gcn.graph import StaticGraph 4 | 5 | 6 | class TestStaticGraph(unittest.TestCase): 7 | 8 | def test_build(self): 9 | for k in ("self", "previous"): 10 | for f in (True, False): 11 | graph = StaticGraph("en", kind=k, fill=f) 12 | matrix = graph.build("You can get static graph.") 13 | self.check_graph(matrix, k, f) 14 | 15 | def check_graph(self, matrix, kind, fill): 16 | 
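        # Expected patterns from StaticGraph.build, shown for a 3-token
        # sentence (row = token, column = the token it connects to):
        #   kind="self",     fill=False -> identity            [[1,0,0],[0,1,0],[0,0,1]]
        #   kind="self",     fill=True  -> lower triangular    [[1,0,0],[1,1,0],[1,1,1]]
        #   kind="previous", fill=False -> previous token only  [[0,0,0],[1,0,0],[0,1,0]]
        #   kind="previous", fill=True  -> all previous tokens  [[0,0,0],[1,0,0],[1,1,0]]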
print("kind={}, fill={}".format(kind, fill)) 17 | print(matrix) 18 | for r in range(len(matrix)): 19 | for c in range(len(matrix[r])): 20 | spike = False 21 | offset = 0 if kind == "self" else -1 22 | _r = r + offset 23 | if c == _r: 24 | spike = True 25 | elif fill and c <= _r: 26 | spike = True 27 | 28 | if spike: 29 | self.assertEqual(matrix[r][c], 1) 30 | else: 31 | self.assertEqual(matrix[r][c], 0) 32 | 33 | def test_batch_build(self): 34 | graph = StaticGraph("en") 35 | sentences = ["I am living at house", 36 | "You are waiting on the station"] 37 | matrix = graph.batch_build(sentences, size=3) 38 | 39 | self.assertEqual(matrix.shape, (2, 3, 3)) 40 | -------------------------------------------------------------------------------- /tests/language_model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/icoxfog417/graph-convolution-nlp/2f15da072e401528d9faf76985d05afce336798f/tests/language_model/__init__.py -------------------------------------------------------------------------------- /tests/language_model/test_similarity_graph_lm.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | from gcn.language_model.similarity_graph_lm import SimilarityGraphLM 3 | 4 | 5 | class TestSimilarityGraphLM(unittest.TestCase): 6 | 7 | def test_similarity_graph_lm(self): 8 | vocab_size = 100 9 | sequence_length = 15 10 | embedding_size = 10 11 | model = SimilarityGraphLM(vocab_size, sequence_length, 12 | embedding_size) 13 | -------------------------------------------------------------------------------- /tests/language_model/test_trainer.py: -------------------------------------------------------------------------------- 1 | import os 2 | import shutil 3 | import unittest 4 | from gcn.language_model.trainer import Trainer 5 | from gcn.language_model.baseline import LSTMLM 6 | 7 | 8 | class TestTrainer(unittest.TestCase): 9 | 10 | def test_download(self): 11 | root = os.path.join(os.path.dirname(__file__), "../../") 12 | trainer = Trainer(root) 13 | 14 | r = trainer.download() 15 | self.assertTrue(r) 16 | 17 | def test_build(self): 18 | root = os.path.join(os.path.dirname(__file__), "../../") 19 | trainer = Trainer(root, preprocessor_name="test_lm_preprocessor") 20 | 21 | trainer.build("valid") 22 | self.assertTrue(len(trainer.preprocessor.vocabulary.get()) > 1000) 23 | print(trainer.preprocessor.vocabulary.get()[:100]) 24 | print(trainer.preprocessor_path) 25 | os.remove(trainer.preprocessor_path) 26 | 27 | def test_train(self): 28 | root = os.path.join(os.path.dirname(__file__), "../../") 29 | trainer = Trainer(root, preprocessor_name="test_train_lm_preprocessor", 30 | log_dir="lm_test") 31 | trainer.build("valid") 32 | 33 | vocab_size = len(trainer.preprocessor.vocabulary.get()) 34 | model = LSTMLM(vocab_size, embedding_size=100, hidden_size=50) 35 | 36 | metrics = trainer.train(model, data_kind="valid", epochs=2) 37 | last_acc = metrics.history["acc"][-1] 38 | shutil.rmtree(trainer.log_dir) 39 | os.remove(trainer.preprocessor_path) 40 | self.assertTrue(metrics.history["acc"][-1] - metrics.history["acc"][0] > 0) 41 | -------------------------------------------------------------------------------- /tests/layers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/icoxfog417/graph-convolution-nlp/2f15da072e401528d9faf76985d05afce336798f/tests/layers/__init__.py 
-------------------------------------------------------------------------------- /tests/layers/simple_attention_layer.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from tensorflow.python.keras import backend as K 3 | from tensorflow.python.keras.engine.base_layer import InputSpec 4 | from tensorflow.python.keras.layers import Dense 5 | 6 | 7 | class SimpleAttentionLayer(Dense): 8 | 9 | def __init__(self, 10 | feature_units, 11 | activation="relu", 12 | return_attention=False, 13 | node_axis="row", 14 | merge_method="add", 15 | use_attention_kernel=True, 16 | **kwargs): 17 | 18 | super(SimpleAttentionLayer, self).__init__(units=feature_units, 19 | activation=activation, 20 | **kwargs) 21 | if merge_method == "concat" and not use_attention_kernel: 22 | raise Exception("Can't use concat without attention") 23 | 24 | self.return_attention = return_attention 25 | self.node_axis = node_axis 26 | self.merge_method = merge_method 27 | self.use_attention_kernel = use_attention_kernel 28 | self.input_spec = [InputSpec(ndim=3), InputSpec(ndim=3)] 29 | self.supports_masking = False 30 | 31 | self.self_kernel = None 32 | self.neighbor_kernel = None 33 | self.attention_kernel = None 34 | self.bias = None 35 | 36 | def build(self, input_shape): 37 | X_dims, A_dims = [dims.as_list() for dims in input_shape] 38 | assert len(X_dims) == 3 39 | assert len(A_dims) == 3 and A_dims[1] == A_dims[2] 40 | 41 | F = X_dims[-1] 42 | N = X_dims[1] 43 | 44 | for kind in ["self", "neighbor", "attention"]: 45 | if kind in ["self", "neighbor"]: 46 | if self.use_attention_kernel: 47 | shape = (F, self.units) 48 | else: 49 | shape = (F, 1) 50 | elif kind == "attention" and self.use_attention_kernel: 51 | if self.merge_method == "concat": 52 | shape = (self.units * 2, 1) 53 | else: 54 | shape = (self.units, 1) 55 | else: 56 | shape = () 57 | 58 | if len(shape) == 0: 59 | continue 60 | 61 | kernel = self.add_weight(shape=shape, 62 | initializer=self.kernel_initializer, 63 | regularizer=self.kernel_regularizer, 64 | constraint=self.kernel_constraint, 65 | name="{}_kernel".format(kind)) 66 | 67 | if kind == "self": 68 | self.self_kernel = kernel 69 | elif kind == "neighbor": 70 | self.neighbor_kernel = kernel 71 | elif kind == "attention": 72 | self.attention_kernel = kernel 73 | 74 | if self.use_bias: 75 | self.bias = self.add_weight(shape=(N, N), 76 | initializer=self.bias_initializer, 77 | regularizer=self.bias_regularizer, 78 | constraint=self.bias_constraint, 79 | name="bias") 80 | 81 | self.built = True 82 | 83 | def call(self, inputs): 84 | X = inputs[0] # Node features (B x N x F) 85 | A = inputs[1] # Adjacency matrix (B x N x N) 86 | 87 | X_dims = X.get_shape().as_list() 88 | B, N, F = X_dims 89 | 90 | feature_self = K.dot(X, self.self_kernel) 91 | feature_neighbor = K.dot(X, self.neighbor_kernel) 92 | 93 | # repeat_elements is same as np.repeat. 94 | # it repeats element to row direction. 95 | # Example. 
96 | # z = np.array([[1,2,3],[4,5,6]]) # shape=(2, 3) 97 | # repeat = 4 98 | # np.reshape(np.repeat(z, repeat, axis=-1), (2, 3, repeat)) 99 | # > array([[[1, 1, 1, 1], 100 | # [2, 2, 2, 2], 101 | # [3, 3, 3, 3]], 102 | # [[4, 4, 4, 4], 103 | # [5, 5, 5, 5], 104 | # [6, 6, 6, 6]]]) 105 | feature_self = K.repeat_elements(feature_self, N, axis=2) 106 | feature_self = K.reshape(feature_self, (-1, N, N, self.units)) 107 | 108 | feature_neighbor = K.repeat_elements(feature_neighbor, N, axis=2) 109 | feature_neighbor = K.reshape(feature_neighbor, (-1, N, N, self.units)) 110 | 111 | T = (0, 2, 1, 3) 112 | if self.merge_method == "concat": 113 | if self.node_axis == "row": 114 | merged = tf.concat([feature_self, 115 | tf.transpose(feature_neighbor, T)], 116 | axis=-1) 117 | else: 118 | merged = tf.concat([tf.transpose(feature_self, T), 119 | feature_neighbor], 120 | axis=-1) 121 | else: 122 | if self.node_axis == "row": 123 | merged = feature_self + tf.transpose(feature_neighbor, T) 124 | else: 125 | merged = tf.transpose(feature_self, T) + feature_neighbor 126 | 127 | activation_func = tf.nn.tanh 128 | if self.use_attention_kernel: 129 | attention = K.dot(activation_func(merged), self.attention_kernel) 130 | else: 131 | attention = activation_func(merged) 132 | 133 | attention = K.reshape(attention, (-1, N, N)) 134 | if self.use_bias: 135 | attention = K.bias_add(attention, self.bias) 136 | 137 | mask = -10e9 * (1.0 - A) 138 | attention += mask 139 | 140 | attention = tf.nn.softmax(attention) 141 | 142 | output = tf.matmul(attention, X) 143 | 144 | if self.return_attention: 145 | return (output, attention) 146 | else: 147 | return output 148 | 149 | def compute_output_shape(self, input_shape): 150 | X_dims, A_dims = [dims.as_list() for dims in input_shape] 151 | assert len(X_dims) == 3 152 | assert len(A_dims) == 3 153 | output_shape = X_dims[0], X_dims[0], self.output_dim 154 | 155 | if self.return_attention: 156 | return (tf.TensorShape(output_shape), 157 | tf.TensorShape(A_dims)) 158 | else: 159 | return tf.TensorShape(output_shape) 160 | -------------------------------------------------------------------------------- /tests/layers/simple_attention_layer_multi.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from tensorflow.python.keras import backend as K 3 | from tensorflow.python.keras.engine.base_layer import InputSpec 4 | from tensorflow.python.keras.layers import Dense 5 | 6 | 7 | class SimpleAttentionLayer(Dense): 8 | 9 | def __init__(self, 10 | feature_units, 11 | activation="relu", 12 | return_attention=False, 13 | node_axis="row", 14 | merge_method="add", 15 | use_attention_kernel=True, 16 | **kwargs): 17 | 18 | super(SimpleAttentionLayer, self).__init__(units=feature_units, 19 | activation=activation, 20 | **kwargs) 21 | if merge_method == "concat" and not use_attention_kernel: 22 | raise Exception("Can't use concat without attention") 23 | 24 | self.return_attention = return_attention 25 | self.node_axis = node_axis 26 | self.merge_method = merge_method 27 | self.use_attention_kernel = use_attention_kernel 28 | self.input_spec = [InputSpec(ndim=3), InputSpec(ndim=3)] 29 | self.supports_masking = False 30 | 31 | self.kernel = None 32 | self.bias = None 33 | 34 | def build(self, input_shape): 35 | X_dims, A_dims = [dims.as_list() for dims in input_shape] 36 | assert len(X_dims) == 3 37 | assert len(A_dims) == 3 and A_dims[1] == A_dims[2] 38 | 39 | F = X_dims[-1] 40 | N = X_dims[1] 41 | 42 | self.kernel = 
self.add_weight(shape=(F, F), 43 | initializer=self.kernel_initializer, 44 | regularizer=self.kernel_regularizer, 45 | constraint=self.kernel_constraint, 46 | name="kernel") 47 | 48 | if self.use_bias: 49 | self.bias = self.add_weight(shape=(N, N), 50 | initializer=self.bias_initializer, 51 | regularizer=self.bias_regularizer, 52 | constraint=self.bias_constraint, 53 | name="bias") 54 | 55 | self.built = True 56 | 57 | def call(self, inputs): 58 | X = inputs[0] # Node features (B x N x F) 59 | A = inputs[1] # Adjacency matrix (B x N x N) 60 | 61 | X_dims = X.get_shape().as_list() 62 | B, N, F = X_dims 63 | 64 | merged = tf.matmul(K.dot(X, self.kernel), 65 | tf.transpose(X, (0, 2, 1))) 66 | attention = tf.nn.tanh(merged) 67 | attention = K.reshape(attention, (-1, N, N)) 68 | 69 | if self.use_bias: 70 | attention = K.bias_add(attention, self.bias) 71 | 72 | mask = -10e9 * (1.0 - A) 73 | attention += mask 74 | 75 | attention = tf.nn.softmax(attention) 76 | output = tf.matmul(attention, X) 77 | 78 | if self.return_attention: 79 | return (output, attention) 80 | else: 81 | return output 82 | 83 | def compute_output_shape(self, input_shape): 84 | X_dims, A_dims = [dims.as_list() for dims in input_shape] 85 | assert len(X_dims) == 3 86 | assert len(A_dims) == 3 87 | output_shape = X_dims[0], X_dims[0], self.output_dim 88 | 89 | if self.return_attention: 90 | return (tf.TensorShape(output_shape), 91 | tf.TensorShape(A_dims)) 92 | else: 93 | return tf.TensorShape(output_shape) 94 | -------------------------------------------------------------------------------- /tests/layers/test_attention_layer.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import numpy as np 3 | from tensorflow.python import keras as K 4 | from gcn.layers import AttentionLayer 5 | 6 | 7 | class TestAttentionLayer(unittest.TestCase): 8 | 9 | def test_attention_layer(self): 10 | sample_size = 20000 11 | sequence_length = 20 12 | embedding_size = 2 13 | units = 8 14 | attention_column = 3 15 | 16 | # Baseline Model 17 | base = K.Sequential() 18 | base.add(K.layers.LSTM(units, 19 | input_shape=[sequence_length, embedding_size])) 20 | base.add(K.layers.Dense(1, activation="sigmoid")) 21 | base.compile(optimizer="adam", loss="binary_crossentropy", 22 | metrics=["accuracy"]) 23 | 24 | # Attention Model 25 | def make_model(): 26 | input = K.layers.Input([sequence_length, embedding_size]) 27 | lstm_out = K.layers.LSTM(units, return_sequences=True)(input) 28 | attn_out, prob = AttentionLayer(sequence_length, 29 | return_attentions=True)(lstm_out) 30 | output = K.layers.Dense(1, activation="sigmoid")(attn_out) 31 | model = K.models.Model(inputs=input, outputs=output) 32 | return model 33 | 34 | model = make_model() 35 | model.compile(optimizer="adam", loss="binary_crossentropy", 36 | metrics=["accuracy"]) 37 | x, y = self.make_test_data(sample_size, sequence_length, 38 | embedding_size, attention_column) 39 | 40 | base_metrics = base.fit(x, y, epochs=1, batch_size=32, 41 | validation_split=0.1, verbose=1) 42 | metrics = model.fit(x, y, epochs=1, batch_size=32, 43 | validation_split=0.1, verbose=1) 44 | 45 | base_score = base_metrics.history["val_acc"][-1] 46 | score = metrics.history["val_acc"][-1] 47 | self.assertTrue(score > base_score) 48 | 49 | attention_layer = model.layers[2] 50 | attention_model = K.models.Model(inputs=model.input, 51 | outputs=attention_layer.output) 52 | activation, attention = attention_model.predict_on_batch(x) 53 | attention_index =
np.argmax(np.mean(attention, axis=0)) 54 | print(np.mean(attention, axis=0)) 55 | self.assertTrue(attention_index in [attention_column, 56 | attention_column + 1]) 57 | 58 | def make_test_data(self, sample_size, sequence_length, embedding_size, 59 | attention_column): 60 | if attention_column >= sequence_length: 61 | raise Exception("Directed column is larger than sequence_length.") 62 | 63 | x = np.random.standard_normal(size=(sample_size, sequence_length, 64 | embedding_size)) 65 | y = np.random.randint(low=0, high=2, size=(sample_size, 1)) 66 | x[:, attention_column, :] = np.tile(y[:], (1, embedding_size)) 67 | 68 | return x, y 69 | -------------------------------------------------------------------------------- /tests/layers/test_attention_on_graph.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import numpy as np 3 | from scipy.spatial import distance_matrix 4 | from tensorflow.python import keras as K 5 | from tests.layers.simple_attention_layer import SimpleAttentionLayer 6 | 7 | 8 | class TestAttentionOnGraph(unittest.TestCase): 9 | 10 | def test_attention_learning(self): 11 | exp1 = self.run_attention_learning("column", "add", False) # original 12 | exp2 = self.run_attention_learning("column", "add", True) 13 | exp3 = self.run_attention_learning("column", "concat", True) 14 | exp4 = self.run_attention_learning("row", "add", False) 15 | exp5 = self.run_attention_learning("row", "add", True) 16 | exp6 = self.run_attention_learning("row", "concat", True) 17 | 18 | for acc in [exp2, exp3, exp4, exp5, exp6]: 19 | # original method should be most accurate 20 | self.assertGreater(exp1, acc) 21 | raise Exception("ex") 22 | 23 | def run_attention_learning(self, node_axis, merge_method, 24 | use_attention_kernel): 25 | node_count = 10 26 | feature_size = 2 27 | feature_units = 2 28 | problem_count = 10000 29 | validation_count = 5 30 | 31 | last_accs = [] 32 | for i in range(validation_count): 33 | model = self.make_simple_attention_network( 34 | node_count, feature_size, feature_units, 35 | node_axis, merge_method, 36 | use_attention_kernel) 37 | 38 | model.compile(loss="categorical_crossentropy", optimizer="adam", 39 | metrics=["accuracy"]) 40 | 41 | params = self.make_problems(node_count, feature_size, 42 | feature_units, problem_count) 43 | node_inputs, matrix_inputs, answers, attn_answers = params 44 | 45 | metrics = model.fit([node_inputs, matrix_inputs], attn_answers, 46 | validation_split=0.2, epochs=8, verbose=0) 47 | acc = metrics.history["val_acc"][-1] 48 | last_accs.append(acc) 49 | 50 | def calc_baseline_acc(A, label): 51 | x = np.random.normal(size=A.shape) * A 52 | x_exp = np.exp(x) 53 | x = x_exp / np.sum(x_exp, axis=-1, keepdims=True) 54 | match = np.equal(np.argmax(label, axis=-1), 55 | np.argmax(x_exp, axis=-1),) 56 | count = A.shape[0] * A.shape[1] 57 | acc = np.sum(match) / count 58 | return acc 59 | 60 | baseline_acc = calc_baseline_acc(matrix_inputs, attn_answers) 61 | method = "Merge: {} Node: {} Attention: {}".format( 62 | merge_method, node_axis, use_attention_kernel) 63 | if merge_method == "add" and node_axis == "column" and \ 64 | not use_attention_kernel: 65 | method += " (original)" 66 | 67 | print(method) 68 | acc = np.mean(last_accs) 69 | print("\t acc: {}(+/-{}) (baseline {})".format( 70 | acc, np.std(last_accs), baseline_acc)) 71 | return acc 72 | 73 | def make_problems(self, node_count, feature_size, feature_units, 74 | problem_count): 75 | """ 76 | Make task to extract the nearest node from 
neighbors. 77 | """ 78 | 79 | node_samples = problem_count * node_count * feature_size 80 | node_inputs = np.random.uniform(high=10, size=node_samples).reshape( 81 | (problem_count, node_count, feature_size)) 82 | 83 | matrix_samples = problem_count * node_count * node_count 84 | matrix_inputs = np.random.randint(2, size=matrix_samples).reshape( 85 | (problem_count, node_count, node_count)) 86 | 87 | answers = [] 88 | attention_answers = [] 89 | for n, m in zip(node_inputs, matrix_inputs): 90 | distance = distance_matrix(n, n) 91 | mask = 10e9 * (1.0 - m) 92 | target_index = np.argmin(distance * m + mask, axis=1) 93 | 94 | answers.append(n[target_index]) 95 | attn = np.zeros(m.shape) 96 | attn[np.arange(len(attn)), target_index] = 1 97 | attention_answers.append(attn) 98 | 99 | answers = np.array(answers) 100 | attention_answers = np.array(attention_answers) 101 | 102 | return node_inputs, matrix_inputs, answers, attention_answers 103 | 104 | def make_simple_attention_network(self, node_count, 105 | feature_size, feature_units, 106 | node_axis, merge_method, 107 | use_attention_kernel): 108 | 109 | nodes = K.layers.Input(shape=(node_count, feature_size)) 110 | matrix = K.layers.Input(shape=(node_count, node_count)) 111 | layer = SimpleAttentionLayer(feature_units=feature_units, 112 | node_axis=node_axis, 113 | merge_method=merge_method, 114 | use_attention_kernel=use_attention_kernel, 115 | return_attention=True) 116 | 117 | _, attn = layer([nodes, matrix]) 118 | model = K.models.Model(inputs=[nodes, matrix], outputs=attn) 119 | return model 120 | -------------------------------------------------------------------------------- /tests/layers/test_graph_attention_layer.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import numpy as np 3 | from scipy.spatial import distance_matrix 4 | from tensorflow.python import keras as K 5 | from gcn.layers.graph_attention_layer_before import GraphAttentionLayer as GraphAttentionLayerB 6 | from gcn.layers.graph_attention_layer import GraphAttentionLayer 7 | 8 | 9 | class TestGraphAttentionLayer(unittest.TestCase): 10 | TEST_PATTERN = (10, 2, 2) 11 | 12 | def test_forward(self): 13 | node_count = 12 14 | feature_size = 10 15 | feature_units = 8 16 | head = 3 17 | 18 | batch_size = 32 19 | node_samples = batch_size * node_count * feature_size 20 | node_inputs = np.random.uniform(size=node_samples) 21 | node_inputs = node_inputs.reshape((batch_size, 22 | node_count, feature_size)) 23 | 24 | matrix_samples = batch_size * node_count * node_count 25 | matrix_inputs = np.random.randint(2, size=matrix_samples) 26 | matrix_inputs = matrix_inputs.reshape((batch_size, 27 | node_count, node_count)) 28 | 29 | concat_model = self.make_graph_attention_network( 30 | node_count, feature_size, feature_units, 31 | head=head, merge="concat") 32 | outputs = concat_model.predict([node_inputs, matrix_inputs]) 33 | self.assertEqual(outputs.shape, (batch_size, node_count, 34 | feature_units * head)) 35 | 36 | mean_model = self.make_graph_attention_network( 37 | node_count, feature_size, feature_units, 38 | head=head, merge="average") 39 | outputs = mean_model.predict([node_inputs, matrix_inputs]) 40 | self.assertEqual(outputs.shape, (batch_size, node_count, 41 | feature_units)) 42 | 43 | def test_training(self): 44 | node_count = 4 45 | feature_size = 3 46 | feature_units = 1 47 | problem_count = 1000 48 | 49 | node_inputs, matrix_inputs, answers, _ = self.make_problems( 50 | node_count, feature_size, 51 | 
feature_units, 52 | problem_count) 53 | 54 | model = self.make_graph_attention_network( 55 | node_count, feature_size, feature_units, 56 | merge="average") 57 | model.compile(loss="mse", optimizer="adam") 58 | metrics = model.fit([node_inputs, matrix_inputs], answers, 59 | validation_split=0.3, 60 | epochs=50) 61 | last_loss = metrics.history["val_loss"][-1] 62 | min_loss = np.min(metrics.history["val_loss"]) 63 | self.assertEqual(last_loss, min_loss) 64 | 65 | def test_attention(self): 66 | node_count, feature_size, feature_units = self.TEST_PATTERN 67 | 68 | def make_model(): 69 | model, model_attn = self.make_graph_attention_network( 70 | node_count, feature_size, feature_units, 71 | return_attention=True) 72 | return model, model_attn 73 | 74 | loss, hit_prob = self._test_attention(make_model, 75 | node_count, 76 | feature_size, feature_units, 77 | header="GAL After") 78 | 79 | self.assertGreater(hit_prob, 0.6) 80 | 81 | def test_attention_before(self): 82 | node_count, feature_size, feature_units = self.TEST_PATTERN 83 | 84 | def make_model(): 85 | model, model_attn = self.make_graph_attention_network( 86 | node_count, feature_size, feature_units, 87 | return_attention=True, before=True) 88 | return model, model_attn 89 | 90 | loss, hit_prob = self._test_attention(make_model, 91 | node_count, 92 | feature_size, feature_units, 93 | header="GAL Before") 94 | self.assertGreater(hit_prob, 0.6) 95 | 96 | def test_attention_theoretical(self): 97 | node_count, feature_size, feature_units = self.TEST_PATTERN 98 | 99 | def make_model(): 100 | model, model_attn = self.make_simple_attention_network( 101 | node_count, feature_size, feature_units, 102 | return_attention=True) 103 | return model, model_attn 104 | 105 | loss, hit_prob = self._test_attention(make_model, 106 | node_count, 107 | feature_size, feature_units, 108 | header="Theoretical Attention") 109 | self.assertGreater(hit_prob, 0.6) 110 | 111 | def _test_attention(self, make_model, 112 | node_count, feature_size, feature_units, 113 | problem_count=10000, validation_count=5, 114 | header=""): 115 | 116 | losses = [] 117 | hit_probs = [] 118 | for i in range(validation_count): 119 | model, model_attn = make_model() 120 | params = self.make_problems(node_count, feature_size, 121 | feature_units, problem_count) 122 | node_inputs, matrix_inputs, answers, attn_answers = params 123 | 124 | model.compile(loss="mse", optimizer="adam") 125 | model.fit([node_inputs, matrix_inputs], answers, 126 | validation_split=0.3, epochs=20) 127 | 128 | attentions = model_attn.predict([node_inputs, matrix_inputs]) 129 | 130 | if len(attentions.shape) == 4: 131 | attentions = attentions[:, 0, :, :] # attention of head 0 132 | 133 | loss, hit_prob = self.calculate_attention_loss( 134 | attentions, attn_answers) 135 | losses.append(loss) 136 | hit_probs.append(hit_prob) 137 | 138 | loss = np.mean(losses) 139 | hit_prob = np.mean(hit_probs) 140 | if header: 141 | print(header) 142 | print("\t loss: {}(+/-{}), hit_prob:{} (+/-{}).".format( 143 | loss, np.std(losses), hit_prob, np.std(hit_probs))) 144 | return loss, hit_prob 145 | 146 | def make_problems(self, node_count, feature_size, feature_units, 147 | problem_count): 148 | """ 149 | Make task to extract the nearest node from neighbors.
150 | """ 151 | 152 | node_samples = problem_count * node_count * feature_size 153 | node_inputs = np.random.uniform(high=10, size=node_samples).reshape( 154 | (problem_count, node_count, feature_size)) 155 | 156 | matrix_samples = problem_count * node_count * node_count 157 | matrix_inputs = np.random.randint(2, size=matrix_samples).reshape( 158 | (problem_count, node_count, node_count)) 159 | 160 | answers = [] 161 | attention_answers = [] 162 | for n, m in zip(node_inputs, matrix_inputs): 163 | distance = distance_matrix(n, n) 164 | mask = 10e9 * (1.0 - m) 165 | target_index = np.argmin(distance * m + mask, axis=1) 166 | 167 | if feature_size == feature_units: 168 | answers.append(n[target_index]) 169 | else: 170 | answers.append(n[target_index][:, :feature_units]) 171 | 172 | attn = np.zeros(m.shape) 173 | attn[np.arange(len(attn)), target_index] = 1 174 | attention_answers.append(attn) 175 | 176 | answers = np.array(answers) 177 | attention_answers = np.array(attention_answers) 178 | 179 | return node_inputs, matrix_inputs, answers, attention_answers 180 | 181 | def make_graph_attention_network(self, node_count, 182 | feature_size, feature_units, 183 | head=1, merge="average", 184 | return_attention=False, 185 | before=False): 186 | 187 | nodes = K.layers.Input(shape=(node_count, feature_size)) 188 | matrix = K.layers.Input(shape=(node_count, node_count)) 189 | 190 | if before: 191 | GAL = GraphAttentionLayerB 192 | else: 193 | GAL = GraphAttentionLayer 194 | 195 | layer = GAL(feature_units=feature_units, 196 | attn_heads=head, 197 | attn_heads_reduction=merge, 198 | dropout_rate=0.0, 199 | return_attention=return_attention) 200 | 201 | if return_attention: 202 | output, attn = layer([nodes, matrix]) 203 | else: 204 | output = layer([nodes, matrix]) 205 | 206 | model = K.models.Model(inputs=[nodes, matrix], outputs=output) 207 | if return_attention: 208 | model_attn = K.models.Model(inputs=[nodes, matrix], outputs=attn) 209 | return model, model_attn 210 | else: 211 | return model 212 | 213 | def make_simple_attention_network(self, node_count, 214 | feature_size, feature_units, 215 | return_attention=False): 216 | 217 | from tests.layers.simple_attention_layer import SimpleAttentionLayer 218 | 219 | nodes = K.layers.Input(shape=(node_count, feature_size)) 220 | matrix = K.layers.Input(shape=(node_count, node_count)) 221 | layer = SimpleAttentionLayer(feature_units=feature_units, 222 | return_attention=return_attention) 223 | 224 | if return_attention: 225 | output, attn = layer([nodes, matrix]) 226 | attn = attn 227 | else: 228 | output = layer([nodes, matrix]) 229 | 230 | model = K.models.Model(inputs=[nodes, matrix], outputs=output) 231 | if return_attention: 232 | model_attn = K.models.Model(inputs=[nodes, matrix], outputs=attn) 233 | return model, model_attn 234 | else: 235 | return model 236 | 237 | def calculate_attention_loss(self, predicted, answers): 238 | loss = 0 239 | hit_prob = 0 240 | 241 | for p, a in zip(predicted, answers): 242 | norm = np.linalg.norm(p * a - a) 243 | hits = np.sum(np.equal(np.argmax(p, axis=1), 244 | np.argmax(a, axis=1))) 245 | hit_prob += hits / len(p) 246 | loss += norm 247 | loss = loss / len(predicted) 248 | hit_prob = hit_prob / len(predicted) 249 | return loss, hit_prob 250 | -------------------------------------------------------------------------------- /tests/visualize/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/icoxfog417/graph-convolution-nlp/2f15da072e401528d9faf76985d05afce336798f/tests/visualize/__init__.py -------------------------------------------------------------------------------- /tests/visualize/test_draw.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import numpy as np 3 | from chariot.transformer.vocabulary import Vocabulary 4 | from gcn.graph import DependencyGraph, SimilarityGraph, StaticGraph 5 | from gcn.visualize.draw import AttentionDrawer 6 | 7 | 8 | class TestDraw(unittest.TestCase): 9 | 10 | def test_draw_dependency_graph(self): 11 | sentence = "I am living at house" 12 | graph_builder = DependencyGraph("en") 13 | attention = np.array([ 14 | [0, 0, 1, 0, 0], 15 | [0, 0, 0.2, 0, 0], 16 | [0, 0, 0.7, 0, 0], 17 | [0, 0, 1, 0, 0], 18 | [0, 0, 0, 0.5, 0], 19 | ]) 20 | 21 | drawer = AttentionDrawer(graph_builder) 22 | graph = drawer.draw(sentence, attention) 23 | drawer.show(graph) 24 | 25 | def test_draw_similarity_graph(self): 26 | sentence = "I am building similarity graph structure" 27 | graph_builder = SimilarityGraph("en") 28 | drawer = AttentionDrawer(graph_builder) 29 | graph = drawer.draw(sentence) 30 | drawer.show(graph) 31 | 32 | def test_draw_static_graph(self): 33 | sentence = "I am static graph" 34 | graph_builder = StaticGraph("en", kind="previous") 35 | drawer = AttentionDrawer(graph_builder) 36 | graph = drawer.draw(sentence) 37 | drawer.show(graph) 38 | --------------------------------------------------------------------------------