├── .gitignore
├── LICENSE
├── README.md
├── data
│   ├── external
│   │   └── .gitkeep
│   ├── interim
│   │   └── .gitkeep
│   ├── log
│   │   └── .gitkeep
│   ├── processed
│   │   └── .gitkeep
│   └── raw
│       └── .gitkeep
├── experiments
│   ├── __init__.py
│   ├── classification
│   │   ├── baseline_evaluation.ipynb
│   │   ├── baseline_lstm.py
│   │   ├── baseline_merge.py
│   │   ├── baseline_tfidf.py
│   │   ├── graph_based_experiment.py
│   │   ├── model_analysis_for_attention.ipynb
│   │   └── model_analysis_for_graph_structure.ipynb
│   ├── language_model
│   │   ├── __init__.py
│   │   ├── baseline.py
│   │   └── baseline_test.py
│   └── layers
│       ├── __init__.py
│       ├── gat_experiment.py
│       ├── gat_experiment_base.py
│       ├── gat_experiment_original.py
│       ├── gat_experiment_original_without_attention.py
│       └── gat_experiment_without_attention.py
├── gcn
│   ├── __init__.py
│   ├── base_trainer.py
│   ├── classification
│   │   ├── __init__.py
│   │   ├── baseline.py
│   │   ├── baseline_trainer.py
│   │   ├── graph_based_classifier.py
│   │   └── trainer.py
│   ├── data
│   │   ├── __init__.py
│   │   ├── graph_dataset.py
│   │   └── multi_nli_dataset.py
│   ├── graph
│   │   ├── __init__.py
│   │   ├── dependency_graph.py
│   │   ├── similarity_graph.py
│   │   └── static_graph.py
│   ├── language_model
│   │   ├── baseline.py
│   │   ├── similarity_graph_lm.py
│   │   └── trainer.py
│   ├── layers
│   │   ├── __init__.py
│   │   ├── attention_layer.py
│   │   ├── graph_attention_layer.py
│   │   ├── graph_attention_layer_before.py
│   │   ├── graph_attention_layer_multi.py
│   │   ├── graph_attention_layer_original.py
│   │   └── projection_layer.py
│   ├── metrics.py
│   ├── util.py
│   └── visualize
│       ├── __init__.py
│       └── draw.py
├── requirements.txt
└── tests
    ├── __init__.py
    ├── classification
    │   ├── __init__.py
    │   ├── test_baseline_tfidf.py
    │   ├── test_baseline_trainer.py
    │   └── test_trainer.py
    ├── data
    │   ├── __init__.py
    │   ├── test_graph_dataset.py
    │   └── test_multi_nli_dataset.py
    ├── graph
    │   ├── __init__.py
    │   ├── test_dependency_graph.py
    │   ├── test_similarity_graph.py
    │   └── test_static_graph.py
    ├── language_model
    │   ├── __init__.py
    │   ├── test_similarity_graph_lm.py
    │   └── test_trainer.py
    ├── layers
    │   ├── __init__.py
    │   ├── simple_attention_layer.py
    │   ├── simple_attention_layer_multi.py
    │   ├── test_attention_layer.py
    │   ├── test_attention_on_graph.py
    │   └── test_graph_attention_layer.py
    └── visualize
        ├── __init__.py
        └── test_draw.py
/.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | .pytest_cache/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | db.sqlite3 58 | 59 | # Flask stuff: 60 | instance/ 61 | .webassets-cache 62 | 63 | # Scrapy stuff: 64 | .scrapy 65 | 66 | # Sphinx documentation 67 | docs/_build/ 68 | 69 | # PyBuilder 70 | target/ 71 | 72 | # Jupyter Notebook 73 | .ipynb_checkpoints 74 | 75 | # pyenv 76 | .python-version 77 | 78 | # celery beat schedule file 79 | celerybeat-schedule 80 | 81 | # SageMath parsed files 82 | *.sage.py 83 | 84 | # Environments 85 | .env 86 | .venv 87 | env/ 88 | venv/ 89 | ENV/ 90 | env.bak/ 91 | venv.bak/ 92 | 93 | # Spyder project settings 94 | .spyderproject 95 | .spyproject 96 | 97 | # Rope project settings 98 | .ropeproject 99 | 100 | # mkdocs documentation 101 | /site 102 | 103 | # mypy 104 | .mypy_cache/ 105 | 106 | .vscode/ 107 | data/ 108 | !gcn/data/ 109 | !tests/data/ 110 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 Takahiro Kubo 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Graph Convolution for NLP 2 | 3 | Research project to apply Graph Convolution to NLP. 4 | 5 | ## Research 6 | 7 | 1. [Why & What is Graph Convolution](https://medium.com/programming-soda/graph-convolution%E3%82%92%E8%87%AA%E7%84%B6%E8%A8%80%E8%AA%9E%E5%87%A6%E7%90%86%E3%81%AB%E5%BF%9C%E7%94%A8%E3%81%99%E3%82%8B-part1-b792d53c4c18) 8 | 2. [Implementation of Graph Convolution: Graph Attention Network](https://medium.com/programming-soda/graph-convolution%E3%82%92%E8%87%AA%E7%84%B6%E8%A8%80%E8%AA%9E%E5%87%A6%E7%90%86%E3%81%AB%E5%BF%9C%E7%94%A8%E3%81%99%E3%82%8B-part2-dd0f9bc25dd3) 9 | 3. [Kinds of task in Graph](https://medium.com/programming-soda/graph-convolution%E3%82%92%E8%87%AA%E7%84%B6%E8%A8%80%E8%AA%9E%E5%87%A6%E7%90%86%E3%81%AB%E5%BF%9C%E7%94%A8%E3%81%99%E3%82%8B-part3-12e7458f31fb) 10 | 4. 
[Case study to use Graph Convolution in NLP](https://medium.com/programming-soda/graph-convolution%E3%82%92%E8%87%AA%E7%84%B6%E8%A8%80%E8%AA%9E%E5%87%A6%E7%90%86%E3%81%AB%E5%BF%9C%E7%94%A8%E3%81%99%E3%82%8B-part4-4b0082ce26da) 11 | 5. [Design of Experiment to verify effectiveness of Graph Convolution](https://medium.com/programming-soda/graph-convolution%E3%82%92%E8%87%AA%E7%84%B6%E8%A8%80%E8%AA%9E%E5%87%A6%E7%90%86%E3%81%AB%E5%BF%9C%E7%94%A8%E3%81%99%E3%82%8B-part5-c833f01fde58) 12 | 6. [Rethink the property of Graph Convolution and find appropriate task in NLP](https://medium.com/programming-soda/graph-convolution%E3%82%92%E8%87%AA%E7%84%B6%E8%A8%80%E8%AA%9E%E5%87%A6%E7%90%86%E3%81%AB%E5%BF%9C%E7%94%A8%E3%81%99%E3%82%8B-part6-f4596b2bcc93) 13 | 7. [Research summary of Graph Convolution in NLP](https://medium.com/programming-soda/graph-convolution%E3%82%92%E8%87%AA%E7%84%B6%E8%A8%80%E8%AA%9E%E5%87%A6%E7%90%86%E3%81%AB%E5%BF%9C%E7%94%A8%E3%81%99%E3%82%8B-part7-end-3f6812ca08cf) 14 | 15 | ## Implementation 16 | 17 | * [Language Modeling](https://medium.com/programming-soda/%E8%A8%80%E8%AA%9E%E3%83%A2%E3%83%87%E3%83%AB%E3%81%AE%E6%80%A7%E8%83%BD%E3%81%8C-%E5%AE%9F%E8%A3%85%E3%81%AB%E3%82%88%E3%82%8A%E7%95%B0%E3%81%AA%E3%82%8B%E4%BB%B6%E3%82%92%E8%A7%A3%E6%B1%BA%E3%81%99%E3%82%8B-5d36c841fcac) 18 | * Graph Attention Network Layer 19 | * [Fix to support batch data](https://medium.com/programming-soda/graph-attention-network-layer%E3%82%92%E5%AE%9F%E8%A3%85%E3%81%99%E3%82%8B-part1-4a199372b3de) 20 | * [Experiment by existing dataset](https://medium.com/programming-soda/graph-attention-network-layer%E3%82%92%E5%AE%9F%E8%A3%85%E3%81%99%E3%82%8B-part1-4a199372b3de) 21 | * [What is really effective attention method on Graph?](https://medium.com/programming-soda/graph-attention-network-layer%E3%82%92%E5%AE%9F%E8%A3%85%E3%81%99%E3%82%8B-part3-ce3548c3aa5c) 22 | * Graph Convolution for Text Classification 23 | * [Prepare the Dataset](https://medium.com/programming-soda/graph-convolution%E3%81%A7%E8%87%AA%E7%84%B6%E8%A8%80%E8%AA%9E%E5%87%A6%E7%90%86%E3%82%92%E8%A1%8C%E3%81%86-%E3%83%86%E3%82%AD%E3%82%B9%E3%83%88%E5%88%86%E9%A1%9E%E7%B7%A8-part1-3eacc11eb622) 24 | * [Make baseline model and analysis](https://medium.com/programming-soda/graph-convolution%E3%81%A7%E8%87%AA%E7%84%B6%E8%A8%80%E8%AA%9E%E5%87%A6%E7%90%86%E3%82%92%E8%A1%8C%E3%81%86-%E3%83%86%E3%82%AD%E3%82%B9%E3%83%88%E5%88%86%E9%A1%9E%E7%B7%A8-part2-b0f1f0a67b17) 25 | * [Make graph convolution model](https://medium.com/programming-soda/graph-convolution%E3%81%A7%E8%87%AA%E7%84%B6%E8%A8%80%E8%AA%9E%E5%87%A6%E7%90%86%E3%82%92%E8%A1%8C%E3%81%86-%E3%83%86%E3%82%AD%E3%82%B9%E3%83%88%E5%88%86%E9%A1%9E%E7%B7%A8-part3-b85acee1a3e8) 26 | * [Analyze graph convolution model](https://medium.com/programming-soda/graph-convolution%E3%81%A7%E8%87%AA%E7%84%B6%E8%A8%80%E8%AA%9E%E5%87%A6%E7%90%86%E3%82%92%E8%A1%8C%E3%81%86-%E3%83%86%E3%82%AD%E3%82%B9%E3%83%88%E5%88%86%E9%A1%9E%E7%B7%A8-part4-caee203b86af) 27 | * [Enhance graph convolution model](https://medium.com/programming-soda/graph-convolution%E3%81%A7%E8%87%AA%E7%84%B6%E8%A8%80%E8%AA%9E%E5%87%A6%E7%90%86%E3%82%92%E8%A1%8C%E3%81%86-%E3%83%86%E3%82%AD%E3%82%B9%E3%83%88%E5%88%86%E9%A1%9E%E7%B7%A8-part5-end-cc9b0b4aac06) 28 | 29 | ## Paper Reading 30 | 31 | * [How Powerful are Graph Neural Networks?](https://medium.com/programming-soda/graph-neural-network%E3%81%AE%E5%87%A6%E7%90%86%E3%81%A8%E5%8A%B9%E6%9E%9C%E3%82%92%E7%90%86%E8%A7%A3%E3%81%99%E3%82%8B-how-powerful-are-graph-neural-networks-a26ee9245cce) 32 
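
## Usage

The scripts under `experiments/` are the entry points for reproducing the experiments above (for example, `python experiments/classification/graph_based_experiment.py`). The snippet below is a minimal sketch, distilled from that script, of how the pieces fit together for graph-based text classification; the `root` path and the hyperparameter values are illustrative, and it assumes the code is run from the repository root with the packages in `requirements.txt` installed.

```python
import os
from gcn.data.multi_nli_dataset import MultiNLIDataset
from gcn.graph.similarity_graph import SimilarityGraph
from gcn.classification.trainer import Trainer
from gcn.classification.graph_based_classifier import GraphBasedClassifier

root = os.getcwd()  # assumed to be the repository root
dataset = MultiNLIDataset(root)

# Build word graphs from word-vector similarity
# (DependencyGraph and StaticGraph are the other available builders).
graph_builder = SimilarityGraph(lang="en")
trainer = Trainer(graph_builder, root, log_dir="classifier")
trainer.build()  # downloads the dataset and fits the preprocessor

sequence_length = 25
vocab_size = len(trainer.preprocessor.vocabulary.get())

def preprocessor(x):
    processed = trainer.preprocess(x, sequence_length)
    return processed["text"], processed["graph"]

model = GraphBasedClassifier(vocab_size, sequence_length)
model.build(trainer.num_classes, preprocessor)
trainer.train(model.model, epochs=25)

predictions = model.predict(dataset.test_data()["text"])
```
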
| -------------------------------------------------------------------------------- /data/external/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/icoxfog417/graph-convolution-nlp/2f15da072e401528d9faf76985d05afce336798f/data/external/.gitkeep -------------------------------------------------------------------------------- /data/interim/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/icoxfog417/graph-convolution-nlp/2f15da072e401528d9faf76985d05afce336798f/data/interim/.gitkeep -------------------------------------------------------------------------------- /data/log/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/icoxfog417/graph-convolution-nlp/2f15da072e401528d9faf76985d05afce336798f/data/log/.gitkeep -------------------------------------------------------------------------------- /data/processed/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/icoxfog417/graph-convolution-nlp/2f15da072e401528d9faf76985d05afce336798f/data/processed/.gitkeep -------------------------------------------------------------------------------- /data/raw/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/icoxfog417/graph-convolution-nlp/2f15da072e401528d9faf76985d05afce336798f/data/raw/.gitkeep -------------------------------------------------------------------------------- /experiments/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/icoxfog417/graph-convolution-nlp/2f15da072e401528d9faf76985d05afce336798f/experiments/__init__.py -------------------------------------------------------------------------------- /experiments/classification/baseline_lstm.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import numpy as np 4 | from sklearn.metrics import classification_report 5 | sys.path.append(os.path.join(os.path.dirname(__file__), "../../")) 6 | from gcn.data.multi_nli_dataset import MultiNLIDataset 7 | from gcn.classification.baseline import LSTMClassifier 8 | from gcn.classification.baseline_trainer import BaselineTrainer 9 | 10 | 11 | def main(): 12 | root = os.path.join(os.path.dirname(__file__), "../../") 13 | dataset = MultiNLIDataset(root) 14 | trainer = BaselineTrainer(root, log_dir="classifier_baseline") 15 | trainer.build() 16 | sequence_length = 25 17 | 18 | vocab_size = len(trainer.preprocessor.vocabulary.get()) 19 | 20 | def preprocessor(x): 21 | _x = trainer.preprocess(x, sequence_length) 22 | return _x["text"] 23 | 24 | model = LSTMClassifier(vocab_size) 25 | model.build(trainer.num_classes, preprocessor) 26 | 27 | metrics = trainer.train(model.model, epochs=25, 28 | sequence_length=sequence_length, 29 | representation="GloVe.6B.100d") 30 | 31 | test_data = dataset.test_data() 32 | y_pred = model.predict(test_data["text"]) 33 | 34 | print(classification_report(test_data["label"], y_pred, 35 | target_names=dataset.labels())) 36 | 37 | 38 | if __name__ == "__main__": 39 | main() 40 | -------------------------------------------------------------------------------- /experiments/classification/baseline_merge.py: 
-------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import numpy as np 4 | from sklearn.metrics import classification_report 5 | sys.path.append(os.path.join(os.path.dirname(__file__), "../../")) 6 | from gcn.data.multi_nli_dataset import MultiNLIDataset 7 | from gcn.classification.baseline import MergeClassifier 8 | from gcn.classification.baseline_trainer import BaselineTrainer 9 | 10 | 11 | def main(): 12 | root = os.path.join(os.path.dirname(__file__), "../../") 13 | dataset = MultiNLIDataset(root) 14 | trainer = BaselineTrainer(root, log_dir="classifier_baseline") 15 | trainer.build() 16 | sequence_length = 25 17 | 18 | vocab_size = len(trainer.preprocessor.vocabulary.get()) 19 | 20 | def preprocessor(x): 21 | _x = trainer.preprocess(x, sequence_length) 22 | return _x["text"] 23 | 24 | model = MergeClassifier(vocab_size) 25 | model.build(trainer.num_classes, preprocessor) 26 | 27 | metrics = trainer.train(model.model, epochs=25, 28 | sequence_length=sequence_length, 29 | representation="GloVe.6B.100d") 30 | 31 | test_data = dataset.test_data() 32 | y_pred = model.predict(test_data["text"]) 33 | 34 | print(classification_report(test_data["label"], y_pred, 35 | target_names=dataset.labels())) 36 | 37 | 38 | if __name__ == "__main__": 39 | main() 40 | -------------------------------------------------------------------------------- /experiments/classification/baseline_tfidf.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | from sklearn.metrics import classification_report 4 | sys.path.append(os.path.join(os.path.dirname(__file__), "../../")) 5 | from gcn.data.multi_nli_dataset import MultiNLIDataset 6 | from gcn.classification.baseline import TfidfClassifier 7 | 8 | 9 | def main(): 10 | root = os.path.join(os.path.dirname(__file__), "../../") 11 | dataset = MultiNLIDataset(root) 12 | classifier = TfidfClassifier() 13 | 14 | train_data = dataset.train_data() 15 | scores = classifier.fit(train_data["text"], train_data["label"]) 16 | 17 | test_data = dataset.test_data() 18 | y_pred = classifier.predict(test_data["text"]) 19 | 20 | print(classification_report(test_data["label"], y_pred, 21 | target_names=dataset.labels())) 22 | 23 | 24 | if __name__ == "__main__": 25 | main() 26 | -------------------------------------------------------------------------------- /experiments/classification/graph_based_experiment.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | from sklearn.metrics import classification_report 4 | sys.path.append(os.path.join(os.path.dirname(__file__), "../../")) 5 | from gcn.data.multi_nli_dataset import MultiNLIDataset 6 | from gcn.classification.trainer import Trainer 7 | from gcn.graph.dependency_graph import DependencyGraph 8 | from gcn.graph.similarity_graph import SimilarityGraph 9 | from gcn.graph.static_graph import StaticGraph 10 | from gcn.classification.graph_based_classifier import GraphBasedClassifier 11 | 12 | 13 | def main(graph_type="dependency", epochs=25): 14 | root = os.path.join(os.path.dirname(__file__), "../../") 15 | dataset = MultiNLIDataset(root) 16 | 17 | if graph_type == "dependency": 18 | graph_builder = DependencyGraph(lang="en") 19 | elif graph_type == "similarity": 20 | graph_builder = SimilarityGraph(lang="en") 21 | else: 22 | graph_builder = StaticGraph(lang="en") 23 | 24 | trainer = Trainer(graph_builder, root, log_dir="classifier") 25 | 
trainer.build() 26 | 27 | sequence_length = 25 28 | vocab_size = len(trainer.preprocessor.vocabulary.get()) 29 | 30 | def preprocessor(x): 31 | _x = trainer.preprocess(x, sequence_length) 32 | values = (_x["text"], _x["graph"]) 33 | return values 34 | 35 | model = GraphBasedClassifier(vocab_size, sequence_length, 36 | lstm=None) 37 | model.build(trainer.num_classes, preprocessor) 38 | 39 | metrics = trainer.train(model.model, epochs=epochs) 40 | 41 | test_data = dataset.test_data() 42 | y_pred = model.predict(test_data["text"]) 43 | 44 | print(classification_report(test_data["label"], y_pred, 45 | target_names=dataset.labels())) 46 | 47 | 48 | if __name__ == "__main__": 49 | main() 50 | -------------------------------------------------------------------------------- /experiments/language_model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/icoxfog417/graph-convolution-nlp/2f15da072e401528d9faf76985d05afce336798f/experiments/language_model/__init__.py -------------------------------------------------------------------------------- /experiments/language_model/baseline.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | sys.path.append(os.path.join(os.path.dirname(__file__), "../../")) 4 | from gcn.language_model.trainer import Trainer 5 | from gcn.language_model.baseline import LSTMLM 6 | 7 | 8 | def main(): 9 | root = os.path.join(os.path.dirname(__file__), "../../") 10 | trainer = Trainer(root, log_dir="language_model_baseline") 11 | trainer.build() 12 | vocab_size = len(trainer.preprocessor.vocabulary.get()) 13 | print("vocab size: {}".format(vocab_size)) 14 | model = LSTMLM(vocab_size) 15 | trainer.train(model, epochs=10) 16 | 17 | 18 | if __name__ == "__main__": 19 | main() 20 | -------------------------------------------------------------------------------- /experiments/language_model/baseline_test.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | sys.path.append(os.path.join(os.path.dirname(__file__), "../../")) 4 | from gcn.language_model.trainer import Trainer 5 | from gcn.language_model.baseline import LSTMLM 6 | 7 | 8 | def main(): 9 | root = os.path.join(os.path.dirname(__file__), "../../") 10 | trainer = Trainer(root, preprocessor_name="baseline_preprocessor_test") 11 | trainer.build(data_kind="valid") 12 | vocab_size = len(trainer.preprocessor.vocabulary.get()) 13 | print("vocab size: {}".format(vocab_size)) 14 | model = LSTMLM(vocab_size) 15 | trainer.train(model, data_kind="valid", epochs=10) 16 | 17 | 18 | if __name__ == "__main__": 19 | main() 20 | -------------------------------------------------------------------------------- /experiments/layers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/icoxfog417/graph-convolution-nlp/2f15da072e401528d9faf76985d05afce336798f/experiments/layers/__init__.py -------------------------------------------------------------------------------- /experiments/layers/gat_experiment.py: -------------------------------------------------------------------------------- 1 | from gat_experiment_base import run_experiment 2 | 3 | 4 | run_experiment(original=False, attention=True) 5 | -------------------------------------------------------------------------------- /experiments/layers/gat_experiment_base.py: 
-------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | sys.path.append(os.path.join(os.path.dirname(__file__), "../../")) 4 | import numpy as np 5 | import scipy.sparse as sp 6 | import tensorflow as tf 7 | from tensorflow.python import keras as K 8 | from chariot.storage import Storage 9 | from gcn.data.graph_dataset import GraphDataset 10 | 11 | 12 | """ 13 | Evaluation script is ported from 14 | https://github.com/danielegrattarola/keras-gat/blob/master/examples/gat.py 15 | """ 16 | 17 | 18 | def preprocess_features(features): 19 | """Row-normalize feature matrix and convert to tuple representation""" 20 | rowsum = np.array(features.sum(1)) 21 | r_inv = np.power(rowsum, -1).flatten() 22 | r_inv[np.isinf(r_inv)] = 0. 23 | r_mat_inv = sp.diags(r_inv) 24 | features = r_mat_inv.dot(features) 25 | return features.todense() 26 | 27 | 28 | def run_experiment(original=True, attention=True): 29 | # Read data 30 | root = os.path.join(os.path.dirname(__file__), "../../") 31 | storage = Storage(root) 32 | gd = GraphDataset(root, kind="cora") 33 | data = gd.download(return_mask=original) 34 | A, X, Y_train, Y_val, Y_test, idx_train, idx_val, idx_test = data 35 | 36 | # Parameters 37 | N = X.shape[0] # Number of nodes in the graph 38 | F = X.shape[1] # Original feature dimension 39 | n_classes = Y_train.shape[1] # Number of classes 40 | F_ = 8 # Output size of first GraphAttention layer 41 | n_attn_heads = 8 # Number of attention heads in first GAT layer 42 | dropout_rate = 0.6 # Dropout rate (between and inside GAT layers) 43 | l2_reg = 5e-4/2 # Factor for l2 regularization 44 | learning_rate = 5e-3 # Learning rate for Adam 45 | epochs = 120 # Number of training epochs 46 | es_patience = 100 # Patience fot early stopping 47 | l2 = K.regularizers.l2 48 | node_size = 32 49 | 50 | # Preprocessing operations 51 | X = preprocess_features(X) 52 | A = A + np.eye(A.shape[0]) # Add self-loops 53 | 54 | # Model definition (as per Section 3.3 of the paper) 55 | if original: 56 | from gcn.layers.graph_attention_layer_original import GraphAttentionLayer 57 | X_in = K.layers.Input(shape=(F,)) 58 | A_in = K.layers.Input(shape=(N,)) 59 | else: 60 | from gcn.layers.graph_attention_layer import GraphAttentionLayer 61 | X_in = K.layers.Input(shape=(N, F)) 62 | A_in = K.layers.Input(shape=(N, N)) 63 | 64 | I_in = K.layers.Input(shape=(node_size,), dtype="int32") 65 | 66 | dropout1 = K.layers.Dropout(dropout_rate)(X_in) 67 | 68 | graph_attention_1 = GraphAttentionLayer( 69 | feature_units=F_, 70 | attn_heads=n_attn_heads, 71 | attn_heads_reduction="concat", 72 | dropout_rate=dropout_rate, 73 | activation="elu", 74 | kernel_regularizer=l2(l2_reg), 75 | attention=attention, 76 | attn_kernel_regularizer=l2(l2_reg))([dropout1, A_in]) 77 | 78 | dropout2 = K.layers.Dropout(dropout_rate)(graph_attention_1) 79 | graph_attention_2 = GraphAttentionLayer( 80 | n_classes, 81 | attn_heads=1, 82 | attn_heads_reduction="average", 83 | dropout_rate=dropout_rate, 84 | activation="softmax", 85 | kernel_regularizer=l2(l2_reg), 86 | attention=attention, 87 | attn_kernel_regularizer=l2(l2_reg))([dropout2, A_in]) 88 | 89 | # Build model 90 | optimizer = K.optimizers.Adam(lr=learning_rate) 91 | 92 | if original: 93 | model = K.models.Model(inputs=[X_in, A_in], outputs=graph_attention_2) 94 | model.compile(optimizer=optimizer, 95 | loss="categorical_crossentropy", 96 | weighted_metrics=["acc"]) 97 | else: 98 | output = K.layers.Lambda( 99 | lambda x: tf.reshape(tf.batch_gather(x, 
I_in), 100 | (-1, node_size, n_classes)))(graph_attention_2) 101 | model = K.models.Model(inputs=[X_in, A_in, I_in], outputs=output) 102 | model.compile(optimizer=optimizer, 103 | loss="categorical_crossentropy", 104 | metrics=["acc"]) 105 | 106 | model.summary() 107 | 108 | # Callbacks 109 | experiment_dir = "log/gan_experiment" 110 | monitor = "val_acc" 111 | if original: 112 | experiment_dir += "_o" 113 | monitor = "val_weighted_acc" 114 | if not attention: 115 | experiment_dir += "_na" 116 | 117 | experiment_dir = storage.data_path(experiment_dir) 118 | model_path = os.path.join(experiment_dir, "best_model.h5") 119 | es_callback = K.callbacks.EarlyStopping( 120 | monitor=monitor, patience=es_patience) 121 | tb_callback = K.callbacks.TensorBoard(log_dir=experiment_dir) 122 | mc_callback = K.callbacks.ModelCheckpoint( 123 | model_path, 124 | monitor=monitor, 125 | save_best_only=True, 126 | save_weights_only=True) 127 | 128 | def batch_generator(indices, label): 129 | if len(indices) != len(label): 130 | raise Exception("Does not match length") 131 | batch_size = len(indices) 132 | batch_size = batch_size // node_size 133 | 134 | def generator(): 135 | while True: 136 | for i in range(batch_size): 137 | _X = np.array([X]) 138 | _A = np.array([A]) 139 | samples = np.random.randint(len(indices), size=node_size) 140 | _i = np.array([indices[samples]]) 141 | _label = np.array([label[samples]]) 142 | yield [_X, _A, _i], _label 143 | return generator(), batch_size 144 | 145 | if original: 146 | validation_data = ([X, A], Y_val, idx_val) 147 | model.fit([X, A], 148 | Y_train, 149 | sample_weight=idx_train, 150 | epochs=epochs, 151 | batch_size=N, 152 | validation_data=validation_data, 153 | shuffle=False, # Shuffling data means shuffling the whole graph 154 | callbacks=[es_callback, tb_callback, mc_callback]) 155 | 156 | # Load best model 157 | model.load_weights(model_path) 158 | 159 | # Evaluate model 160 | eval_results = model.evaluate([X, A], 161 | Y_test, 162 | sample_weight=idx_test, 163 | batch_size=N, 164 | verbose=0) 165 | else: 166 | val_generator, val_steps = batch_generator(idx_val, Y_val) 167 | train_generator, train_steps = batch_generator(idx_train, Y_train) 168 | 169 | model.fit_generator( 170 | train_generator, train_steps, 171 | validation_data=val_generator, validation_steps=val_steps, 172 | epochs=epochs, 173 | callbacks=[es_callback, tb_callback, mc_callback]) 174 | 175 | # Load best model 176 | model.load_weights(model_path) 177 | 178 | # Evaluate model 179 | test_generator, test_steps = batch_generator(idx_test, Y_test) 180 | eval_results = model.evaluate_generator( 181 | test_generator, test_steps, 182 | verbose=0) 183 | 184 | print("Done.\n" 185 | "Test loss: {}\n" 186 | "Test accuracy: {}".format(*eval_results)) 187 | -------------------------------------------------------------------------------- /experiments/layers/gat_experiment_original.py: -------------------------------------------------------------------------------- 1 | from gat_experiment_base import run_experiment 2 | 3 | 4 | run_experiment(original=True, attention=True) 5 | -------------------------------------------------------------------------------- /experiments/layers/gat_experiment_original_without_attention.py: -------------------------------------------------------------------------------- 1 | from gat_experiment_base import run_experiment 2 | 3 | 4 | run_experiment(original=True, attention=False) 5 | -------------------------------------------------------------------------------- 
/experiments/layers/gat_experiment_without_attention.py: -------------------------------------------------------------------------------- 1 | from gat_experiment_base import run_experiment 2 | 3 | 4 | run_experiment(original=False, attention=False) 5 | -------------------------------------------------------------------------------- /gcn/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/icoxfog417/graph-convolution-nlp/2f15da072e401528d9faf76985d05afce336798f/gcn/__init__.py -------------------------------------------------------------------------------- /gcn/base_trainer.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | from sklearn.externals import joblib 4 | from chariot.storage import Storage 5 | import chariot.transformer as ct 6 | from chariot.preprocessor import Preprocessor 7 | 8 | 9 | class BaseTrainer(): 10 | 11 | def __init__(self, root="", lang=None, min_df=5, max_df=sys.maxsize, 12 | unknown="", preprocessor_name="preprocessor", log_dir=""): 13 | default_root = os.path.join(os.path.dirname(__file__), "../../") 14 | _root = root if root else default_root 15 | 16 | self.storage = Storage(_root) 17 | self.preprocessor_name = preprocessor_name 18 | self._base_log_dir = log_dir 19 | self._built = False 20 | self.preprocessor = Preprocessor( 21 | text_transformers=[ 22 | ct.text.UnicodeNormalizer(), 23 | ct.text.LowerNormalizer() 24 | ], 25 | tokenizer=ct.Tokenizer(lang=lang), 26 | vocabulary=ct.Vocabulary( 27 | min_df=min_df, max_df=max_df, 28 | unknown=unknown)) 29 | 30 | def load_preprocessor(self): 31 | if os.path.exists(self.preprocessor_path): 32 | self._built = True 33 | self.preprocessor = joblib.load(self.preprocessor_path) 34 | 35 | @property 36 | def preprocessor_path(self): 37 | if self._base_log_dir: 38 | path = self._log_dir + "/{}.pkl".format(self.preprocessor_name) 39 | return self.storage.data_path(path) 40 | else: 41 | path = "interim/{}.pkl".format(self.preprocessor_name) 42 | return self.storage.data_path(path) 43 | 44 | @property 45 | def _log_dir(self): 46 | folder = "/" + self._base_log_dir if self._base_log_dir else "" 47 | log_dir = "log{}".format(folder) 48 | if not os.path.exists(self.storage.data_path(log_dir)): 49 | os.mkdir(self.storage.data_path(log_dir)) 50 | 51 | return log_dir 52 | 53 | @property 54 | def log_dir(self): 55 | return self.storage.data_path(self._log_dir) 56 | 57 | @property 58 | def model_path(self): 59 | return self.storage.data_path(self._log_dir + "/model.h5") 60 | 61 | @property 62 | def tensorboard_dir(self): 63 | return self.storage.data_path(self._log_dir) 64 | 65 | def download(self): 66 | raise Exception("You have to specify what kinds of data you use.") 67 | 68 | def build(self, data_kind="train", field="", save=True): 69 | if not self._built: 70 | self.load_preprocessor() 71 | if self._built: 72 | print("Load existing preprocessor {}.".format( 73 | os.path.basename(self.preprocessor_path))) 74 | return 0 75 | 76 | r = self.download() 77 | if data_kind == "test": 78 | data = r.test_data() 79 | elif data_kind == "valid": 80 | data = r.valid_data() 81 | else: 82 | data = r.train_data() 83 | 84 | print("Building Dictionary from {} data...".format(data_kind)) 85 | if not field: 86 | self.preprocessor.fit(data) 87 | else: 88 | self.preprocessor.fit(data[field]) 89 | 90 | if save: 91 | joblib.dump(self.preprocessor, self.preprocessor_path) 92 | self._built = True 93 | print("Done!") 94 | 
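# Illustrative usage sketch (not part of the library): a concrete trainer only
# has to override `download` so that `build` can fit the preprocessor on the
# returned dataset, which must expose train_data()/valid_data()/test_data().
# The class and argument values below are hypothetical, e.g.:
#
#   class MyTrainer(BaseTrainer):
#       def download(self):
#           return MultiNLIDataset(self.storage.root).download()
#
#   trainer = MyTrainer(root=".", log_dir="my_experiment")
#   trainer.build(data_kind="train", field="text")
#   vocab_size = len(trainer.preprocessor.vocabulary.get())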
-------------------------------------------------------------------------------- /gcn/classification/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/icoxfog417/graph-convolution-nlp/2f15da072e401528d9faf76985d05afce336798f/gcn/classification/__init__.py -------------------------------------------------------------------------------- /gcn/classification/baseline.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from sklearn.feature_extraction.text import TfidfVectorizer 3 | from sklearn.linear_model import LogisticRegression 4 | from sklearn.pipeline import Pipeline 5 | from sklearn.model_selection import cross_val_score 6 | from tensorflow.python import keras as K 7 | import tensorflow as tf 8 | from gcn.util import gpu_enable 9 | 10 | 11 | class TfidfClassifier(): 12 | 13 | def __init__(self, max_df=1.0, min_df=1, vocabulary=None): 14 | self.vectorizer = TfidfVectorizer(max_df=max_df, min_df=min_df, 15 | vocabulary=vocabulary) 16 | self.classifier = LogisticRegression(penalty="l1", solver="liblinear", 17 | multi_class="ovr") 18 | self.model = Pipeline([("vectorizer", self.vectorizer), 19 | ("classifier", self.classifier)]) 20 | 21 | def fit(self, x, y, cv=5): 22 | scores = cross_val_score(self.model, x, y, cv=cv, scoring="f1_micro") 23 | self.model.fit(x, y) 24 | return scores 25 | 26 | def predict(self, x): 27 | return self.model.predict(x) 28 | 29 | def predict_proba(self, x): 30 | return self.model.predict_proba(x) 31 | 32 | 33 | class MergeClassifier(): 34 | 35 | def __init__(self, vocab_size, embedding_size=100, 36 | merge_method="add"): 37 | self.vocab_size = vocab_size 38 | self.embedding_size = embedding_size 39 | self.merge_method = merge_method 40 | self.model = None 41 | 42 | def build(self, num_classes, preprocessor=None): 43 | self.preprocessor = preprocessor 44 | model = K.Sequential() 45 | embedding = K.layers.Embedding(input_dim=self.vocab_size, 46 | output_dim=self.embedding_size, 47 | embeddings_regularizer=K.regularizers.l2(), 48 | name="embedding", 49 | mask_zero=True) 50 | model.add(embedding) 51 | 52 | if self.merge_method == "mean": 53 | def mask_mean(x, mask): 54 | sum = K.backend.sum(x, axis=1) 55 | total = K.backend.sum(tf.to_float(mask), axis=1, keepdims=True) 56 | return tf.divide(sum, total) 57 | 58 | model.add(K.layers.Lambda(mask_mean)) 59 | else: 60 | model.add(K.layers.Lambda(lambda x: K.backend.sum(x, axis=1))) 61 | 62 | model.add(K.layers.Dense(num_classes, activation="softmax")) 63 | 64 | self.model = model 65 | 66 | def predict(self, x): 67 | preds = self.predict_proba(x) 68 | return np.argmax(preds, axis=1) 69 | 70 | def predict_proba(self, x): 71 | _x = x if self.preprocessor is None else self.preprocessor(x) 72 | return self.model.predict(_x) 73 | 74 | 75 | class LSTMClassifier(): 76 | 77 | def __init__(self, vocab_size, embedding_size=100, hidden_size=100, 78 | layers=1, dropout=0.5, bidirectional=False): 79 | 80 | self.vocab_size = vocab_size 81 | self.embedding_size = embedding_size 82 | self.hidden_size = hidden_size 83 | self.layers = layers 84 | self.dropout = dropout 85 | self.bidirectional = bidirectional 86 | self.model = None 87 | 88 | def build(self, num_classes, preprocessor=None): 89 | self.preprocessor = preprocessor 90 | model = K.Sequential() 91 | embedding = K.layers.Embedding(input_dim=self.vocab_size, 92 | output_dim=self.embedding_size, 93 | embeddings_regularizer=K.regularizers.l2(), 94 | 
name="embedding", 95 | mask_zero=True) 96 | model.add(embedding) 97 | model.add(K.layers.Dropout(self.dropout)) 98 | for layer in range(self.layers): 99 | lstm_layer = K.layers.CuDNNLSTM if gpu_enable() else K.layers.LSTM 100 | lstm = lstm_layer(self.hidden_size) 101 | if self.bidirectional: 102 | lstm = K.layers.Bidirectional(lstm, merge_mode="concat") 103 | model.add(lstm) 104 | 105 | model.add(K.layers.Dropout(self.dropout)) 106 | model.add(K.layers.Dense(num_classes, activation="softmax")) 107 | 108 | self.model = model 109 | 110 | def predict(self, x): 111 | preds = self.predict_proba(x) 112 | return np.argmax(preds, axis=1) 113 | 114 | def predict_proba(self, x): 115 | _x = x if self.preprocessor is None else self.preprocessor(x) 116 | return self.model.predict(_x) 117 | -------------------------------------------------------------------------------- /gcn/classification/baseline_trainer.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | from tensorflow.python import keras as K 4 | import chariot.transformer as ct 5 | from chariot.preprocess import Preprocess 6 | from chariot.feeder import Feeder 7 | from chariot.transformer.formatter import Padding 8 | from gcn.base_trainer import BaseTrainer 9 | from gcn.data.multi_nli_dataset import MultiNLIDataset 10 | 11 | 12 | class BaselineTrainer(BaseTrainer): 13 | 14 | def __init__(self, root="", lang=None, min_df=1, max_df=1.0, 15 | unknown="", preprocessor_name="preprocessor", 16 | log_dir=""): 17 | super().__init__(root, lang, min_df, max_df, unknown, 18 | preprocessor_name, log_dir) 19 | 20 | def download(self): 21 | r = MultiNLIDataset(self.storage.root).download() 22 | return r 23 | 24 | @property 25 | def num_classes(self): 26 | return len(MultiNLIDataset.labels()) 27 | 28 | def build(self, data_kind="train", save=True): 29 | super().build(data_kind, "text", save) 30 | if self.preprocessor.vocabulary.pad != 0: 31 | raise Exception("Padding is not executed by zero.") 32 | 33 | def train(self, model, data_kind="train", lr=1e-3, 34 | batch_size=20, sequence_length=25, 35 | representation="GloVe.6B.100d", 36 | epochs=40, verbose=2): 37 | 38 | if not self._built: 39 | raise Exception("Trainer's preprocessor is not built.") 40 | 41 | if representation is not None: 42 | print("Load word embedding...") 43 | self.storage.chakin(name=representation) 44 | file_path = "external/{}.txt".format(representation.lower()) 45 | weights = [self.preprocessor.vocabulary.make_embedding( 46 | self.storage.data_path(file_path))] 47 | model.get_layer("embedding").set_weights(weights) 48 | 49 | r = self.download() 50 | 51 | train_data = self.preprocess(r.train_data(), sequence_length) 52 | test_data = self.preprocess(r.test_data(), sequence_length) 53 | 54 | # Set optimizer 55 | model.compile(loss="sparse_categorical_crossentropy", 56 | optimizer=K.optimizers.Adam(lr=lr), 57 | metrics=["accuracy"]) 58 | 59 | metrics = model.fit(train_data["text"], train_data["label"], 60 | validation_data=(test_data["text"], test_data["label"]), 61 | batch_size=batch_size, 62 | epochs=epochs, verbose=verbose) 63 | 64 | return metrics 65 | 66 | def preprocess(self, data, length): 67 | _data = data 68 | if isinstance(data, (list, tuple)): 69 | _data = pd.Series(data, name="text").to_frame() 70 | elif isinstance(data, pd.Series): 71 | _data = data.to_frame() 72 | 73 | preprocess = Preprocess({ 74 | "text": self.preprocessor 75 | }) 76 | feeder = Feeder({"text": Padding.from_(self.preprocessor, 77 | 
length=length)}) 78 | 79 | _data = preprocess.transform(_data) 80 | _data = feeder.transform(_data) 81 | 82 | return _data 83 | -------------------------------------------------------------------------------- /gcn/classification/graph_based_classifier.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from tensorflow.python import keras as K 3 | import tensorflow as tf 4 | from gcn.layers.graph_attention_layer import GraphAttentionLayer 5 | from gcn.util import gpu_enable 6 | 7 | 8 | class GraphBasedClassifier(): 9 | 10 | def __init__(self, vocab_size, graph_size, 11 | embedding_size=100, hidden_size=100, 12 | head_types=("concat",), heads=1, dropout=0.6, 13 | node_level_bias=False, with_attention=True, 14 | lstm=None, bidirectional=False): 15 | 16 | self.vocab_size = vocab_size 17 | self.graph_size = graph_size 18 | self.embedding_size = embedding_size 19 | self.hidden_size = hidden_size 20 | self.head_types = head_types 21 | self.heads = heads 22 | self.dropout = dropout 23 | self.node_level_bias = node_level_bias 24 | self.with_attention = with_attention 25 | self.lstm = lstm 26 | self.bidirectional = bidirectional 27 | self.model = None 28 | self._attention = None 29 | self.preprocessor = None 30 | 31 | def build(self, num_classes, preprocessor=None): 32 | X_in = K.layers.Input(shape=(self.graph_size,)) 33 | A_in = K.layers.Input(shape=(self.graph_size, self.graph_size)) 34 | self.preprocessor = preprocessor 35 | 36 | embedding = K.layers.Embedding(input_dim=self.vocab_size, 37 | output_dim=self.embedding_size, 38 | input_length=self.graph_size, 39 | embeddings_regularizer=K.regularizers.l2(), 40 | name="embedding", 41 | mask_zero=True) 42 | vectors = embedding(X_in) 43 | _vectors = K.layers.Dropout(self.dropout)(vectors) 44 | 45 | def lstm(return_sequences): 46 | # CuDNNLSTM does not support mask. 
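            # (The Embedding layer above is created with mask_zero=True, and the
            # cuDNN-backed LSTM cannot consume that mask, so the plain Keras LSTM
            # is used here even when a GPU is available.)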
47 | # layer = K.layers.CuDNNLSTM if gpu_enable() else K.layers.LSTM 48 | layer = K.layers.LSTM 49 | _lstm = layer(self.hidden_size, return_sequences=return_sequences, 50 | dropout=self.dropout, recurrent_dropout=self.dropout) 51 | if self.bidirectional: 52 | _lstm = K.layers.Bidirectional(_lstm, merge_mode="concat") 53 | return _lstm 54 | 55 | if self.lstm is not None and self.lstm == "before": 56 | _vectors = lstm(return_sequences=True)(_vectors) 57 | 58 | attentions = [] 59 | for ht in self.head_types: 60 | gh = GraphAttentionLayer( 61 | feature_units=self.hidden_size, 62 | attn_heads=self.heads, 63 | attn_heads_reduction=ht, 64 | dropout_rate=self.dropout, 65 | kernel_regularizer=K.regularizers.l2(), 66 | attention=self.with_attention, 67 | attn_kernel_regularizer=K.regularizers.l2(), 68 | return_attention=True, 69 | node_level_bias=self.node_level_bias) 70 | _vectors, attention = gh([_vectors, A_in]) 71 | attentions.append(attention) 72 | 73 | if self.lstm is not None and self.lstm == "after": 74 | merged = lstm(return_sequences=False)(_vectors) 75 | else: 76 | merged = K.layers.Lambda(lambda x: K.backend.sum(x, axis=1))(_vectors) 77 | 78 | probs = K.layers.Dense(num_classes, activation="softmax")(merged) 79 | 80 | self.model = K.models.Model(inputs=[X_in, A_in], outputs=probs) 81 | self._attention = K.models.Model(inputs=[X_in, A_in], 82 | outputs=attentions) 83 | 84 | def predict(self, x): 85 | preds = self.predict_proba(x) 86 | return np.argmax(preds, axis=1) 87 | 88 | def predict_proba(self, x): 89 | _x = x if self.preprocessor is None else self.preprocessor(x) 90 | return self.model.predict(_x) 91 | 92 | def show_attention(self, x): 93 | _x = x if self.preprocessor is None else self.preprocessor(x) 94 | attentions = self._attention.predict(_x) 95 | if len(self.head_types) == 1: 96 | attentions = [attentions] 97 | 98 | # batch, layer, head, node_size, node_size 99 | attentions = np.array(attentions) 100 | attentions = np.transpose(attentions, (1, 0, 2, 3, 4)) 101 | return attentions 102 | -------------------------------------------------------------------------------- /gcn/classification/trainer.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | from tensorflow.python import keras as K 3 | from chariot.preprocess import Preprocess 4 | from chariot.feeder import Feeder 5 | from chariot.transformer.formatter import Padding 6 | from gcn.base_trainer import BaseTrainer 7 | from gcn.data.multi_nli_dataset import MultiNLIDataset 8 | from gcn.graph.dependency_graph import DependencyGraph 9 | from gcn.graph.similarity_graph import SimilarityGraph 10 | from gcn.graph.static_graph import StaticGraph 11 | 12 | 13 | class Trainer(BaseTrainer): 14 | 15 | def __init__(self, graph_builder, root="", min_df=1, max_df=1.0, 16 | unknown="", preprocessor_name="preprocessor", 17 | log_dir=""): 18 | super().__init__(root, graph_builder.lang, min_df, max_df, unknown, 19 | preprocessor_name, log_dir) 20 | self.graph_builder = graph_builder 21 | 22 | def download(self): 23 | r = MultiNLIDataset(self.storage.root).download() 24 | return r 25 | 26 | @property 27 | def num_classes(self): 28 | return len(MultiNLIDataset.labels()) 29 | 30 | def build(self, data_kind="train", save=True): 31 | super().build(data_kind, "text", save) 32 | if self.preprocessor.vocabulary.pad != 0: 33 | raise Exception("Padding is not executed by zero.") 34 | 35 | def train(self, model, data_kind="train", 36 | lr=1e-3, batch_size=20, sequence_length=25, 37 | 
representation="GloVe.6B.100d", 38 | epochs=40, verbose=2): 39 | 40 | if not self._built: 41 | raise Exception("Trainer's preprocessor is not built.") 42 | 43 | if representation is not None: 44 | self.storage.chakin(name=representation) 45 | file_path = "external/{}.txt".format(representation.lower()) 46 | weights = [self.preprocessor.vocabulary.make_embedding( 47 | self.storage.data_path(file_path))] 48 | model.get_layer("embedding").set_weights(weights) 49 | 50 | r = self.download() 51 | 52 | train_data = self.preprocess(r.train_data(), sequence_length) 53 | test_data = self.preprocess(r.test_data(), sequence_length) 54 | 55 | # Set optimizer 56 | model.compile(loss="sparse_categorical_crossentropy", 57 | optimizer=K.optimizers.Adam(lr=lr), 58 | metrics=["accuracy"]) 59 | 60 | validation_data = ((test_data["text"], test_data["graph"]), test_data["label"]) 61 | metrics = model.fit((train_data["text"], train_data["graph"]), 62 | train_data["label"], 63 | validation_data=validation_data, 64 | batch_size=batch_size, 65 | epochs=epochs, verbose=verbose) 66 | 67 | return metrics 68 | 69 | def preprocess(self, data, length): 70 | _data = data 71 | if isinstance(data, (list, tuple)): 72 | _data = pd.Series(data, name="text").to_frame() 73 | elif isinstance(data, pd.Series): 74 | _data = data.to_frame() 75 | 76 | graph = self.graph_builder.batch_build(_data["text"], length) 77 | 78 | preprocess = Preprocess({ 79 | "text": self.preprocessor 80 | }) 81 | feeder = Feeder({"text": Padding.from_(self.preprocessor, 82 | length=length)}) 83 | 84 | _data = preprocess.transform(_data) 85 | _data = feeder.transform(_data) 86 | _data["graph"] = graph 87 | 88 | return _data 89 | -------------------------------------------------------------------------------- /gcn/data/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/icoxfog417/graph-convolution-nlp/2f15da072e401528d9faf76985d05afce336798f/gcn/data/__init__.py -------------------------------------------------------------------------------- /gcn/data/graph_dataset.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pickle as pkl 3 | import zipfile 4 | import requests 5 | import numpy as np 6 | import scipy.sparse as sp 7 | import networkx as nx 8 | from chariot.storage import Storage 9 | 10 | 11 | class GraphDataset(): 12 | 13 | def __init__(self, root, kind="cora"): 14 | self.storage = Storage(root) 15 | self.kind = kind 16 | self.download_url = "https://s3-ap-northeast-1.amazonaws.com/dev.tech-sketch.jp/chakki/public/graph/" # noqa 17 | if kind == "cora": 18 | self.download_url += "cora.zip" 19 | elif kind == "citeseer": 20 | self.download_url += "citeseer.zip" 21 | elif kind == "pubmed": 22 | self.download_url += "pubmed.zip" 23 | else: 24 | raise Exception("Graph dataset {} is not supported.".format(kind)) 25 | 26 | @property 27 | def data_root(self): 28 | return self.storage.data_path("raw/{}".format(self.kind)) 29 | 30 | @property 31 | def download_file_path(self): 32 | return self.storage.data_path("raw/{}.zip".format(self.kind)) 33 | 34 | def download(self, return_mask=True): 35 | # Check downloaded file 36 | if os.path.isdir(self.data_root): 37 | print("{} dataset is already downloaded.".format(self.kind)) 38 | return self.load(return_mask) 39 | 40 | # Download dataset 41 | resp = requests.get(self.download_url, stream=True) 42 | with open(self.download_file_path, "wb") as f: 43 | chunk_size = 1024 44 | for data in 
resp.iter_content(chunk_size=chunk_size): 45 | f.write(data) 46 | 47 | # Expand file 48 | with zipfile.ZipFile(self.download_file_path) as z: 49 | z.extractall(path=self.data_root) 50 | os.remove(self.download_file_path) 51 | 52 | return self.load(return_mask) 53 | 54 | def load(self, return_mask): 55 | """ 56 | Loads input data (reference from: https://github.com/tkipf/gcn/blob/master/gcn/utils.py) 57 | ind.dataset_str.x => the feature vectors of the training instances as scipy.sparse.csr.csr_matrix object; 58 | ind.dataset_str.tx => the feature vectors of the test instances as scipy.sparse.csr.csr_matrix object; 59 | ind.dataset_str.allx => the feature vectors of both labeled and unlabeled training instances 60 | (a superset of ind.dataset_str.x) as scipy.sparse.csr.csr_matrix object; 61 | ind.dataset_str.y => the one-hot labels of the labeled training instances as numpy.ndarray object; 62 | ind.dataset_str.ty => the one-hot labels of the test instances as numpy.ndarray object; 63 | ind.dataset_str.ally => the labels for instances in ind.dataset_str.allx as numpy.ndarray object; 64 | ind.dataset_str.graph => a dict in the format {index: [index_of_neighbor_nodes]} as collections.defaultdict 65 | object; 66 | ind.dataset_str.test.index => the indices of test instances in graph, for the inductive setting as list object. 67 | All objects above must be saved using python pickle module. 68 | :param dataset_str: Dataset name 69 | :return: All data input files loaded (as well the training/test data). 70 | """ 71 | 72 | names = ["x", "y", "tx", "ty", "allx", "ally", "graph", "test.index"] 73 | objects = [] 74 | for n in names: 75 | file_path = os.path.join(self.data_root, 76 | "ind.{}.{}".format(self.kind, n)) 77 | 78 | if n != "test.index": 79 | with open(file_path, "rb") as f: 80 | objects.append(pkl.load(f, encoding="latin1")) 81 | else: 82 | with open(file_path, encoding="latin1") as f: 83 | lines = f.readlines() 84 | indices = [int(ln.strip()) for ln in lines] 85 | objects.append(indices) 86 | 87 | x, y, tx, ty, allx, ally, graph, test_idx = tuple(objects) 88 | test_idx_range = np.sort(test_idx) 89 | 90 | if self.kind == "citeseer": 91 | # Fix citeseer dataset (there are some isolated nodes in the graph) 92 | # Find isolated nodes, add them as zero-vecs into the right position 93 | test_idx_range_full = range(min(test_idx), max(test_idx)+1) 94 | tx_extended = sp.lil_matrix((len(test_idx_range_full), x.shape[1])) 95 | tx_extended[test_idx_range-min(test_idx_range), :] = tx 96 | tx = tx_extended 97 | ty_extended = np.zeros((len(test_idx_range_full), y.shape[1])) 98 | ty_extended[test_idx_range-min(test_idx_range), :] = ty 99 | ty = ty_extended 100 | 101 | features = sp.vstack((allx, tx)).tolil() 102 | features[test_idx, :] = features[test_idx_range, :] 103 | adj = nx.adjacency_matrix(nx.from_dict_of_lists(graph)) 104 | 105 | labels = np.vstack((ally, ty)) 106 | labels[test_idx, :] = labels[test_idx_range, :] 107 | 108 | idx_test = test_idx_range 109 | idx_train = np.array(range(len(y))) 110 | idx_val = np.array(range(len(y), len(y)+500)) 111 | 112 | if return_mask: 113 | train_mask = self.sample_mask(idx_train, labels.shape[0]) 114 | val_mask = self.sample_mask(idx_val, labels.shape[0]) 115 | test_mask = self.sample_mask(idx_test, labels.shape[0]) 116 | 117 | y_train = np.zeros(labels.shape) 118 | y_val = np.zeros(labels.shape) 119 | y_test = np.zeros(labels.shape) 120 | y_train[train_mask, :] = labels[train_mask, :] 121 | y_val[val_mask, :] = labels[val_mask, :] 122 | y_test[test_mask, :] = 
labels[test_mask, :] 123 | 124 | return adj, features, y_train, y_val, y_test, train_mask, val_mask, test_mask 125 | else: 126 | 127 | y_train = labels[idx_train, :] 128 | y_val = labels[idx_val, :] 129 | y_test = labels[idx_test, :] 130 | return adj, features, y_train, y_val, y_test, idx_train, idx_val, idx_test 131 | 132 | def sample_mask(self, idx, length): 133 | """Create mask.""" 134 | mask = np.zeros(length) 135 | mask[idx] = 1 136 | return np.array(mask, dtype=np.bool) 137 | -------------------------------------------------------------------------------- /gcn/data/multi_nli_dataset.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import spacy 3 | import chazutsu 4 | from chariot.storage import Storage 5 | 6 | 7 | class MultiNLIDataset(): 8 | 9 | def __init__(self, root, min_word_count=3, max_word_count=25, 10 | prefix=""): 11 | self.storage = Storage(root) 12 | self.nlp = spacy.load("en", parser=False, entity=False) 13 | self.min_word_count = min_word_count 14 | self.max_word_count = max_word_count 15 | self.prefix = prefix 16 | 17 | def train_data(self): 18 | return pd.read_csv(self.processed_file("train")) 19 | 20 | def test_data(self): 21 | return pd.read_csv(self.processed_file("test")) 22 | 23 | @classmethod 24 | def labels(self): 25 | return ["fiction", "government", "slate", "telephone", "travel", 26 | "nineeleven", "facetoface", "letters", "oup", "verbatim"] 27 | 28 | def download(self): 29 | download_dir = self.storage.data_path("raw") 30 | matched = chazutsu.datasets.MultiNLI.matched().download(download_dir) 31 | mismatched = chazutsu.datasets.MultiNLI.mismatched().download(download_dir) 32 | 33 | for kind in ["train", "test"]: 34 | data = self._merge_data(matched, mismatched, kind) 35 | data.to_csv(self.interim_file(kind)) 36 | preprocessed = self.preprocess(data) 37 | preprocessed = pd.concat([preprocessed["text"], 38 | preprocessed["label"]], axis=1) 39 | preprocessed.to_csv(self.processed_file(kind), index=False) 40 | return self 41 | 42 | def interim_file(self, kind): 43 | if self.prefix: 44 | p = "interim/{}_multi_nli_{}.csv".format(self.prefix, kind) 45 | else: 46 | p = "interim/multi_nli_{}.csv".format(kind) 47 | 48 | return self.storage.data_path(p) 49 | 50 | def processed_file(self, kind): 51 | if self.prefix: 52 | p = "processed/{}_multi_nli_{}.csv".format(self.prefix, kind) 53 | else: 54 | p = "processed/multi_nli_{}.csv".format(kind) 55 | 56 | return self.storage.data_path(p) 57 | 58 | def preprocess(self, df): 59 | # Drop duplicates 60 | except_d = df.drop_duplicates(["text"]) 61 | 62 | # Count words 63 | word_count = except_d["text"].apply(lambda x: len(self.nlp(x))) 64 | except_d = except_d.assign(word_count=pd.Series(word_count).values) 65 | 66 | limited = except_d[(self.min_word_count <= except_d["word_count"]) & 67 | (except_d["word_count"] <= self.max_word_count)] 68 | 69 | # Equalize data count 70 | min_count = limited["label"].value_counts().min() 71 | selected = limited.groupby("label").apply(lambda x: x.sample(n=min_count)) 72 | selected = selected.drop(columns=["label", "index"]).reset_index() 73 | 74 | # Convert label to index 75 | selected["label"] = selected["label"].apply( 76 | lambda x: self.labels().index(x)) 77 | 78 | return selected 79 | 80 | def _merge_data(self, matched, mismatched, kind="train"): 81 | dataset = [] 82 | for d in [matched, mismatched]: 83 | if kind == "train": 84 | _d = d.dev_data() 85 | else: 86 | _d = d.test_data() 87 | 88 | _d = pd.concat([_d["genre"], 
_d["sentence1"]], axis=1) 89 | dataset.append(_d) 90 | merged = pd.concat(dataset).reset_index() 91 | merged.rename(columns={"sentence1": "text", "genre": "label"}, 92 | inplace=True) 93 | return merged 94 | -------------------------------------------------------------------------------- /gcn/graph/__init__.py: -------------------------------------------------------------------------------- 1 | from .similarity_graph import SimilarityGraph 2 | from .dependency_graph import DependencyGraph 3 | from .static_graph import StaticGraph 4 | -------------------------------------------------------------------------------- /gcn/graph/dependency_graph.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import spacy 3 | 4 | 5 | class DependencyGraph(): 6 | 7 | def __init__(self, lang): 8 | self.lang = lang 9 | self._parser = spacy.load(self.lang, disable=["ner", "textcat"]) 10 | 11 | def get_nodes(self, sentence): 12 | return [t.text for t in self._parser(sentence)] 13 | 14 | def build(self, sentence, size=-1, return_label=False): 15 | tokens = self._parser(sentence) 16 | _size = size if size > 0 else len(tokens) 17 | matrix = np.zeros((_size, _size)) 18 | if return_label: 19 | matrix = [[""] * matrix.shape[1] for r in range(matrix.shape[0])] 20 | for token in tokens: 21 | # print("{} =({})=> {}".format(token.text, token.dep_, token.head.text)) 22 | if not token.dep_: 23 | raise Exception("Dependency Parse does not work well.") 24 | 25 | if token.i < _size and token.head.i < _size: 26 | v = token.dep_ if return_label else 1 27 | matrix[token.i][token.head.i] = v 28 | 29 | return matrix 30 | 31 | def batch_build(self, sentences, size=-1): 32 | matrices = [self.build(s, size) for s in sentences] 33 | return np.array(matrices) 34 | -------------------------------------------------------------------------------- /gcn/graph/similarity_graph.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | from sklearn.metrics.pairwise import cosine_similarity 4 | import spacy 5 | from chariot.storage import Storage 6 | from chariot.resource.word_vector import WordVector 7 | 8 | 9 | class SimilarityGraph(): 10 | 11 | def __init__(self, lang, nearest_neighbor=4, threshold=0.3, 12 | mode="similarity", representation="GloVe.6B.100d", root=""): 13 | self.lang = lang 14 | self._parser = spacy.load(self.lang, disable=["ner", "textcat"]) 15 | self.nearest_neighbor = nearest_neighbor 16 | self.threshold = threshold 17 | self.mode = mode 18 | self.representation = representation 19 | default_root = os.path.join(os.path.dirname(__file__), "../../") 20 | _root = root if root else default_root 21 | 22 | self.storage = Storage(_root) 23 | self.key_vector = {} 24 | self._unknown = None 25 | 26 | def get_nodes(self, sentence): 27 | return [t.text for t in self._parser(sentence)] 28 | 29 | def build(self, sentence, size=-1): 30 | if 0 < size < self.nearest_neighbor: 31 | raise Exception("Matrix size is not enough for neighbors.") 32 | 33 | if len(self.key_vector) == 0: 34 | # download representation 35 | self.storage.chakin(name=self.representation) 36 | 37 | # Make embedding matrix 38 | file_path = "external/{}.txt".format(self.representation.lower()) 39 | wv = WordVector(self.storage.data_path(file_path)) 40 | self.key_vector = wv.load() 41 | 42 | for k in self.key_vector: 43 | self._unknown = np.zeros(len(self.key_vector[k])) 44 | break 45 | 46 | tokens = self._parser(sentence) 47 | vectors = [] 48 | 
for t in tokens: 49 | if t.text in self.key_vector: 50 | vectors.append(self.key_vector[t.text]) 51 | else: 52 | vectors.append(self._unknown) 53 | 54 | vectors = np.vstack(vectors) 55 | matrix = self._build(vectors, size) 56 | return matrix 57 | 58 | def _build(self, vectors, size=-1): 59 | _size = size if size > 0 else len(vectors) 60 | similarity = cosine_similarity(vectors[:_size]) 61 | similarity -= np.eye(similarity.shape[0]) # exclude similarity to self 62 | top_k = np.argsort(-similarity, axis=1)[:, :self.nearest_neighbor] 63 | 64 | matrix = np.zeros((_size, _size)) 65 | for i, top in enumerate(top_k): 66 | _top = np.array([t for t in top 67 | if np.abs(similarity[i, t]) >= self.threshold]) 68 | 69 | if len(_top) == 0: 70 | continue 71 | 72 | if self.mode == "connectivity": 73 | matrix[i, _top] = 1 74 | else: 75 | matrix[i, _top] = similarity[i, _top] 76 | 77 | return matrix 78 | 79 | def batch_build(self, sentences, size=-1): 80 | matrices = [self.build(s, size) for s in sentences] 81 | return np.array(matrices) 82 | -------------------------------------------------------------------------------- /gcn/graph/static_graph.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import spacy 3 | 4 | 5 | class StaticGraph(): 6 | 7 | def __init__(self, lang, kind="previous", fill=True): 8 | self.lang = lang 9 | self._parser = spacy.load(self.lang, disable=["ner", "textcat"]) 10 | self.kind = kind 11 | self.fill = fill 12 | 13 | def get_nodes(self, sentence): 14 | return [t.text for t in self._parser(sentence)] 15 | 16 | def build(self, sentence, size=-1): 17 | nodes = self.get_nodes(sentence) 18 | _size = size if size > 0 else len(nodes) 19 | if self.fill: 20 | func = lambda s, k=0: np.tril(np.ones((s, s)), k) 21 | else: 22 | func = np.eye 23 | 24 | if self.kind == "self": 25 | return func(_size) 26 | elif self.kind == "previous": 27 | return func(_size, k=-1) 28 | 29 | def batch_build(self, sentences, size=-1): 30 | matrices = [self.build(s, size) for s in sentences] 31 | return np.array(matrices) 32 | -------------------------------------------------------------------------------- /gcn/language_model/baseline.py: -------------------------------------------------------------------------------- 1 | from tensorflow.python import keras as K 2 | from gcn.layers import ProjectionLayer 3 | from gcn.util import gpu_enable 4 | 5 | 6 | def LSTMLM(vocab_size, embedding_size=100, hidden_size=100, 7 | layers=1, dropout=0.5): 8 | # Prepare initializer 9 | initializer = K.initializers.RandomUniform(minval=-0.1, maxval=0.1) 10 | 11 | # Build the model 12 | model = K.Sequential() 13 | embedding = K.layers.Embedding(input_dim=vocab_size, 14 | output_dim=embedding_size, 15 | embeddings_initializer=initializer) 16 | model.add(embedding) 17 | model.add(K.layers.Dropout(dropout)) 18 | rnn_layer = K.layers.CuDNNLSTM if gpu_enable() else K.layers.LSTM 19 | for layer in range(layers): 20 | model.add(rnn_layer(hidden_size, return_sequences=True)) 21 | model.add(K.layers.Dropout(dropout)) 22 | if hidden_size != embedding_size: 23 | model.add(K.layers.TimeDistributed( 24 | K.layers.Dense(embedding_size, 25 | kernel_initializer=initializer) 26 | )) 27 | # Tying encoder/decoder 28 | #model.add(K.layers.TimeDistributed(ProjectionLayer(embedding))) 29 | model.add(K.layers.TimeDistributed( 30 | K.layers.Dense(vocab_size, 31 | kernel_initializer=initializer, activation="softmax") 32 | )) 33 | 34 | #model.add(K.layers.Activation(activation="softmax")) 35 | 36 | 
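    # Note: the commented-out ProjectionLayer above would tie the output
    # projection to the embedding weights ("weight tying"); with it disabled,
    # the untied TimeDistributed Dense with activation="softmax" already
    # yields the per-step vocabulary distribution, which is why the extra
    # Activation("softmax") line is left commented out as well.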
return model 37 | -------------------------------------------------------------------------------- /gcn/language_model/similarity_graph_lm.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from tensorflow.python import keras as K 3 | from gcn.layers import GraphAttentionLayer 4 | from gcn.metrics import perplexity 5 | 6 | 7 | def SimilarityGraphLM(vocab_size, sequence_length, 8 | embedding_size, dropout=0.7, num_graph_conv=2): 9 | 10 | words = K.layers.Input(shape=(sequence_length,)) 11 | matrix = K.layers.Input(shape=(sequence_length,)) 12 | 13 | embeddings = K.layers.Embedding(output_dim=embedding_size, 14 | input_dim=vocab_size, 15 | input_length=sequence_length)(words) 16 | 17 | # context feature 18 | context = K.layers.LSTM(embedding_size, dropout=dropout, 19 | return_sequences=True, return_state=True)(embeddings) 20 | 21 | # graph feature 22 | features = tf.transpose(embeddings, [1, 0, 2]) 23 | for layer in range(num_graph_conv): 24 | features = K.layers.TimeDistributed( 25 | GraphAttentionLayer( 26 | embedding_size, 27 | attn_heads_reduction="average"))([features, matrix]) 28 | return None 29 | 30 | features = K.backend.transpose(features) 31 | merged = K.layers.concatenate([context, features]) 32 | output = K.layers.Dense(vocab_size, activation="softmax")(merged) 33 | 34 | model = K.models.Model(inputs=[words, matrix], outputs=output) 35 | 36 | # Set optimizer 37 | model.compile(loss="sparse_categorical_crossentropy", 38 | optimizer="adam", 39 | metrics=["accuracy", perplexity]) 40 | 41 | return model 42 | -------------------------------------------------------------------------------- /gcn/language_model/trainer.py: -------------------------------------------------------------------------------- 1 | import sys 2 | from tensorflow.python import keras as K 3 | import numpy as np 4 | import chazutsu 5 | import chariot.transformer as ct 6 | from chariot.feeder import LanguageModelFeeder 7 | from gcn.base_trainer import BaseTrainer 8 | from gcn.metrics import perplexity 9 | 10 | 11 | class Trainer(BaseTrainer): 12 | 13 | def __init__(self, root="", lang=None, min_df=5, max_df=sys.maxsize, 14 | unknown="", preprocessor_name="preprocessor", 15 | log_dir=""): 16 | super().__init__(root, lang, min_df, max_df, unknown, 17 | preprocessor_name, log_dir) 18 | 19 | def download(self): 20 | download_dir = self.storage.data_path("raw") 21 | r = chazutsu.datasets.WikiText2().download(download_dir) 22 | return r 23 | 24 | def train(self, model, data_kind="train", lr=1e-3, 25 | batch_size=20, sequence_length=35, epochs=40): 26 | if not self._built: 27 | raise Exception("Trainer's preprocessor is not built.") 28 | 29 | r = self.download() 30 | step_generators = {"train": {}, "valid": {}} 31 | 32 | # Set optimizer 33 | model.compile(loss="sparse_categorical_crossentropy", 34 | optimizer=K.optimizers.Adam(lr=lr), 35 | metrics=["accuracy", perplexity]) 36 | 37 | for k in step_generators: 38 | if k == "train": 39 | if data_kind == "train": 40 | data = r.train_data() 41 | else: 42 | data = r.valid_data() 43 | else: 44 | data = r.test_data() 45 | 46 | spec = {"sentence": ct.formatter.ShiftGenerator()} 47 | feeder = LanguageModelFeeder(spec) 48 | data = self.preprocessor.transform(data) 49 | step, generator = feeder.make_generator( 50 | data, batch_size=batch_size, 51 | sequence_length=sequence_length, 52 | sequencial=False) 53 | 54 | step_generators[k]["g"] = generator 55 | step_generators[k]["s"] = step 56 | 57 | callbacks = 
[K.callbacks.ModelCheckpoint(self.model_path, 58 | save_best_only=True), 59 | K.callbacks.TensorBoard(self.tensorboard_dir)] 60 | 61 | metrics = model.fit_generator( 62 | step_generators["train"]["g"](), 63 | step_generators["train"]["s"], 64 | validation_data=step_generators["valid"]["g"](), 65 | validation_steps=step_generators["valid"]["s"], 66 | epochs=epochs, 67 | callbacks=callbacks) 68 | 69 | return metrics 70 | 71 | def generate_text(self, model, seed_text, 72 | sequence_length=10, iteration=20): 73 | preprocessed = self.preprocessor.transform([seed_text])[0] 74 | 75 | def pad_sequence(tokens, length): 76 | if len(tokens) < length: 77 | pad_size = length - len(tokens) 78 | return tokens + [self.preprocessor.vocabulary.pad] * pad_size 79 | elif len(tokens) > length: 80 | return tokens[-length:] 81 | else: 82 | return tokens 83 | 84 | for _ in range(iteration): 85 | x = pad_sequence(preprocessed, sequence_length) 86 | y = model.predict([x]) 87 | index = min(len(preprocessed) - 1, sequence_length - 1) 88 | target_word_probs = y[index][0] 89 | w = np.random.choice(np.arange(len(target_word_probs)), 90 | 1, p=target_word_probs)[0] 91 | preprocessed.append(w) 92 | 93 | decoded = self.preprocessor.inverse_transform([preprocessed]) 94 | text = " ".join(decoded[0]) 95 | 96 | return text 97 | -------------------------------------------------------------------------------- /gcn/layers/__init__.py: -------------------------------------------------------------------------------- 1 | from .attention_layer import AttentionLayer 2 | from .graph_attention_layer import GraphAttentionLayer 3 | from .projection_layer import ProjectionLayer 4 | -------------------------------------------------------------------------------- /gcn/layers/attention_layer.py: -------------------------------------------------------------------------------- 1 | from tensorflow.python.keras import backend as K 2 | from tensorflow.python.keras.layers import Layer 3 | from tensorflow.python.keras import initializers, regularizers, constraints 4 | 5 | 6 | class AttentionLayer(Layer): 7 | """ 8 | import from Bidirectional LSTM and Attention 9 | https://www.kaggle.com/takuok/bidirectional-lstm-and-attention-lb-0-043 10 | """ 11 | 12 | def __init__(self, sequence_length, 13 | W_regularizer=None, b_regularizer=None, 14 | W_constraint=None, b_constraint=None, 15 | bias=True, return_attentions=False, **kwargs): 16 | self.sequence_length = sequence_length 17 | self.supports_masking = True 18 | self.return_attentions = return_attentions 19 | self.init = initializers.get("glorot_uniform") 20 | 21 | self.W_regularizer = regularizers.get(W_regularizer) 22 | self.b_regularizer = regularizers.get(b_regularizer) 23 | 24 | self.W_constraint = constraints.get(W_constraint) 25 | self.b_constraint = constraints.get(b_constraint) 26 | 27 | self.bias = bias 28 | self.embedding_dim = 0 29 | super(AttentionLayer, self).__init__(**kwargs) 30 | 31 | def build(self, input_shape): 32 | assert len(input_shape) == 3 33 | 34 | _input_shape = input_shape.as_list() 35 | self.embedding_dim = _input_shape[-1] 36 | self.W = self.add_weight(name="{}_W".format(self.name), 37 | shape=(self.embedding_dim,), 38 | initializer=self.init, 39 | regularizer=self.W_regularizer, 40 | constraint=self.W_constraint) 41 | 42 | if self.bias: 43 | self.b = self.add_weight(name="{}_b".format(self.name), 44 | shape=(_input_shape[1],), 45 | initializer="zero", 46 | regularizer=self.b_regularizer, 47 | constraint=self.b_constraint) 48 | else: 49 | self.b = None 50 | 51 | self.built = 
True 52 | 53 | def compute_mask(self, input, input_mask=None): 54 | return None 55 | 56 | def call(self, x, mask=None): 57 | embedding_dim = self.embedding_dim 58 | sequence_length = self.sequence_length 59 | 60 | eij = K.reshape(K.dot(K.reshape(x, (-1, embedding_dim)), 61 | K.reshape(self.W, (embedding_dim, 1))), 62 | (-1, sequence_length)) 63 | 64 | if self.bias: 65 | eij += self.b 66 | 67 | eij = K.tanh(eij) 68 | 69 | a = K.exp(eij) 70 | 71 | if mask is not None: 72 | a *= K.cast(mask, K.floatx()) 73 | 74 | a /= K.cast(K.sum(a, axis=1, keepdims=True) + K.epsilon(), K.floatx()) 75 | 76 | weighted_input = x * K.expand_dims(a) 77 | output = K.sum(weighted_input, axis=1) 78 | if self.return_attentions: 79 | return output, a 80 | else: 81 | return output 82 | 83 | def compute_output_shape(self, input_shape): 84 | return input_shape[0], self.embedding_dim 85 | -------------------------------------------------------------------------------- /gcn/layers/graph_attention_layer.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from tensorflow.python.keras import backend as K 3 | from tensorflow.python.keras.engine.base_layer import InputSpec 4 | from tensorflow.python.keras import initializers, regularizers, constraints 5 | from tensorflow.python.keras.layers import Dense, Dropout 6 | 7 | 8 | class GraphAttentionLayer(Dense): 9 | """ 10 | import from danielegrattarola/keras-gat 11 | https://github.com/danielegrattarola/keras-gat/blob/master/keras_gat/graph_attention_layer.py 12 | """ 13 | 14 | def __init__(self, 15 | feature_units, 16 | attn_heads=1, 17 | attn_heads_reduction="concat", # {"concat", "average"} 18 | dropout_rate=0.5, 19 | activation="relu", 20 | attn_kernel_initializer="glorot_uniform", 21 | attn_kernel_regularizer=None, 22 | attn_kernel_constraint=None, 23 | attention=True, 24 | return_attention=False, 25 | node_level_bias=False, 26 | **kwargs): 27 | 28 | if attn_heads_reduction not in {"concat", "average"}: 29 | raise ValueError("Possbile reduction methods: concat, average") 30 | 31 | super().__init__(units=feature_units, 32 | activation=activation, 33 | **kwargs) 34 | 35 | # Number of attention heads (K in the paper) 36 | self.attn_heads = attn_heads 37 | # Eq. 
5 and 6 in the paper 38 | self.attn_heads_reduction = attn_heads_reduction 39 | # Internal dropout rate 40 | self.dropout_rate = dropout_rate 41 | 42 | self.attn_kernel_initializer \ 43 | = initializers.get(attn_kernel_initializer) 44 | self.attn_kernel_regularizer \ 45 | = regularizers.get(attn_kernel_regularizer) 46 | self.attn_kernel_constraint = constraints.get(attn_kernel_constraint) 47 | self.attention = attention 48 | self.return_attention = return_attention 49 | self.node_level_bias = node_level_bias 50 | self.input_spec = [InputSpec(ndim=3), InputSpec(ndim=3)] 51 | self.supports_masking = True 52 | # Populated by build() 53 | self.kernels = [] 54 | self.biases = [] 55 | self.neighbor_kernels = [] 56 | self.attn_kernels = [] 57 | self.attention_biases = [] 58 | 59 | if attn_heads_reduction == "concat": 60 | # Output will have shape (..., K * F") 61 | self.output_dim = self.units * self.attn_heads 62 | else: 63 | # Output will have shape (..., F") 64 | self.output_dim = self.units 65 | 66 | def build(self, input_shape): 67 | X_dims, A_dims = [dims.as_list() for dims in input_shape] 68 | assert len(X_dims) == 3 69 | assert len(A_dims) == 3 and A_dims[1] == A_dims[2] 70 | 71 | _, N, F = X_dims 72 | 73 | # Initialize weights for each attention head 74 | for head in range(self.attn_heads): 75 | # Layer kernel 76 | kernel = self.add_weight(shape=(F, self.units), 77 | initializer=self.kernel_initializer, 78 | regularizer=self.kernel_regularizer, 79 | constraint=self.kernel_constraint, 80 | name="kernel_{}".format(head)) 81 | self.kernels.append(kernel) 82 | 83 | # Layer bias 84 | if self.use_bias: 85 | bias = self.add_weight(shape=(self.units,), 86 | initializer=self.bias_initializer, 87 | regularizer=self.bias_regularizer, 88 | constraint=self.bias_constraint, 89 | name="bias_{}".format(head)) 90 | self.biases.append(bias) 91 | 92 | if not self.attention: 93 | continue 94 | 95 | # Attention kernels 96 | neighbor_kernel = self.add_weight( 97 | shape=(F, self.units), 98 | initializer=self.kernel_initializer, 99 | regularizer=self.kernel_regularizer, 100 | constraint=self.kernel_constraint, 101 | name="kernel_neighbor_{}".format(head)) 102 | 103 | attn_kernel = self.add_weight( 104 | shape=(self.units, 1), 105 | initializer=self.attn_kernel_initializer, 106 | regularizer=self.attn_kernel_regularizer, 107 | constraint=self.attn_kernel_constraint, 108 | name="attn_kernel_{}".format(head)) 109 | 110 | self.neighbor_kernels.append(neighbor_kernel) 111 | self.attn_kernels.append(attn_kernel) 112 | 113 | if self.use_bias: 114 | if self.node_level_bias: 115 | biases = self.add_weight(shape=(N, N), 116 | initializer=self.bias_initializer, 117 | regularizer=self.bias_regularizer, 118 | constraint=self.bias_constraint, 119 | name="attention_bias") 120 | else: 121 | biases = [] 122 | for kind in ["self", "neigbor"]: 123 | name = "bias_attn_{}_{}".format(kind, head) 124 | bias = self.add_weight(shape=(N,), 125 | initializer=self.bias_initializer, 126 | regularizer=self.bias_regularizer, 127 | constraint=self.bias_constraint, 128 | name=name) 129 | biases.append(bias) 130 | self.attention_biases.append(biases) 131 | 132 | self.built = True 133 | 134 | def call(self, inputs): 135 | X = inputs[0] # Node features (B x N x F) 136 | A = inputs[1] # Adjacency matrix (B x N x N) 137 | 138 | X_dims = X.get_shape().as_list() 139 | B, N, F = X_dims 140 | 141 | outputs = [] 142 | attentions = [] 143 | for head in range(self.attn_heads): 144 | # W in the paper (F x F") 145 | kernel = self.kernels[head] 146 | 147 | 
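            # Reading aid for this head: X is (B x N x F) and the kernel
            # projects it to (B x N x F'). With attention enabled, pairwise
            # scores come from the "self" and "neighbor" projections, are
            # squashed with tanh, masked so only pairs connected in A can
            # attend, and softmax-normalized; the aggregated neighbour
            # features are then added back onto the node's own projected
            # features (a residual-style update) before the bias is applied.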
# Compute inputs to attention network 148 | features = K.dot(X, kernel) # (B x N x F") 149 | dropout_feat = Dropout(self.dropout_rate)(features) # (B x N x F") 150 | 151 | if not self.attention: 152 | attention = A 153 | aggregation = tf.matmul(attention, dropout_feat) # (N x F") 154 | else: 155 | # Attention kernel a in the paper (2F" x 1) 156 | neighbor_kernel = self.neighbor_kernels[head] 157 | attn_kernel = self.attn_kernels[head] 158 | 159 | neighbor_features = K.dot(X, neighbor_kernel) 160 | 161 | attn_self = K.dot(features, attn_kernel) 162 | attn_neighbor = K.dot(neighbor_features, attn_kernel) 163 | 164 | if self.use_bias and not self.node_level_bias: 165 | self_attn_bias, neigbor_attn_bias = self.attention_biases[head] 166 | attn_self = K.bias_add(attn_self, self_attn_bias) 167 | attn_neighbor = K.bias_add(attn_neighbor, neigbor_attn_bias) 168 | 169 | attention = attn_neighbor + tf.transpose(attn_self, (0, 2, 1)) 170 | attention = tf.nn.tanh(attention) 171 | attention = K.reshape(attention, (-1, N, N)) 172 | if self.use_bias and self.node_level_bias: 173 | bias = self.attention_biases[head] 174 | attention = K.bias_add(attention, bias) 175 | 176 | has_connection = tf.to_float(tf.greater(A, 0.0)) 177 | 178 | mask = -10e9 * (1.0 - has_connection) 179 | attention += mask 180 | 181 | attention = tf.nn.softmax(attention) * has_connection 182 | 183 | dropout_attn = Dropout(self.dropout_rate)(attention) 184 | aggregation = tf.matmul(dropout_attn, dropout_feat) 185 | 186 | node_features = dropout_feat + aggregation 187 | if self.use_bias: 188 | node_features = K.bias_add(node_features, self.biases[head]) 189 | 190 | # Add output of attention 191 | if self.return_attention: 192 | attentions.append(attention) 193 | 194 | outputs.append(node_features) 195 | 196 | # Aggregate the heads" output according to the reduction method 197 | if self.attn_heads_reduction == "concat": 198 | output = K.concatenate(outputs, axis=-1) # (B x N x KF") 199 | else: 200 | output = K.mean(K.stack(outputs), axis=0) # (B x N x F") 201 | # If "average", compute the activation here (Eq. 
6) 202 | 203 | output = self.activation(output) 204 | 205 | if self.return_attention: 206 | attentions = K.stack(attentions, axis=1) 207 | return (output, attentions) 208 | else: 209 | return output 210 | 211 | def compute_output_shape(self, input_shape): 212 | X_dims, A_dims = [dims.as_list() for dims in input_shape] 213 | assert len(X_dims) == 3 214 | assert len(A_dims) == 3 215 | output_shape = X_dims[0], X_dims[1], self.output_dim 216 | 217 | if self.return_attention: 218 | return (tf.TensorShape(output_shape), 219 | tf.TensorShape(A_dims.insert(1, self.attn_heads))) 220 | else: 221 | return tf.TensorShape(output_shape) 222 | 223 | def compute_mask(self, inputs, mask): 224 | if isinstance(mask, list): 225 | output_mask = mask[0] 226 | else: 227 | output_mask = mask 228 | 229 | if self.return_attention: 230 | return [output_mask] + [None] 231 | else: 232 | return output_mask 233 | -------------------------------------------------------------------------------- /gcn/layers/graph_attention_layer_before.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from tensorflow.python.keras import backend as K 3 | from tensorflow.python.keras.engine.base_layer import InputSpec 4 | from tensorflow.python.keras import initializers, regularizers, constraints 5 | from tensorflow.python.keras.layers import Dense, Dropout, LeakyReLU 6 | 7 | 8 | class GraphAttentionLayer(Dense): 9 | """ 10 | import from danielegrattarola/keras-gat 11 | https://github.com/danielegrattarola/keras-gat/blob/master/keras_gat/graph_attention_layer.py 12 | """ 13 | 14 | def __init__(self, 15 | feature_units, 16 | attn_heads=1, 17 | attn_heads_reduction="concat", # {"concat", "average"} 18 | dropout_rate=0.5, 19 | activation="relu", 20 | attn_kernel_initializer="glorot_uniform", 21 | attn_kernel_regularizer=None, 22 | attn_kernel_constraint=None, 23 | attention=True, 24 | return_attention=False, 25 | **kwargs): 26 | 27 | if attn_heads_reduction not in {"concat", "average"}: 28 | raise ValueError("Possbile reduction methods: concat, average") 29 | 30 | super(GraphAttentionLayer, self).__init__(units=feature_units, 31 | activation=activation, 32 | **kwargs) 33 | 34 | # Number of attention heads (K in the paper) 35 | self.attn_heads = attn_heads 36 | # Eq. 
5 and 6 in the paper 37 | self.attn_heads_reduction = attn_heads_reduction 38 | # Internal dropout rate 39 | self.dropout_rate = dropout_rate 40 | 41 | self.attn_kernel_initializer \ 42 | = initializers.get(attn_kernel_initializer) 43 | self.attn_kernel_regularizer \ 44 | = regularizers.get(attn_kernel_regularizer) 45 | self.attn_kernel_constraint = constraints.get(attn_kernel_constraint) 46 | self.attention = attention 47 | self.return_attention = return_attention 48 | self.input_spec = [InputSpec(ndim=3), InputSpec(ndim=3)] 49 | self.supports_masking = False 50 | 51 | # Populated by build() 52 | self.kernels = [] # Layer kernels for attention heads 53 | self.biases = [] # Layer biases for attention heads 54 | self.attn_kernels = [] # Attention kernels for attention heads 55 | 56 | if attn_heads_reduction == "concat": 57 | # Output will have shape (..., K * F") 58 | self.output_dim = self.units * self.attn_heads 59 | else: 60 | # Output will have shape (..., F") 61 | self.output_dim = self.units 62 | 63 | def build(self, input_shape): 64 | X_dims, A_dims = [dims.as_list() for dims in input_shape] 65 | assert len(X_dims) == 3 66 | assert len(A_dims) == 3 and A_dims[1] == A_dims[2] 67 | 68 | F = X_dims[-1] 69 | 70 | # Initialize weights for each attention head 71 | for head in range(self.attn_heads): 72 | # Layer kernel 73 | kernel = self.add_weight(shape=(F, self.units), 74 | initializer=self.kernel_initializer, 75 | regularizer=self.kernel_regularizer, 76 | constraint=self.kernel_constraint, 77 | name="kernel_{}".format(head)) 78 | self.kernels.append(kernel) 79 | 80 | # Layer bias 81 | if self.use_bias: 82 | bias = self.add_weight(shape=(self.units, ), 83 | initializer=self.bias_initializer, 84 | regularizer=self.bias_regularizer, 85 | constraint=self.bias_constraint, 86 | name="bias_{}".format(head)) 87 | self.biases.append(bias) 88 | 89 | if not self.attention: 90 | continue 91 | 92 | # Attention kernels 93 | attn_kernel_self = self.add_weight( 94 | shape=(self.units, 1), 95 | initializer=self.attn_kernel_initializer, 96 | regularizer=self.attn_kernel_regularizer, 97 | constraint=self.attn_kernel_constraint, 98 | name="attn_kernel_self_{}".format(head),) 99 | attn_kernel_neighs = self.add_weight( 100 | shape=(self.units, 1), 101 | initializer=self.attn_kernel_initializer, 102 | regularizer=self.attn_kernel_regularizer, 103 | constraint=self.attn_kernel_constraint, 104 | name="attn_kernel_neigh_{}".format(head)) 105 | 106 | self.attn_kernels.append([attn_kernel_self, attn_kernel_neighs]) 107 | 108 | self.built = True 109 | 110 | def call(self, inputs): 111 | X = inputs[0] # Node features (B x N x F) 112 | A = inputs[1] # Adjacency matrix (B x N x N) 113 | 114 | outputs = [] 115 | attentions = [] 116 | for head in range(self.attn_heads): 117 | # W in the paper (F x F") 118 | kernel = self.kernels[head] 119 | 120 | # Compute inputs to attention network 121 | features = K.dot(X, kernel) # (B x N x F") 122 | dropout_feat = Dropout(self.dropout_rate)(features) # (B x N x F") 123 | 124 | if not self.attention: 125 | attention = A 126 | node_features = tf.matmul(attention, dropout_feat) # (N x F") 127 | else: 128 | # Attention kernel a in the paper (2F" x 1) 129 | attention_kernel = self.attn_kernels[head] 130 | 131 | # Compute feature combinations 132 | # Note: [[a_1], [a_2]]^T [[Wh_i], [Wh_2]] 133 | # = [a_1]^T [Wh_i] + [a_2]^T [Wh_j] 134 | # Both (B x N x 1) 135 | attn_for_self = K.dot(features, attention_kernel[0]) 136 | attn_for_neighs = K.dot(features, attention_kernel[1]) 137 | 138 | # 
Attention head a(Wh_i, Wh_j) = a^T [[Wh_i], [Wh_j]] 139 | # attention becomes (B x N x N) via broadcasting 140 | attention = attn_for_self + tf.transpose(attn_for_neighs, 141 | (0, 2, 1)) 142 | 143 | # Add nonlinearty (alpha=0.2 is tensorflow default) 144 | attention = LeakyReLU(alpha=0.2)(attention) 145 | 146 | # Mask values before activation (Vaswani et al., 2017) 147 | mask = -10e9 * (1.0 - A) 148 | attention += mask 149 | 150 | # Apply softmax to get attention coefficients 151 | attention = K.softmax(attention) # (B x N x N) 152 | 153 | # Apply dropout to features and attention coefficients 154 | dropout_attn = Dropout(self.dropout_rate)(attention) # (B x N x N) 155 | 156 | # Linear combination with neighbors" features 157 | # (B x N x F") 158 | node_features = tf.matmul(dropout_attn, dropout_feat) # (N x F") 159 | 160 | if self.use_bias: 161 | node_features = K.bias_add(node_features, self.biases[head]) 162 | 163 | if self.attn_heads_reduction == "concat": 164 | # If "concat", compute the activation here (Eq. 5) 165 | node_features = self.activation(node_features) 166 | 167 | if self.return_attention: 168 | attentions.append(attention) 169 | # Add output of attention head to final output 170 | outputs.append(node_features) 171 | 172 | # Aggregate the heads" output according to the reduction method 173 | if self.attn_heads_reduction == "concat": 174 | output = K.concatenate(outputs, axis=-1) # (B x N x KF") 175 | else: 176 | output = K.mean(K.stack(outputs), axis=0) # (B x N x F") 177 | # If "average", compute the activation here (Eq. 6) 178 | 179 | output = self.activation(output) 180 | 181 | if self.return_attention: 182 | attentions = K.stack(attentions, axis=1) 183 | return (output, attentions) 184 | else: 185 | return output 186 | 187 | def compute_output_shape(self, input_shape): 188 | X_dims, A_dims = [dims.as_list() for dims in input_shape] 189 | assert len(X_dims) == 3 190 | assert len(A_dims) == 3 191 | output_shape = X_dims[0], X_dims[1], self.output_dim 192 | 193 | if self.return_attention: 194 | return (tf.TensorShape(output_shape), 195 | tf.TensorShape(A_dims.insert(1, self.attn_heads))) 196 | else: 197 | return tf.TensorShape(output_shape) 198 | -------------------------------------------------------------------------------- /gcn/layers/graph_attention_layer_multi.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from tensorflow.python.keras import backend as K 3 | from tensorflow.python.keras.engine.base_layer import InputSpec 4 | from tensorflow.python.keras import initializers, regularizers, constraints 5 | from tensorflow.python.keras.layers import Dense, Dropout 6 | 7 | 8 | class GraphAttentionLayer(Dense): 9 | """ 10 | import from danielegrattarola/keras-gat 11 | https://github.com/danielegrattarola/keras-gat/blob/master/keras_gat/graph_attention_layer.py 12 | """ 13 | 14 | def __init__(self, 15 | feature_units, 16 | attn_heads=1, 17 | attn_heads_reduction="concat", # {"concat", "average"} 18 | dropout_rate=0.5, 19 | activation="relu", 20 | attn_kernel_initializer="glorot_uniform", 21 | attn_kernel_regularizer=None, 22 | attn_kernel_constraint=None, 23 | attention=True, 24 | return_attention=False, 25 | **kwargs): 26 | 27 | if attn_heads_reduction not in {"concat", "average"}: 28 | raise ValueError("Possbile reduction methods: concat, average") 29 | 30 | super(GraphAttentionLayer, self).__init__(units=feature_units, 31 | activation=activation, 32 | **kwargs) 33 | 34 | # Number of attention heads (K 
in the paper) 35 | self.attn_heads = attn_heads 36 | # Eq. 5 and 6 in the paper 37 | self.attn_heads_reduction = attn_heads_reduction 38 | # Internal dropout rate 39 | self.dropout_rate = dropout_rate 40 | 41 | self.attn_kernel_initializer \ 42 | = initializers.get(attn_kernel_initializer) 43 | self.attn_kernel_regularizer \ 44 | = regularizers.get(attn_kernel_regularizer) 45 | self.attn_kernel_constraint = constraints.get(attn_kernel_constraint) 46 | self.attention = attention 47 | self.return_attention = return_attention 48 | self.input_spec = [InputSpec(ndim=3), InputSpec(ndim=3)] 49 | self.supports_masking = False 50 | 51 | # Populated by build() 52 | self.kernels = [] 53 | self.biases = [] 54 | self.neighbor_kernels = [] 55 | self.attn_kernels = [] 56 | self.attention_biases = [] 57 | 58 | if attn_heads_reduction == "concat": 59 | # Output will have shape (..., K * F") 60 | self.output_dim = self.units * self.attn_heads 61 | else: 62 | # Output will have shape (..., F") 63 | self.output_dim = self.units 64 | 65 | def build(self, input_shape): 66 | X_dims, A_dims = [dims.as_list() for dims in input_shape] 67 | assert len(X_dims) == 3 68 | assert len(A_dims) == 3 and A_dims[1] == A_dims[2] 69 | 70 | _, N, F = X_dims 71 | 72 | # Initialize weights for each attention head 73 | for head in range(self.attn_heads): 74 | # Layer kernel 75 | kernel = self.add_weight(shape=(F, self.units), 76 | initializer=self.kernel_initializer, 77 | regularizer=self.kernel_regularizer, 78 | constraint=self.kernel_constraint, 79 | name="kernel_{}".format(head)) 80 | self.kernels.append(kernel) 81 | 82 | # Layer bias 83 | if self.use_bias: 84 | bias = self.add_weight(shape=(self.units,), 85 | initializer=self.bias_initializer, 86 | regularizer=self.bias_regularizer, 87 | constraint=self.bias_constraint, 88 | name="bias_{}".format(head)) 89 | self.biases.append(bias) 90 | 91 | if not self.attention: 92 | continue 93 | 94 | # Attention kernels 95 | neighbor_kernel = self.add_weight( 96 | shape=(F, self.units), 97 | initializer=self.kernel_initializer, 98 | regularizer=self.kernel_regularizer, 99 | constraint=self.kernel_constraint, 100 | name="kernel_neighbor_{}".format(head)) 101 | 102 | attn_kernel = self.add_weight( 103 | shape=(self.units, self.units), 104 | initializer=self.attn_kernel_initializer, 105 | regularizer=self.attn_kernel_regularizer, 106 | constraint=self.attn_kernel_constraint, 107 | name="attn_kernel_{}".format(head)) 108 | 109 | self.neighbor_kernels.append(neighbor_kernel) 110 | self.attn_kernels.append(attn_kernel) 111 | 112 | self.built = True 113 | 114 | def call(self, inputs): 115 | X = inputs[0] # Node features (B x N x F) 116 | A = inputs[1] # Adjacency matrix (B x N x N) 117 | 118 | X_dims = X.get_shape().as_list() 119 | B, N, F = X_dims 120 | 121 | outputs = [] 122 | attentions = [] 123 | for head in range(self.attn_heads): 124 | # W in the paper (F x F") 125 | kernel = self.kernels[head] 126 | 127 | # Compute inputs to attention network 128 | features = K.dot(X, kernel) # (B x N x F") 129 | dropout_feat = Dropout(self.dropout_rate)(features) # (B x N x F") 130 | 131 | neighbor_kernel = self.neighbor_kernels[head] 132 | attn_kernel = self.attn_kernels[head] 133 | 134 | neighbor_features = K.dot(X, neighbor_kernel) 135 | dropout_neighbor = Dropout(self.dropout_rate)(neighbor_features) 136 | 137 | merged = tf.matmul(K.dot(dropout_feat, attn_kernel), 138 | tf.transpose(dropout_neighbor, (0, 2, 1))) 139 | 140 | attention = tf.nn.tanh(merged) 141 | attention = K.reshape(attention, (-1, N, 
N)) 142 | 143 | mask = -10e9 * (1.0 - A) 144 | attention += mask 145 | 146 | attention = tf.nn.softmax(attention) 147 | dropout_attn = Dropout(self.dropout_rate)(attention) 148 | 149 | node_features = tf.matmul(dropout_attn, dropout_feat) 150 | 151 | if self.use_bias: 152 | node_features = K.bias_add(node_features, self.biases[head]) 153 | 154 | if self.return_attention: 155 | attentions.append(attention) 156 | # Add output of attention head to final output 157 | outputs.append(node_features) 158 | 159 | # Aggregate the heads" output according to the reduction method 160 | if self.attn_heads_reduction == "concat": 161 | output = K.concatenate(outputs, axis=-1) # (B x N x KF") 162 | else: 163 | output = K.mean(K.stack(outputs), axis=0) # (B x N x F") 164 | # If "average", compute the activation here (Eq. 6) 165 | 166 | output = self.activation(output) 167 | 168 | if self.return_attention: 169 | attentions = K.stack(attentions, axis=1) 170 | return (output, attentions) 171 | else: 172 | return output 173 | 174 | def compute_output_shape(self, input_shape): 175 | X_dims, A_dims = [dims.as_list() for dims in input_shape] 176 | assert len(X_dims) == 3 177 | assert len(A_dims) == 3 178 | output_shape = X_dims[0], X_dims[1], self.output_dim 179 | 180 | if self.return_attention: 181 | return (tf.TensorShape(output_shape), 182 | tf.TensorShape(A_dims.insert(1, self.attn_heads))) 183 | else: 184 | return tf.TensorShape(output_shape) 185 | -------------------------------------------------------------------------------- /gcn/layers/graph_attention_layer_original.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from tensorflow.python.keras import backend as K 3 | from tensorflow.python.keras import activations, constraints, initializers, regularizers 4 | from tensorflow.python.keras.layers import Layer, Dropout, LeakyReLU 5 | 6 | 7 | class GraphAttentionLayer(Layer): 8 | """ 9 | import from danielegrattarola/keras-gat 10 | https://github.com/danielegrattarola/keras-gat/blob/master/keras_gat/graph_attention_layer.py 11 | """ 12 | 13 | def __init__(self, 14 | feature_units, 15 | attn_heads=1, 16 | attn_heads_reduction="concat", # {"concat", "average"} 17 | dropout_rate=0.5, 18 | activation="relu", 19 | use_bias=True, 20 | kernel_initializer="glorot_uniform", 21 | bias_initializer="zeros", 22 | attn_kernel_initializer="glorot_uniform", 23 | kernel_regularizer=None, 24 | bias_regularizer=None, 25 | attn_kernel_regularizer=None, 26 | activity_regularizer=None, 27 | kernel_constraint=None, 28 | bias_constraint=None, 29 | attn_kernel_constraint=None, 30 | attention=True, 31 | **kwargs): 32 | 33 | if attn_heads_reduction not in {"concat", "average"}: 34 | raise ValueError("Possbile reduction methods: concat, average") 35 | 36 | self.F_ = feature_units # Number of output features (F" in the paper) 37 | self.attn_heads = attn_heads # Number of attention heads (K in the paper) 38 | self.attn_heads_reduction = attn_heads_reduction # Eq. 5 and 6 in the paper 39 | self.dropout_rate = dropout_rate # Internal dropout rate 40 | self.activation = activations.get(activation) # Eq. 
4 in the paper 41 | self.use_bias = use_bias 42 | 43 | self.kernel_initializer = initializers.get(kernel_initializer) 44 | self.bias_initializer = initializers.get(bias_initializer) 45 | self.attn_kernel_initializer = initializers.get(attn_kernel_initializer) 46 | 47 | self.kernel_regularizer = regularizers.get(kernel_regularizer) 48 | self.bias_regularizer = regularizers.get(bias_regularizer) 49 | self.attn_kernel_regularizer = regularizers.get(attn_kernel_regularizer) 50 | self.activity_regularizer = regularizers.get(activity_regularizer) 51 | 52 | self.kernel_constraint = constraints.get(kernel_constraint) 53 | self.bias_constraint = constraints.get(bias_constraint) 54 | self.attn_kernel_constraint = constraints.get(attn_kernel_constraint) 55 | self.supports_masking = False 56 | self.attention = attention 57 | 58 | # Populated by build() 59 | self.kernels = [] # Layer kernels for attention heads 60 | self.biases = [] # Layer biases for attention heads 61 | self.attn_kernels = [] # Attention kernels for attention heads 62 | 63 | if attn_heads_reduction == "concat": 64 | # Output will have shape (..., K * F") 65 | self.output_dim = self.F_ * self.attn_heads 66 | else: 67 | # Output will have shape (..., F") 68 | self.output_dim = self.F_ 69 | 70 | super(GraphAttentionLayer, self).__init__(**kwargs) 71 | 72 | def build(self, input_shape): 73 | assert len(input_shape) >= 2 74 | F = input_shape[0][-1].value 75 | 76 | # Initialize weights for each attention head 77 | for head in range(self.attn_heads): 78 | # Layer kernel 79 | kernel = self.add_weight(shape=(F, self.F_), 80 | initializer=self.kernel_initializer, 81 | regularizer=self.kernel_regularizer, 82 | constraint=self.kernel_constraint, 83 | name="kernel_{}".format(head)) 84 | self.kernels.append(kernel) 85 | 86 | # # Layer bias 87 | if self.use_bias: 88 | bias = self.add_weight(shape=(self.F_, ), 89 | initializer=self.bias_initializer, 90 | regularizer=self.bias_regularizer, 91 | constraint=self.bias_constraint, 92 | name="bias_{}".format(head)) 93 | self.biases.append(bias) 94 | 95 | if not self.attention: 96 | continue 97 | 98 | # Attention kernels 99 | attn_kernel_self = self.add_weight(shape=(self.F_, 1), 100 | initializer=self.attn_kernel_initializer, 101 | regularizer=self.attn_kernel_regularizer, 102 | constraint=self.attn_kernel_constraint, 103 | name="attn_kernel_self_{}".format(head),) 104 | attn_kernel_neighs = self.add_weight(shape=(self.F_, 1), 105 | initializer=self.attn_kernel_initializer, 106 | regularizer=self.attn_kernel_regularizer, 107 | constraint=self.attn_kernel_constraint, 108 | name="attn_kernel_neigh_{}".format(head)) 109 | self.attn_kernels.append([attn_kernel_self, attn_kernel_neighs]) 110 | 111 | self.built = True 112 | 113 | def call(self, inputs): 114 | X = inputs[0] # Node features (N x F) 115 | A = inputs[1] # Adjacency matrix (N x N) 116 | 117 | outputs = [] 118 | for head in range(self.attn_heads): 119 | kernel = self.kernels[head] # W in the paper (F x F") 120 | 121 | # Compute inputs to attention network 122 | features = K.dot(X, kernel) # (N x F") 123 | dropout_feat = Dropout(self.dropout_rate)(features) # (N x F") 124 | 125 | if not self.attention: 126 | node_features = tf.matmul(A, dropout_feat) # (N x F") 127 | else: 128 | attention_kernel = self.attn_kernels[head] # Attention kernel a in the paper (2F" x 1) 129 | 130 | # Compute feature combinations 131 | # Note: [[a_1], [a_2]]^T [[Wh_i], [Wh_2]] = [a_1]^T [Wh_i] + [a_2]^T [Wh_j] 132 | attn_for_self = K.dot(features, attention_kernel[0]) # (N x 
1), [a_1]^T [Wh_i] 133 | attn_for_neighs = K.dot(features, attention_kernel[1]) # (N x 1), [a_2]^T [Wh_j] 134 | 135 | # Attention head a(Wh_i, Wh_j) = a^T [[Wh_i], [Wh_j]] 136 | dense = attn_for_self + K.transpose(attn_for_neighs) # (N x N) via broadcasting 137 | 138 | # Add nonlinearty 139 | dense = LeakyReLU(alpha=0.2)(dense) 140 | 141 | # Mask values before activation (Vaswani et al., 2017) 142 | mask = -10e9 * (1.0 - A) 143 | dense += mask 144 | 145 | # Apply softmax to get attention coefficients 146 | dense = K.softmax(dense) # (N x N) 147 | 148 | # Apply dropout to features and attention coefficients 149 | dropout_attn = Dropout(self.dropout_rate)(dense) # (N x N) 150 | 151 | # Linear combination with neighbors" features 152 | node_features = K.dot(dropout_attn, dropout_feat) # (N x F") 153 | 154 | if self.use_bias: 155 | node_features = K.bias_add(node_features, self.biases[head]) 156 | 157 | if self.attn_heads_reduction == "concat": 158 | # If "concat", compute the activation here (Eq. 5) 159 | node_features = self.activation(node_features) 160 | 161 | # Add output of attention head to final output 162 | outputs.append(node_features) 163 | 164 | # Aggregate the heads" output according to the reduction method 165 | if self.attn_heads_reduction == "concat": 166 | output = K.concatenate(outputs) # (N x KF") 167 | else: 168 | output = K.mean(K.stack(outputs), axis=0) # N x F") 169 | # If "average", compute the activation here (Eq. 6) 170 | 171 | output = self.activation(output) 172 | return output 173 | 174 | def compute_output_shape(self, input_shape): 175 | output_shape = input_shape[0][0].value, self.output_dim 176 | return tf.TensorShape(output_shape) 177 | -------------------------------------------------------------------------------- /gcn/layers/projection_layer.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from tensorflow.python.keras import backend as K 3 | from tensorflow.python.keras.layers import Layer 4 | 5 | 6 | class ProjectionLayer(Layer): 7 | 8 | def __init__(self, embedding, **kwargs): 9 | super(ProjectionLayer, self).__init__(**kwargs) 10 | self.weight = embedding.embeddings 11 | self.output_dim = self.weight.shape[0] 12 | 13 | def call(self, x): 14 | return K.dot(x, K.transpose(self.weight)) 15 | 16 | def compute_output_shape(self, input_shape): 17 | assert input_shape and len(input_shape) >= 2 18 | assert input_shape[-1] 19 | output_shape = list(input_shape) 20 | output_shape[-1] = self.output_dim 21 | return tf.TensorShape(output_shape) 22 | -------------------------------------------------------------------------------- /gcn/metrics.py: -------------------------------------------------------------------------------- 1 | from tensorflow.python.keras import backend as K 2 | 3 | 4 | def perplexity(y_true, y_pred): 5 | cross_entropy = K.mean(K.sparse_categorical_crossentropy(y_true, y_pred)) 6 | perplexity = K.exp(cross_entropy) 7 | return perplexity 8 | -------------------------------------------------------------------------------- /gcn/util.py: -------------------------------------------------------------------------------- 1 | from tensorflow.python.client import device_lib 2 | 3 | 4 | def gpu_enable(): 5 | local_device_protos = device_lib.list_local_devices() 6 | gpus = [x.name for x in local_device_protos if x.device_type == "GPU"] 7 | if len(gpus) > 0: 8 | return True 9 | else: 10 | return False 11 | -------------------------------------------------------------------------------- 
/gcn/visualize/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/icoxfog417/graph-convolution-nlp/2f15da072e401528d9faf76985d05afce336798f/gcn/visualize/__init__.py -------------------------------------------------------------------------------- /gcn/visualize/draw.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import networkx as nx 3 | import matplotlib.pyplot as plt 4 | from gcn.graph.dependency_graph import DependencyGraph 5 | 6 | 7 | class AttentionDrawer(): 8 | 9 | def __init__(self, graph_builder): 10 | self.graph_builder = graph_builder 11 | 12 | def draw(self, sentence, attention=()): 13 | edge_matrix = () 14 | nodes = self.graph_builder.get_nodes(sentence) 15 | 16 | size = len(attention) if len(attention) > 0 else len(nodes) 17 | if isinstance(self.graph_builder, DependencyGraph): 18 | edge_matrix = self.graph_builder.build( 19 | sentence, size, return_label=True) 20 | matrix = attention 21 | if len(attention) == 0: 22 | matrix = self.graph_builder.build(sentence, size) 23 | graph = self._build(nodes, matrix, edge_matrix) 24 | return graph 25 | 26 | def _build(self, nodes, matrix, edge_matrix=()): 27 | graph = nx.Graph() 28 | _size = min(len(nodes), len(matrix)) 29 | graph.add_nodes_from(nodes[i] for i in range(_size)) 30 | for i in range(_size): 31 | for j in range(_size): 32 | if matrix[i][j] > 0: 33 | if len(edge_matrix) == 0: 34 | graph.add_edge(nodes[i], nodes[j], 35 | weight=matrix[i][j]) 36 | else: 37 | graph.add_edge(nodes[i], nodes[j], 38 | weight=matrix[i][j], 39 | label=edge_matrix[i][j]) 40 | 41 | return graph 42 | 43 | def show(self, graph, figsize=(6, 6), 44 | node_color="skyblue", edge_color="grey", 45 | font_size=15, max_width=5): 46 | plt.figure(figsize=figsize) 47 | pos = nx.spring_layout(graph) 48 | weights = np.array([graph[u][v]["weight"] for u, v in graph.edges()]) 49 | width = 1 + (np.abs(weights) * max_width - 1) 50 | 51 | nx.draw_networkx(graph, pos, 52 | node_color=node_color, 53 | font_size=font_size, edge_color=edge_color, 54 | width=width) 55 | 56 | if isinstance(self.graph_builder, DependencyGraph): 57 | labels = {(u, v): graph[u][v]["label"] for u, v in graph.edges()} 58 | nx.draw_networkx_edge_labels(graph, pos, edge_labels=labels) 59 | 60 | plt.axis("off") 61 | plt.tight_layout() 62 | plt.show() 63 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/icoxfog417/graph-convolution-nlp/2f15da072e401528d9faf76985d05afce336798f/requirements.txt -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/icoxfog417/graph-convolution-nlp/2f15da072e401528d9faf76985d05afce336798f/tests/__init__.py -------------------------------------------------------------------------------- /tests/classification/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/icoxfog417/graph-convolution-nlp/2f15da072e401528d9faf76985d05afce336798f/tests/classification/__init__.py -------------------------------------------------------------------------------- /tests/classification/test_baseline_tfidf.py: 
-------------------------------------------------------------------------------- 1 | import os 2 | import unittest 3 | from gcn.data.multi_nli_dataset import MultiNLIDataset 4 | from gcn.classification.baseline import TfidfClassifier 5 | 6 | 7 | class TestBaseline(unittest.TestCase): 8 | 9 | def test_baseline(self): 10 | root = os.path.join(os.path.dirname(__file__), "../../") 11 | dataset = MultiNLIDataset(root) 12 | data = dataset.test_data() 13 | 14 | classifier = TfidfClassifier() 15 | scores = classifier.fit(data["text"], data["label"]) 16 | self.assertTrue(len(scores) > 0) 17 | -------------------------------------------------------------------------------- /tests/classification/test_baseline_trainer.py: -------------------------------------------------------------------------------- 1 | import os 2 | import unittest 3 | from gcn.classification.baseline_trainer import BaselineTrainer 4 | from gcn.classification.baseline import LSTMClassifier 5 | 6 | 7 | class TestBaselineTrainer(unittest.TestCase): 8 | 9 | def test_build(self): 10 | root = os.path.join(os.path.dirname(__file__), "../../") 11 | trainer = BaselineTrainer(root, preprocessor_name="test_cbt_preprocessor") 12 | 13 | trainer.build() 14 | self.assertTrue(len(trainer.preprocessor.vocabulary.get()) > 1000) 15 | print(trainer.preprocessor.vocabulary.get()[:100]) 16 | print(trainer.preprocessor_path) 17 | os.remove(trainer.preprocessor_path) 18 | 19 | def test_train(self): 20 | root = os.path.join(os.path.dirname(__file__), "../../") 21 | trainer = BaselineTrainer(root, preprocessor_name="test_cbt_preprocessor") 22 | trainer.build() 23 | 24 | vocab_size = len(trainer.preprocessor.vocabulary.get()) 25 | model = LSTMClassifier(vocab_size) 26 | model.build(trainer.num_classes) 27 | 28 | metrics = trainer.train(model.model, epochs=2) 29 | self.assertTrue(metrics.history["acc"][-1] - metrics.history["acc"][0] > 0) 30 | -------------------------------------------------------------------------------- /tests/classification/test_trainer.py: -------------------------------------------------------------------------------- 1 | import os 2 | import shutil 3 | import unittest 4 | import numpy as np 5 | from gcn.classification.trainer import Trainer 6 | from gcn.classification.graph_based_classifier import GraphBasedClassifier 7 | from gcn.data.multi_nli_dataset import MultiNLIDataset 8 | from gcn.graph.dependency_graph import DependencyGraph 9 | from gcn.graph.similarity_graph import SimilarityGraph 10 | from gcn.graph.static_graph import StaticGraph 11 | 12 | 13 | class TestTrainer(unittest.TestCase): 14 | 15 | def test_train_by_dependency_graph(self): 16 | self._test_train("dependency") 17 | 18 | def test_train_by_similarity_graph(self): 19 | self._test_train("similarity") 20 | 21 | def test_train_by_static_graph(self): 22 | self._test_train("static") 23 | 24 | def _test_train(self, graph_type): 25 | root = os.path.join(os.path.dirname(__file__), "../../") 26 | sequence_length = 25 27 | heads = 3 28 | 29 | dataset = MultiNLIDataset(root) 30 | test_data = dataset.test_data() 31 | index = np.random.randint(len(test_data), size=1)[0] 32 | text = test_data["text"].iloc[index] 33 | 34 | graph_builder = None 35 | if graph_type == "dependency": 36 | graph_builder = DependencyGraph(lang="en") 37 | elif graph_type == "similarity": 38 | graph_builder = SimilarityGraph(lang="en") 39 | else: 40 | graph_builder = StaticGraph(lang="en") 41 | 42 | trainer = Trainer(graph_builder, root, 43 | preprocessor_name="test_ct_preprocessor") 44 | 45 | 
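        # Remainder of the test: build the preprocessor on the (small) test
        # split, wrap trainer.preprocess so it returns (word ids, graph)
        # pairs, train GraphBasedClassifier for two epochs, check that
        # accuracy improved, and verify the attention tensor shape
        # (heads x sequence_length x sequence_length) for one sentence.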
trainer.build(data_kind="test") 46 | 47 | def preprocessor(x): 48 | _x = trainer.preprocess(x, sequence_length) 49 | values = (_x["text"], _x["graph"]) 50 | return values 51 | 52 | _, g = preprocessor([text]) 53 | vocab_size = len(trainer.preprocessor.vocabulary.get()) 54 | model = GraphBasedClassifier(vocab_size, sequence_length, heads=heads) 55 | model.build(trainer.num_classes, preprocessor) 56 | 57 | metrics = trainer.train(model.model, epochs=2) 58 | os.remove(trainer.preprocessor_path) 59 | self.assertTrue(metrics.history["acc"][-1] - metrics.history["acc"][0] > 0) 60 | 61 | attention = model.show_attention([text]) 62 | self.assertEqual(len(attention), 1) # batch size 63 | attention = attention[0] 64 | self.assertEqual(len(attention), 2) # layer count 65 | attention = attention[0] 66 | self.assertEqual(attention.shape, (heads, sequence_length, sequence_length)) 67 | -------------------------------------------------------------------------------- /tests/data/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/icoxfog417/graph-convolution-nlp/2f15da072e401528d9faf76985d05afce336798f/tests/data/__init__.py -------------------------------------------------------------------------------- /tests/data/test_graph_dataset.py: -------------------------------------------------------------------------------- 1 | import os 2 | import unittest 3 | from gcn.data.graph_dataset import GraphDataset 4 | 5 | 6 | class TestGraphDataset(unittest.TestCase): 7 | 8 | def test_citeseer(self): 9 | root = os.path.join(os.path.dirname(__file__), "../../") 10 | gd = GraphDataset(root, kind="citeseer") 11 | x, y, tx, ty, allx, ally, graph, test_idx = gd.download() 12 | -------------------------------------------------------------------------------- /tests/data/test_multi_nli_dataset.py: -------------------------------------------------------------------------------- 1 | import os 2 | import unittest 3 | from gcn.data.multi_nli_dataset import MultiNLIDataset 4 | 5 | 6 | class TestMultiNLIDataset(unittest.TestCase): 7 | 8 | def test_download(self): 9 | root = os.path.join(os.path.dirname(__file__), "../../") 10 | dataset = MultiNLIDataset(root, prefix="test") 11 | dataset.download() 12 | 13 | train_data = dataset.train_data() 14 | test_data = dataset.test_data() 15 | 16 | for d in [train_data, test_data]: 17 | self.assertTrue(len(d) > 0) 18 | counts = d["label"].value_counts().values.tolist() 19 | c = counts[0] 20 | for _c in counts: 21 | self.assertEqual(c, _c) 22 | 23 | for k in ["train", "test"]: 24 | self.assertTrue(os.path.exists(dataset.interim_file(k))) 25 | os.remove(dataset.interim_file(k)) 26 | 27 | self.assertTrue(os.path.exists(dataset.processed_file(k))) 28 | os.remove(dataset.processed_file(k)) 29 | -------------------------------------------------------------------------------- /tests/graph/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/icoxfog417/graph-convolution-nlp/2f15da072e401528d9faf76985d05afce336798f/tests/graph/__init__.py -------------------------------------------------------------------------------- /tests/graph/test_dependency_graph.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import numpy as np 3 | from chariot.transformer.vocabulary import Vocabulary 4 | from gcn.graph import DependencyGraph 5 | 6 | 7 | class TestDependencyGraph(unittest.TestCase): 8 | 9 | def 
test_build(self): 10 | graph = DependencyGraph("en") 11 | matrix = graph.build("I am living at house") 12 | 13 | answer = np.array([ 14 | [0, 0, 1, 0, 0], 15 | [0, 0, 1, 0, 0], 16 | [0, 0, 1, 0, 0], 17 | [0, 0, 1, 0, 0], 18 | [0, 0, 0, 1, 0], 19 | ]) 20 | self.assertEqual(tuple(matrix.tolist()), 21 | tuple(answer.tolist())) 22 | 23 | def test_build_label(self): 24 | graph = DependencyGraph("en") 25 | matrix = graph.build("I am living at house", return_label=True) 26 | 27 | answer = [ 28 | ["", "", "nsubj", "", ""], 29 | ["", "", "aux", "", ""], 30 | ["", "", "ROOT", "", ""], 31 | ["", "", "prep", "", ""], 32 | ["", "", "", "pobj", ""], 33 | ] 34 | self.assertEqual(tuple(matrix), 35 | tuple(answer)) 36 | 37 | def test_batch_build(self): 38 | graph = DependencyGraph("en") 39 | 40 | sentences = ["I am living at house", 41 | "You are waiting on the station"] 42 | matrices = graph.batch_build(sentences, size=6) 43 | 44 | self.assertEqual(matrices.shape, (2, 6, 6)) 45 | -------------------------------------------------------------------------------- /tests/graph/test_similarity_graph.py: -------------------------------------------------------------------------------- 1 | import os 2 | import unittest 3 | import numpy as np 4 | from sklearn.metrics.pairwise import cosine_similarity 5 | from chariot.transformer.vocabulary import Vocabulary 6 | from gcn.graph import SimilarityGraph 7 | 8 | 9 | class TestSimilarityGraph(unittest.TestCase): 10 | 11 | def test_build(self): 12 | root = os.path.join(os.path.dirname(__file__), "../../") 13 | nearest_neighbor = 3 14 | node_count = 10 15 | feature_size = 5 16 | 17 | graph = SimilarityGraph("en", nearest_neighbor, root=root) 18 | 19 | vectors = np.random.uniform(size=node_count * feature_size) 20 | vectors = vectors.reshape(node_count, feature_size) 21 | 22 | similarity = cosine_similarity(vectors) 23 | similarity -= np.eye(node_count) 24 | top_k = np.argsort(-similarity, axis=1)[:, :nearest_neighbor] 25 | 26 | for mode in ["connectivity", "distance"]: 27 | graph.mode = mode 28 | matrix = graph._build(vectors) 29 | 30 | for i, top in enumerate(top_k): 31 | if mode == "connectivity": 32 | self.assertEqual(sum(matrix[i, top]), nearest_neighbor) 33 | else: 34 | self.assertEqual(tuple(similarity[i, top]), 35 | tuple(matrix[i, top])) 36 | 37 | def test_build_from_vocab(self): 38 | root = os.path.join(os.path.dirname(__file__), "../../") 39 | graph = SimilarityGraph("en", nearest_neighbor=2, root=root) 40 | matrix = graph.build("you loaded now") 41 | self.assertTrue(matrix.shape, (3, 3)) 42 | 43 | def test_batch_build(self): 44 | root = os.path.join(os.path.dirname(__file__), "../../") 45 | sentences = ["I am living at house", 46 | "You are waiting on the station"] 47 | graph = SimilarityGraph("en", nearest_neighbor=2, root=root) 48 | matrices = graph.batch_build(sentences, size=6) 49 | 50 | self.assertEqual(matrices.shape, (2, 6, 6)) 51 | -------------------------------------------------------------------------------- /tests/graph/test_static_graph.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import numpy as np 3 | from gcn.graph import StaticGraph 4 | 5 | 6 | class TestStaticGraph(unittest.TestCase): 7 | 8 | def test_build(self): 9 | for k in ("self", "previous"): 10 | for f in (True, False): 11 | graph = StaticGraph("en", kind=k, fill=f) 12 | matrix = graph.build("You can get static graph.") 13 | self.check_graph(matrix, k, f) 14 | 15 | def check_graph(self, matrix, kind, fill): 16 | 
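        # Expected patterns from StaticGraph.build, shown for a 3-token
        # sentence (row = token, column = the token it connects to):
        #   kind="self",     fill=False -> identity            [[1,0,0],[0,1,0],[0,0,1]]
        #   kind="self",     fill=True  -> lower triangular    [[1,0,0],[1,1,0],[1,1,1]]
        #   kind="previous", fill=False -> previous token only  [[0,0,0],[1,0,0],[0,1,0]]
        #   kind="previous", fill=True  -> all previous tokens  [[0,0,0],[1,0,0],[1,1,0]]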
print("kind={}, fill={}".format(kind, fill)) 17 | print(matrix) 18 | for r in range(len(matrix)): 19 | for c in range(len(matrix[r])): 20 | spike = False 21 | offset = 0 if kind == "self" else -1 22 | _r = r + offset 23 | if c == _r: 24 | spike = True 25 | elif fill and c <= _r: 26 | spike = True 27 | 28 | if spike: 29 | self.assertEqual(matrix[r][c], 1) 30 | else: 31 | self.assertEqual(matrix[r][c], 0) 32 | 33 | def test_batch_build(self): 34 | graph = StaticGraph("en") 35 | sentences = ["I am living at house", 36 | "You are waiting on the station"] 37 | matrix = graph.batch_build(sentences, size=3) 38 | 39 | self.assertEqual(matrix.shape, (2, 3, 3)) 40 | -------------------------------------------------------------------------------- /tests/language_model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/icoxfog417/graph-convolution-nlp/2f15da072e401528d9faf76985d05afce336798f/tests/language_model/__init__.py -------------------------------------------------------------------------------- /tests/language_model/test_similarity_graph_lm.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | from gcn.language_model.similarity_graph_lm import SimilarityGraphLM 3 | 4 | 5 | class TestSimilarityGraphLM(unittest.TestCase): 6 | 7 | def test_similarity_graph_lm(self): 8 | vocab_size = 100 9 | sequence_length = 15 10 | embedding_size = 10 11 | model = SimilarityGraphLM(vocab_size, sequence_length, 12 | embedding_size) 13 | -------------------------------------------------------------------------------- /tests/language_model/test_trainer.py: -------------------------------------------------------------------------------- 1 | import os 2 | import shutil 3 | import unittest 4 | from gcn.language_model.trainer import Trainer 5 | from gcn.language_model.baseline import LSTMLM 6 | 7 | 8 | class TestTrainer(unittest.TestCase): 9 | 10 | def test_download(self): 11 | root = os.path.join(os.path.dirname(__file__), "../../") 12 | trainer = Trainer(root) 13 | 14 | r = trainer.download() 15 | self.assertTrue(r) 16 | 17 | def test_build(self): 18 | root = os.path.join(os.path.dirname(__file__), "../../") 19 | trainer = Trainer(root, preprocessor_name="test_lm_preprocessor") 20 | 21 | trainer.build("valid") 22 | self.assertTrue(len(trainer.preprocessor.vocabulary.get()) > 1000) 23 | print(trainer.preprocessor.vocabulary.get()[:100]) 24 | print(trainer.preprocessor_path) 25 | os.remove(trainer.preprocessor_path) 26 | 27 | def test_train(self): 28 | root = os.path.join(os.path.dirname(__file__), "../../") 29 | trainer = Trainer(root, preprocessor_name="test_train_lm_preprocessor", 30 | log_dir="lm_test") 31 | trainer.build("valid") 32 | 33 | vocab_size = len(trainer.preprocessor.vocabulary.get()) 34 | model = LSTMLM(vocab_size, embedding_size=100, hidden_size=50) 35 | 36 | metrics = trainer.train(model, data_kind="valid", epochs=2) 37 | last_acc = metrics.history["acc"][-1] 38 | shutil.rmtree(trainer.log_dir) 39 | os.remove(trainer.preprocessor_path) 40 | self.assertTrue(metrics.history["acc"][-1] - metrics.history["acc"][0] > 0) 41 | -------------------------------------------------------------------------------- /tests/layers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/icoxfog417/graph-convolution-nlp/2f15da072e401528d9faf76985d05afce336798f/tests/layers/__init__.py 
-------------------------------------------------------------------------------- /tests/layers/simple_attention_layer.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from tensorflow.python.keras import backend as K 3 | from tensorflow.python.keras.engine.base_layer import InputSpec 4 | from tensorflow.python.keras.layers import Dense 5 | 6 | 7 | class SimpleAttentionLayer(Dense): 8 | 9 | def __init__(self, 10 | feature_units, 11 | activation="relu", 12 | return_attention=False, 13 | node_axis="row", 14 | merge_method="add", 15 | use_attention_kernel=True, 16 | **kwargs): 17 | 18 | super(SimpleAttentionLayer, self).__init__(units=feature_units, 19 | activation=activation, 20 | **kwargs) 21 | if merge_method == "concat" and not use_attention_kernel: 22 | raise Exception("Can't use concat without attention") 23 | 24 | self.return_attention = return_attention 25 | self.node_axis = node_axis 26 | self.merge_method = merge_method 27 | self.use_attention_kernel = use_attention_kernel 28 | self.input_spec = [InputSpec(ndim=3), InputSpec(ndim=3)] 29 | self.supports_masking = False 30 | 31 | self.self_kernel = None 32 | self.neighbor_kernel = None 33 | self.attention_kernel = None 34 | self.bias = None 35 | 36 | def build(self, input_shape): 37 | X_dims, A_dims = [dims.as_list() for dims in input_shape] 38 | assert len(X_dims) == 3 39 | assert len(A_dims) == 3 and A_dims[1] == A_dims[2] 40 | 41 | F = X_dims[-1] 42 | N = X_dims[1] 43 | 44 | for kind in ["self", "neighbor", "attention"]: 45 | if kind in ["self", "neighbor"]: 46 | if self.use_attention_kernel: 47 | shape = (F, self.units) 48 | else: 49 | shape = (F, 1) 50 | elif kind == "attention" and self.use_attention_kernel: 51 | if self.merge_method == "concat": 52 | shape = (self.units * 2, 1) 53 | else: 54 | shape = (self.units, 1) 55 | else: 56 | shape = () 57 | 58 | if len(shape) == 0: 59 | continue 60 | 61 | kernel = self.add_weight(shape=shape, 62 | initializer=self.kernel_initializer, 63 | regularizer=self.kernel_regularizer, 64 | constraint=self.kernel_constraint, 65 | name="{}_kernel".format(kind)) 66 | 67 | if kind == "self": 68 | self.self_kernel = kernel 69 | elif kind == "neighbor": 70 | self.neighbor_kernel = kernel 71 | elif kind == "attention": 72 | self.attention_kernel = kernel 73 | 74 | if self.use_bias: 75 | self.bias = self.add_weight(shape=(N, N), 76 | initializer=self.bias_initializer, 77 | regularizer=self.bias_regularizer, 78 | constraint=self.bias_constraint, 79 | name="bias") 80 | 81 | self.built = True 82 | 83 | def call(self, inputs): 84 | X = inputs[0] # Node features (B x N x F) 85 | A = inputs[1] # Adjacency matrix (B x N x N) 86 | 87 | X_dims = X.get_shape().as_list() 88 | B, N, F = X_dims 89 | 90 | feature_self = K.dot(X, self.self_kernel) 91 | feature_neighbor = K.dot(X, self.neighbor_kernel) 92 | 93 | # repeat_elements is same as np.repeat. 94 | # it repeats element to row direction. 95 | # Example. 
96 | # z = np.array([[1,2,3],[4,5,6]]) # shape=(2, 3) 97 | # repeat = 4 98 | # np.reshape(np.repeat(z, repeat, axis=-1), (2, 3, repeat)) 99 | # > array([[[1, 1, 1, 1], 100 | # [2, 2, 2, 2], 101 | # [3, 3, 3, 3]], 102 | # [[4, 4, 4, 4], 103 | # [5, 5, 5, 5], 104 | # [6, 6, 6, 6]]]) 105 | feature_self = K.repeat_elements(feature_self, N, axis=2) 106 | feature_self = K.reshape(feature_self, (-1, N, N, self.units)) 107 | 108 | feature_neighbor = K.repeat_elements(feature_neighbor, N, axis=2) 109 | feature_neighbor = K.reshape(feature_neighbor, (-1, N, N, self.units)) 110 | 111 | T = (0, 2, 1, 3) 112 | if self.merge_method == "concat": 113 | if self.node_axis == "row": 114 | merged = tf.concat([feature_self, 115 | tf.transpose(feature_neighbor, T)], 116 | axis=-1) 117 | else: 118 | merged = tf.concat([tf.transpose(feature_self, T), 119 | feature_neighbor], 120 | axis=-1) 121 | else: 122 | if self.node_axis == "row": 123 | merged = feature_self + tf.transpose(feature_neighbor, T) 124 | else: 125 | merged = tf.transpose(feature_self, T) + feature_neighbor 126 | 127 | activation_func = tf.nn.tanh 128 | if self.use_attention_kernel: 129 | attention = K.dot(activation_func(merged), self.attention_kernel) 130 | else: 131 | attention = activation_func(merged) 132 | 133 | attention = K.reshape(attention, (-1, N, N)) 134 | if self.use_bias: 135 | attention = K.bias_add(attention, self.bias) 136 | 137 | mask = -10e9 * (1.0 - A) 138 | attention += mask 139 | 140 | attention = tf.nn.softmax(attention) 141 | 142 | output = tf.matmul(attention, X) 143 | 144 | if self.return_attention: 145 | return (output, attention) 146 | else: 147 | return output 148 | 149 | def compute_output_shape(self, input_shape): 150 | X_dims, A_dims = [dims.as_list() for dims in input_shape] 151 | assert len(X_dims) == 3 152 | assert len(A_dims) == 3 153 | output_shape = X_dims[0], X_dims[0], self.output_dim 154 | 155 | if self.return_attention: 156 | return (tf.TensorShape(output_shape), 157 | tf.TensorShape(A_dims)) 158 | else: 159 | return tf.TensorShape(output_shape) 160 | -------------------------------------------------------------------------------- /tests/layers/simple_attention_layer_multi.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from tensorflow.python.keras import backend as K 3 | from tensorflow.python.keras.engine.base_layer import InputSpec 4 | from tensorflow.python.keras.layers import Dense 5 | 6 | 7 | class SimpleAttentionLayer(Dense): 8 | 9 | def __init__(self, 10 | feature_units, 11 | activation="relu", 12 | return_attention=False, 13 | node_axis="row", 14 | merge_method="add", 15 | use_attention_kernel=True, 16 | **kwargs): 17 | 18 | super(SimpleAttentionLayer, self).__init__(units=feature_units, 19 | activation=activation, 20 | **kwargs) 21 | if merge_method == "concat" and not use_attention_kernel: 22 | raise Exception("Can't use concat without attention") 23 | 24 | self.return_attention = return_attention 25 | self.node_axis = node_axis 26 | self.merge_method = merge_method 27 | self.use_attention_kernel = use_attention_kernel 28 | self.input_spec = [InputSpec(ndim=3), InputSpec(ndim=3)] 29 | self.supports_masking = False 30 | 31 | self.kernel = None 32 | self.bias = None 33 | 34 | def build(self, input_shape): 35 | X_dims, A_dims = [dims.as_list() for dims in input_shape] 36 | assert len(X_dims) == 3 37 | assert len(A_dims) == 3 and A_dims[1] == A_dims[2] 38 | 39 | F = X_dims[-1] 40 | N = X_dims[1] 41 | 42 | self.kernel = 
self.add_weight(shape=(F, F), 43 | initializer=self.kernel_initializer, 44 | regularizer=self.kernel_regularizer, 45 | constraint=self.kernel_constraint, 46 | name="kernel") 47 | 48 | if self.use_bias: 49 | self.bias = self.add_weight(shape=(N, N), 50 | initializer=self.bias_initializer, 51 | regularizer=self.bias_regularizer, 52 | constraint=self.bias_constraint, 53 | name="bias") 54 | 55 | self.built = True 56 | 57 | def call(self, inputs): 58 | X = inputs[0] # Node features (B x N x F) 59 | A = inputs[1] # Adjacency matrix (B x N x N) 60 | 61 | X_dims = X.get_shape().as_list() 62 | B, N, F = X_dims 63 | 64 | merged = tf.matmul(K.dot(X, self.kernel), 65 | tf.transpose(X, (0, 2, 1))) 66 | attention = tf.nn.tanh(merged) 67 | attention = K.reshape(attention, (-1, N, N)) 68 | 69 | if self.use_bias: 70 | attention = K.bias_add(attention, self.bias) 71 | 72 | mask = -10e9 * (1.0 - A) 73 | attention += mask 74 | 75 | attention = tf.nn.softmax(attention) 76 | output = tf.matmul(attention, X) 77 | 78 | if self.return_attention: 79 | return (output, attention) 80 | else: 81 | return output 82 | 83 | def compute_output_shape(self, input_shape): 84 | X_dims, A_dims = [dims.as_list() for dims in input_shape] 85 | assert len(X_dims) == 3 86 | assert len(A_dims) == 3 87 | output_shape = X_dims[0], X_dims[0], self.output_dim 88 | 89 | if self.return_attention: 90 | return (tf.TensorShape(output_shape), 91 | tf.TensorShape(A_dims)) 92 | else: 93 | return tf.TensorShape(output_shape) 94 | -------------------------------------------------------------------------------- /tests/layers/test_attention_layer.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import numpy as np 3 | from tensorflow.python import keras as K 4 | from gcn.layers import AttentionLayer 5 | 6 | 7 | class TestAttentionLayer(unittest.TestCase): 8 | 9 | def test_attention_layer(self): 10 | sample_size = 20000 11 | sequence_length = 20 12 | embedding_size = 2 13 | units = 8 14 | attention_column = 3 15 | 16 | # Baseline Model 17 | base = K.Sequential() 18 | base.add(K.layers.LSTM(units, 19 | input_shape=[sequence_length, embedding_size])) 20 | base.add(K.layers.Dense(1, activation="sigmoid")) 21 | base.compile(optimizer="adam", loss="binary_crossentropy", 22 | metrics=["accuracy"]) 23 | 24 | # Attention Model 25 | def make_model(): 26 | input = K.layers.Input([sequence_length, embedding_size]) 27 | lstm_out = K.layers.LSTM(units, return_sequences=True)(input) 28 | attn_out, prob = AttentionLayer(sequence_length, 29 | return_attentions=True)(lstm_out) 30 | output = K.layers.Dense(1, activation="sigmoid")(attn_out) 31 | model = K.models.Model(inputs=input, outputs=output) 32 | return model 33 | 34 | model = make_model() 35 | model.compile(optimizer="adam", loss="binary_crossentropy", 36 | metrics=["accuracy"]) 37 | x, y = self.make_test_data(sample_size, sequence_length, 38 | embedding_size, attention_column) 39 | 40 | base_metrics = base.fit(x, y, epochs=1, batch_size=32, 41 | validation_split=0.1, verbose=1) 42 | metrics = model.fit(x, y, epochs=1, batch_size=32, 43 | validation_split=0.1, verbose=1) 44 | 45 | base_score = base_metrics.history["val_acc"][-1] 46 | score = metrics.history["val_acc"][-1] 47 | self.assertTrue(score > base_score) 48 | 49 | attention_layer = model.layers[2] 50 | attention_model = K.models.Model(inputs=model.input, 51 | outputs=attention_layer.output) 52 | activation, attention = attention_model.predict_on_batch(x) 53 | attention_index =
np.argmax(np.mean(attention, axis=0)) 54 | print(np.mean(attention, axis=0)) 55 | self.assertTrue(attention_index in [attention_column, 56 | attention_column + 1]) 57 | 58 | def make_test_data(self, sample_size, sequence_length, embedding_size, 59 | attention_column): 60 | if attention_column >= sequence_length: 61 | raise Exception("Directed column is larger than sequence_length.") 62 | 63 | x = np.random.standard_normal(size=(sample_size, sequence_length, 64 | embedding_size)) 65 | y = np.random.randint(low=0, high=2, size=(sample_size, 1)) 66 | x[:, attention_column, :] = np.tile(y[:], (1, embedding_size)) 67 | 68 | return x, y 69 | -------------------------------------------------------------------------------- /tests/layers/test_attention_on_graph.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import numpy as np 3 | from scipy.spatial import distance_matrix 4 | from tensorflow.python import keras as K 5 | from tests.layers.simple_attention_layer import SimpleAttentionLayer 6 | 7 | 8 | class TestAttentionOnGraph(unittest.TestCase): 9 | 10 | def test_attention_learning(self): 11 | exp1 = self.run_attention_learning("column", "add", False) # original 12 | exp2 = self.run_attention_learning("column", "add", True) 13 | exp3 = self.run_attention_learning("column", "concat", True) 14 | exp4 = self.run_attention_learning("row", "add", False) 15 | exp5 = self.run_attention_learning("row", "add", True) 16 | exp6 = self.run_attention_learning("row", "concat", True) 17 | 18 | for acc in [exp2, exp3, exp4, exp5, exp6]: 19 | # original method should be most accurate 20 | self.assertGreater(exp1, acc) 21 | raise Exception("ex") 22 | 23 | def run_attention_learning(self, node_axis, merge_method, 24 | use_attention_kernel): 25 | node_count = 10 26 | feature_size = 2 27 | feature_units = 2 28 | problem_count = 10000 29 | validation_count = 5 30 | 31 | last_accs = [] 32 | for i in range(validation_count): 33 | model = self.make_simple_attention_network( 34 | node_count, feature_size, feature_units, 35 | node_axis, merge_method, 36 | use_attention_kernel) 37 | 38 | model.compile(loss="categorical_crossentropy", optimizer="adam", 39 | metrics=["accuracy"]) 40 | 41 | params = self.make_problems(node_count, feature_size, 42 | feature_units, problem_count) 43 | node_inputs, matrix_inputs, answers, attn_answers = params 44 | 45 | metrics = model.fit([node_inputs, matrix_inputs], attn_answers, 46 | validation_split=0.2, epochs=8, verbose=0) 47 | acc = metrics.history["val_acc"][-1] 48 | last_accs.append(acc) 49 | 50 | def calc_baseline_acc(A, label): 51 | x = np.random.normal(size=A.shape) * A 52 | x_exp = np.exp(x) 53 | x = x_exp / np.sum(x_exp, axis=-1, keepdims=True) 54 | match = np.equal(np.argmax(label, axis=-1), 55 | np.argmax(x_exp, axis=-1),) 56 | count = A.shape[0] * A.shape[1] 57 | acc = np.sum(match) / count 58 | return acc 59 | 60 | baseline_acc = calc_baseline_acc(matrix_inputs, attn_answers) 61 | method = "Merge: {} Node: {} Attention: {}".format( 62 | merge_method, node_axis, use_attention_kernel) 63 | if merge_method == "add" and node_axis == "column" and \ 64 | not use_attention_kernel: 65 | method += " (original)" 66 | 67 | print(method) 68 | acc = np.mean(last_accs) 69 | print("\t acc: {}(+/-{}) (baseline {})".format( 70 | acc, np.std(last_accs), baseline_acc)) 71 | return acc 72 | 73 | def make_problems(self, node_count, feature_size, feature_units, 74 | problem_count): 75 | """ 76 | Make task to extract the nearest node from 
neighbors. 77 | """ 78 | 79 | node_samples = problem_count * node_count * feature_size 80 | node_inputs = np.random.uniform(high=10, size=node_samples).reshape( 81 | (problem_count, node_count, feature_size)) 82 | 83 | matrix_samples = problem_count * node_count * node_count 84 | matrix_inputs = np.random.randint(2, size=matrix_samples).reshape( 85 | (problem_count, node_count, node_count)) 86 | 87 | answers = [] 88 | attention_answers = [] 89 | for n, m in zip(node_inputs, matrix_inputs): 90 | distance = distance_matrix(n, n) 91 | mask = 10e9 * (1.0 - m) 92 | target_index = np.argmin(distance * m + mask, axis=1) 93 | 94 | answers.append(n[target_index]) 95 | attn = np.zeros(m.shape) 96 | attn[np.arange(len(attn)), target_index] = 1 97 | attention_answers.append(attn) 98 | 99 | answers = np.array(answers) 100 | attention_answers = np.array(attention_answers) 101 | 102 | return node_inputs, matrix_inputs, answers, attention_answers 103 | 104 | def make_simple_attention_network(self, node_count, 105 | feature_size, feature_units, 106 | node_axis, merge_method, 107 | use_attention_kernel): 108 | 109 | nodes = K.layers.Input(shape=(node_count, feature_size)) 110 | matrix = K.layers.Input(shape=(node_count, node_count)) 111 | layer = SimpleAttentionLayer(feature_units=feature_units, 112 | node_axis=node_axis, 113 | merge_method=merge_method, 114 | use_attention_kernel=use_attention_kernel, 115 | return_attention=True) 116 | 117 | _, attn = layer([nodes, matrix]) 118 | model = K.models.Model(inputs=[nodes, matrix], outputs=attn) 119 | return model 120 | -------------------------------------------------------------------------------- /tests/layers/test_graph_attention_layer.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import numpy as np 3 | from scipy.spatial import distance_matrix 4 | from tensorflow.python import keras as K 5 | from gcn.layers.graph_attention_layer_before import GraphAttentionLayer as GraphAttentionLayerB 6 | from gcn.layers.graph_attention_layer import GraphAttentionLayer 7 | 8 | 9 | class TestGraphAttentionLayer(unittest.TestCase): 10 | TEST_PATTERN = (10, 2, 2) 11 | 12 | def test_forward(self): 13 | node_count = 12 14 | feature_size = 10 15 | feature_units = 8 16 | head = 3 17 | 18 | batch_size = 32 19 | node_samples = batch_size * node_count * feature_size 20 | node_inputs = np.random.uniform(size=node_samples) 21 | node_inputs = node_inputs.reshape((batch_size, 22 | node_count, feature_size)) 23 | 24 | matrix_samples = batch_size * node_count * node_count 25 | matrix_inputs = np.random.randint(2, size=matrix_samples) 26 | matrix_inputs = matrix_inputs.reshape((batch_size, 27 | node_count, node_count)) 28 | 29 | concat_model = self.make_graph_attention_network( 30 | node_count, feature_size, feature_units, 31 | head=head, merge="concat") 32 | outputs = concat_model.predict([node_inputs, matrix_inputs]) 33 | self.assertEqual(outputs.shape, (batch_size, node_count, 34 | feature_units * head)) 35 | 36 | mean_model = self.make_graph_attention_network( 37 | node_count, feature_size, feature_units, 38 | head=head, merge="average") 39 | outputs = mean_model.predict([node_inputs, matrix_inputs]) 40 | self.assertEqual(outputs.shape, (batch_size, node_count, 41 | feature_units)) 42 | 43 | def test_training(self): 44 | node_count = 4 45 | feature_size = 3 46 | feature_units = 1 47 | problem_count = 1000 48 | 49 | node_inputs, matrix_inputs, answers, _ = self.make_problems( 50 | node_count, feature_size, 51 | 
feature_units, 52 | problem_count) 53 | 54 | model = self.make_graph_attention_network( 55 | node_count, feature_size, feature_units, 56 | merge="average") 57 | model.compile(loss="mse", optimizer="adam") 58 | metrics = model.fit([node_inputs, matrix_inputs], answers, 59 | validation_split=0.3, 60 | epochs=50) 61 | last_loss = metrics.history["val_loss"][-1] 62 | min_loss = np.min(metrics.history["val_loss"]) 63 | self.assertEqual(last_loss, min_loss) 64 | 65 | def test_attention(self): 66 | node_count, feature_size, feature_units = self.TEST_PATTERN 67 | 68 | def make_model(): 69 | model, model_attn = self.make_graph_attention_network( 70 | node_count, feature_size, feature_units, 71 | return_attention=True) 72 | return model, model_attn 73 | 74 | loss, hit_prob = self._test_attention(make_model, 75 | node_count, 76 | feature_size, feature_units, 77 | header="GAL After") 78 | 79 | self.assertGreater(hit_prob, 0.6) 80 | 81 | def test_attention_before(self): 82 | node_count, feature_size, feature_units = self.TEST_PATTERN 83 | 84 | def make_model(): 85 | model, model_attn = self.make_graph_attention_network( 86 | node_count, feature_size, feature_units, 87 | return_attention=True, before=True) 88 | return model, model_attn 89 | 90 | loss, hit_prob = self._test_attention(make_model, 91 | node_count, 92 | feature_size, feature_units, 93 | header="GAL Before") 94 | self.assertGreater(hit_prob, 0.6) 95 | 96 | def test_attention_theoretical(self): 97 | node_count, feature_size, feature_units = self.TEST_PATTERN 98 | 99 | def make_model(): 100 | model, model_attn = self.make_simple_attention_network( 101 | node_count, feature_size, feature_units, 102 | return_attention=True) 103 | return model, model_attn 104 | 105 | loss, hit_prob = self._test_attention(make_model, 106 | node_count, 107 | feature_size, feature_units, 108 | header="Theoretical Attention") 109 | self.assertGreater(hit_prob, 0.6) 110 | 111 | def _test_attention(self, make_model, 112 | node_count, feature_size, feature_units, 113 | problem_count=10000, validation_count=5, 114 | header=""): 115 | 116 | losses = [] 117 | hit_probs = [] 118 | for i in range(validation_count): 119 | model, model_attn = make_model() 120 | params = self.make_problems(node_count, feature_size, 121 | feature_units, problem_count) 122 | node_inputs, matrix_inputs, answers, attn_answers = params 123 | 124 | model.compile(loss="mse", optimizer="adam") 125 | model.fit([node_inputs, matrix_inputs], answers, 126 | validation_split=0.3, epochs=20) 127 | 128 | attentions = model_attn.predict([node_inputs, matrix_inputs]) 129 | 130 | if len(attentions.shape) == 4: 131 | attentions = attentions[:, 0, :, :] # attention of head 0 132 | 133 | loss, hit_prob = self.calculate_attention_loss( 134 | attentions, attn_answers) 135 | losses.append(loss) 136 | hit_probs.append(hit_prob) 137 | 138 | loss = np.mean(losses) 139 | hit_prob = np.mean(hit_probs) 140 | if header: 141 | print(header) 142 | print("\t loss: {}(+/-{}), hit_prob:{} (+/-{}).".format( 143 | loss, np.std(losses), hit_prob, np.std(hit_probs))) 144 | return loss, hit_prob 145 | 146 | def make_problems(self, node_count, feature_size, feature_units, 147 | problem_count): 148 | """ 149 | Make task to extract the nearest node from neighbors.
150 | """ 151 | 152 | node_samples = problem_count * node_count * feature_size 153 | node_inputs = np.random.uniform(high=10, size=node_samples).reshape( 154 | (problem_count, node_count, feature_size)) 155 | 156 | matrix_samples = problem_count * node_count * node_count 157 | matrix_inputs = np.random.randint(2, size=matrix_samples).reshape( 158 | (problem_count, node_count, node_count)) 159 | 160 | answers = [] 161 | attention_answers = [] 162 | for n, m in zip(node_inputs, matrix_inputs): 163 | distance = distance_matrix(n, n) 164 | mask = 10e9 * (1.0 - m) 165 | target_index = np.argmin(distance * m + mask, axis=1) 166 | 167 | if feature_size == feature_units: 168 | answers.append(n[target_index]) 169 | else: 170 | answers.append(n[target_index][:, :feature_units]) 171 | 172 | attn = np.zeros(m.shape) 173 | attn[np.arange(len(attn)), target_index] = 1 174 | attention_answers.append(attn) 175 | 176 | answers = np.array(answers) 177 | attention_answers = np.array(attention_answers) 178 | 179 | return node_inputs, matrix_inputs, answers, attention_answers 180 | 181 | def make_graph_attention_network(self, node_count, 182 | feature_size, feature_units, 183 | head=1, merge="average", 184 | return_attention=False, 185 | before=False): 186 | 187 | nodes = K.layers.Input(shape=(node_count, feature_size)) 188 | matrix = K.layers.Input(shape=(node_count, node_count)) 189 | 190 | if before: 191 | GAL = GraphAttentionLayerB 192 | else: 193 | GAL = GraphAttentionLayer 194 | 195 | layer = GAL(feature_units=feature_units, 196 | attn_heads=head, 197 | attn_heads_reduction=merge, 198 | dropout_rate=0.0, 199 | return_attention=return_attention) 200 | 201 | if return_attention: 202 | output, attn = layer([nodes, matrix]) 203 | else: 204 | output = layer([nodes, matrix]) 205 | 206 | model = K.models.Model(inputs=[nodes, matrix], outputs=output) 207 | if return_attention: 208 | model_attn = K.models.Model(inputs=[nodes, matrix], outputs=attn) 209 | return model, model_attn 210 | else: 211 | return model 212 | 213 | def make_simple_attention_network(self, node_count, 214 | feature_size, feature_units, 215 | return_attention=False): 216 | 217 | from tests.layers.simple_attention_layer import SimpleAttentionLayer 218 | 219 | nodes = K.layers.Input(shape=(node_count, feature_size)) 220 | matrix = K.layers.Input(shape=(node_count, node_count)) 221 | layer = SimpleAttentionLayer(feature_units=feature_units, 222 | return_attention=return_attention) 223 | 224 | if return_attention: 225 | output, attn = layer([nodes, matrix]) 226 | attn = attn 227 | else: 228 | output = layer([nodes, matrix]) 229 | 230 | model = K.models.Model(inputs=[nodes, matrix], outputs=output) 231 | if return_attention: 232 | model_attn = K.models.Model(inputs=[nodes, matrix], outputs=attn) 233 | return model, model_attn 234 | else: 235 | return model 236 | 237 | def calculate_attention_loss(self, predicted, answers): 238 | loss = 0 239 | hit_prob = 0 240 | 241 | for p, a in zip(predicted, answers): 242 | norm = np.linalg.norm(p * a - a) 243 | hits = np.sum(np.equal(np.argmax(p, axis=1), 244 | np.argmax(a, axis=1))) 245 | hit_prob += hits / len(p) 246 | loss += norm 247 | loss = loss / len(predicted) 248 | hit_prob = hit_prob / len(predicted) 249 | return loss, hit_prob 250 | -------------------------------------------------------------------------------- /tests/visualize/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/icoxfog417/graph-convolution-nlp/2f15da072e401528d9faf76985d05afce336798f/tests/visualize/__init__.py -------------------------------------------------------------------------------- /tests/visualize/test_draw.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import numpy as np 3 | from chariot.transformer.vocabulary import Vocabulary 4 | from gcn.graph import DependencyGraph, SimilarityGraph, StaticGraph 5 | from gcn.visualize.draw import AttentionDrawer 6 | 7 | 8 | class TestDraw(unittest.TestCase): 9 | 10 | def test_draw_dependency_graph(self): 11 | sentence = "I am living at house" 12 | graph_builder = DependencyGraph("en") 13 | attention = np.array([ 14 | [0, 0, 1, 0, 0], 15 | [0, 0, 0.2, 0, 0], 16 | [0, 0, 0.7, 0, 0], 17 | [0, 0, 1, 0, 0], 18 | [0, 0, 0, 0.5, 0], 19 | ]) 20 | 21 | drawer = AttentionDrawer(graph_builder) 22 | graph = drawer.draw(sentence, attention) 23 | drawer.show(graph) 24 | 25 | def test_draw_similarity_graph(self): 26 | sentence = "I am building similarity graph structure" 27 | graph_builder = SimilarityGraph("en") 28 | drawer = AttentionDrawer(graph_builder) 29 | graph = drawer.draw(sentence) 30 | drawer.show(graph) 31 | 32 | def test_draw_static_graph(self): 33 | sentence = "I am static graph" 34 | graph_builder = StaticGraph("en", kind="previous") 35 | drawer = AttentionDrawer(graph_builder) 36 | graph = drawer.draw(sentence) 37 | drawer.show(graph) 38 | --------------------------------------------------------------------------------