├── .gitignore
├── LP
├── GATNE
│ ├── LICENSE
│ ├── README.md
│ ├── scripts
│ │ └── run_example.sh
│ └── src
│ │ ├── gen_hom_data.py
│ │ ├── homGNN.py
│ │ ├── main.py
│ │ ├── main_pytorch.py
│ │ ├── utils.py
│ │ └── walk.py
├── HetGNN
│ ├── README.md
│ └── code
│ │ ├── DeepWalk.py
│ │ ├── HetGNN.py
│ │ ├── application.py
│ │ ├── args.py
│ │ ├── data_generator.py
│ │ ├── homoGNN.py
│ │ ├── homo_data_split.py
│ │ ├── input_data_process.py
│ │ ├── link_prediction_model.py
│ │ ├── node_classification_model.py
│ │ ├── node_clustering_model.py
│ │ ├── raw_data_process.py
│ │ └── tools.py
├── MAGNN
│ ├── GNN.py
│ ├── LP_MAGNN_training history.txt
│ ├── README.md
│ ├── model
│ │ ├── MAGNN_lp.py
│ │ ├── MAGNN_nc.py
│ │ ├── MAGNN_nc_mb.py
│ │ ├── __init__.py
│ │ └── base_MAGNN.py
│ ├── run_LastFM.py
│ ├── run_LastFM_GNN.py
│ └── utils
│ │ ├── __init__.py
│ │ ├── data.py
│ │ ├── preprocess.py
│ │ ├── pytorchtools.py
│ │ └── tools.py
├── RGCN-WN18
│ ├── README.md
│ ├── code
│ │ ├── __init__.py
│ │ ├── gnn_link_predict.py
│ │ ├── link_predict.py
│ │ ├── model.py
│ │ ├── scripts
│ │ │ ├── __init__.py
│ │ │ ├── data_loader.py
│ │ │ └── read_file.py
│ │ └── utils.py
│ └── data
│ │ └── wn18
│ │ ├── README
│ │ ├── Wordnet3.0-LICENSE
│ │ ├── entities.dict
│ │ ├── relations.dict
│ │ ├── test.txt
│ │ ├── train.txt
│ │ └── valid.txt
├── RGCN
│ ├── GNN.py
│ ├── HomGNN.py
│ ├── README.md
│ ├── gnn_link_predict.py
│ ├── link_predict.py
│ ├── model.py
│ └── utils.py
└── benchmark
│ ├── .gitignore
│ ├── README.md
│ ├── methods
│ ├── GATNE
│ │ ├── LICENSE
│ │ ├── README.md
│ │ ├── main_pytorch.py
│ │ ├── utils.py
│ │ └── walk.py
│ ├── GNN
│ │ ├── GNN.py
│ │ ├── README.md
│ │ ├── homoGNN.py
│ │ └── utils.py
│ ├── HGT
│ │ ├── README.md
│ │ ├── data_loader.py
│ │ ├── link_predict.py
│ │ ├── load_data_dgl.py.ipynb
│ │ ├── model.py
│ │ ├── run_LastFM.sh
│ │ ├── run_LastFM_magnn.sh
│ │ ├── run_PubMed.sh
│ │ ├── run_amazon.sh
│ │ └── utils
│ │ │ ├── __init__.py
│ │ │ ├── data.py
│ │ │ ├── preprocess.py
│ │ │ ├── pytorchtools.py
│ │ │ └── tools.py
│ ├── HetGNN
│ │ ├── DeepWalk.py
│ │ ├── README.md
│ │ ├── args.py
│ │ ├── data_generator.py
│ │ ├── do_LP.py
│ │ ├── main.py
│ │ └── tools.py
│ ├── MAGNN
│ │ ├── .gitignore
│ │ ├── README.md
│ │ ├── model
│ │ │ ├── MAGNN_lp.py
│ │ │ ├── MAGNN_nc.py
│ │ │ ├── MAGNN_nc_mb.py
│ │ │ ├── __init__.py
│ │ │ └── base_MAGNN.py
│ │ ├── run_DBLP.py
│ │ ├── run_IMDB.py
│ │ ├── run_LastFM.py
│ │ └── utils
│ │ │ ├── __init__.py
│ │ │ ├── data.py
│ │ │ ├── preprocess.py
│ │ │ ├── pytorchtools.py
│ │ │ └── tools.py
│ ├── MAGNN_ini
│ │ ├── .gitignore
│ │ ├── README.md
│ │ ├── model
│ │ │ ├── MAGNN_lp.py
│ │ │ ├── MAGNN_nc.py
│ │ │ ├── MAGNN_nc_mb.py
│ │ │ ├── __init__.py
│ │ │ └── base_MAGNN.py
│ │ ├── preprocess_DBLP.ipynb
│ │ ├── preprocess_IMDB.ipynb
│ │ ├── preprocess_LastFM.ipynb
│ │ ├── preprocess_LastFM_magnn.ipynb
│ │ ├── run_DBLP.py
│ │ ├── run_IMDB.py
│ │ ├── run_LastFM.py
│ │ ├── test_LastFM.py
│ │ ├── trans_format.py
│ │ └── utils
│ │ │ ├── __init__.py
│ │ │ ├── data.py
│ │ │ ├── preprocess.py
│ │ │ ├── pytorchtools.py
│ │ │ └── tools.py
│ ├── RGCN
│ │ ├── README.md
│ │ ├── link_predict.py
│ │ ├── model.py
│ │ └── utils.py
│ └── baseline
│ │ ├── GNN.py
│ │ ├── README.md
│ │ ├── conv.py
│ │ ├── run_dist.py
│ │ ├── run_new.py
│ │ └── utils
│ │ ├── __init__.py
│ │ ├── data.py
│ │ ├── preprocess.py
│ │ ├── pytorchtools.py
│ │ └── tools.py
│ ├── scripts
│ ├── LP_AUC_MRR.py
│ ├── README.md
│ ├── __init__.py
│ └── data_loader.py
│ └── test.py
├── NC
├── GTN
│ ├── .gitignore
│ ├── Data_Preprocessing.ipynb
│ ├── GNN.py
│ ├── GTN.png
│ ├── README.md
│ ├── gcn.py
│ ├── get_acm_data.py
│ ├── inits.py
│ ├── main.py
│ ├── main_gnn.py
│ ├── main_sparse.py
│ ├── messagepassing.py
│ ├── model.py
│ ├── model_sparse.py
│ └── utils.py
├── HAN
│ ├── GNN.py
│ ├── README.md
│ ├── main.py
│ ├── model.py
│ ├── model_hetero.py
│ └── utils.py
├── HetGNN
│ ├── README.md
│ └── code
│ │ ├── DeepWalk.py
│ │ ├── HetGNN.py
│ │ ├── application.py
│ │ ├── args.py
│ │ ├── data_generator.py
│ │ ├── homoGNN.py
│ │ ├── input_data_process.py
│ │ ├── link_prediction_model.py
│ │ ├── node_classification_model.py
│ │ ├── node_clustering_model.py
│ │ ├── raw_data_process.py
│ │ └── tools.py
├── MAGNN
│ ├── .gitignore
│ ├── GNN.py
│ ├── README.md
│ ├── get_lastfm_data.py
│ ├── get_lastfm_data_same.py
│ ├── model
│ │ ├── MAGNN_lp.py
│ │ ├── MAGNN_nc.py
│ │ ├── MAGNN_nc_mb.py
│ │ ├── __init__.py
│ │ └── base_MAGNN.py
│ ├── preprocess_DBLP.ipynb
│ ├── preprocess_IMDB.ipynb
│ ├── preprocess_LastFM.ipynb
│ ├── run_DBLP.py
│ ├── run_DBLP_gnn.py
│ ├── run_IMDB.py
│ ├── run_LastFM.py
│ └── utils
│ │ ├── __init__.py
│ │ ├── data.py
│ │ ├── preprocess.py
│ │ ├── pytorchtools.py
│ │ ├── tools.py
│ │ └── transform_2_com.py
├── RGCN
│ ├── README.md
│ ├── entity_classify.py
│ └── model.py
├── RSHN
│ ├── README.md
│ ├── build_coarsened_line_graph
│ │ ├── relation_graph.py
│ │ └── utils.py
│ ├── model
│ │ ├── GNN.py
│ │ ├── RSHN.py
│ │ └── RSHN_gnn.py
│ └── torch_geometric
│ │ ├── __init__.py
│ │ ├── data
│ │ ├── __init__.py
│ │ ├── batch.py
│ │ ├── data.py
│ │ ├── dataloader.py
│ │ ├── dataset.py
│ │ ├── download.py
│ │ ├── extract.py
│ │ ├── in_memory_dataset.py
│ │ └── makedirs.py
│ │ ├── datasets
│ │ ├── __init__.py
│ │ └── entities.py
│ │ ├── nn
│ │ ├── __init__.py
│ │ ├── conv
│ │ │ ├── __init__.py
│ │ │ ├── message_passing.py
│ │ │ ├── nn_conv.py
│ │ │ └── relation_conv.py
│ │ └── inits.py
│ │ └── utils
│ │ ├── __init__.py
│ │ ├── convert.py
│ │ ├── degree.py
│ │ ├── grid.py
│ │ ├── isolated.py
│ │ ├── loop.py
│ │ ├── metric.py
│ │ ├── normalized_cut.py
│ │ ├── num_nodes.py
│ │ ├── one_hot.py
│ │ ├── repeat.py
│ │ ├── scatter.py
│ │ ├── softmax.py
│ │ ├── sparse.py
│ │ ├── to_batch.py
│ │ └── undirected.py
└── benchmark
│ ├── .gitignore
│ ├── README.md
│ ├── methods
│ ├── GNN
│ │ ├── GNN.py
│ │ ├── README.md
│ │ ├── run.py
│ │ ├── run_multi.py
│ │ └── utils
│ │ │ ├── __init__.py
│ │ │ ├── data.py
│ │ │ └── pytorchtools.py
│ ├── GTN
│ │ ├── README.md
│ │ ├── gcn.py
│ │ ├── inits.py
│ │ ├── main.py
│ │ ├── main_multi.py
│ │ ├── main_sparse.py
│ │ ├── messagepassing.py
│ │ ├── model.py
│ │ ├── model_sparse.py
│ │ └── utils.py
│ ├── HAN
│ │ ├── README.md
│ │ ├── main.py
│ │ ├── main_multi.py
│ │ ├── model_hetero.py
│ │ ├── model_hetero_multi.py
│ │ └── utils.py
│ ├── HGT
│ │ ├── .gitignore
│ │ ├── GNN.py
│ │ ├── README.md
│ │ ├── data_loader.py
│ │ ├── gpu_memory_log.py
│ │ ├── load_data_dgl.py.ipynb
│ │ ├── model.py
│ │ ├── run_acm.sh
│ │ ├── run_dblp.py
│ │ ├── run_dblp.sh
│ │ ├── run_freebash.sh
│ │ ├── run_hgt.py
│ │ ├── run_imdb.sh
│ │ ├── train_hgt.py
│ │ └── utils
│ │ │ ├── __init__.py
│ │ │ ├── data.py
│ │ │ ├── preprocess.py
│ │ │ ├── pytorchtools.py
│ │ │ └── tools.py
│ ├── HetGNN
│ │ ├── README.md
│ │ └── code
│ │ │ ├── ACM
│ │ │ ├── DeepWalk.py
│ │ │ ├── args.py
│ │ │ ├── data_generator.py
│ │ │ ├── do_class.py
│ │ │ ├── main.py
│ │ │ └── tools.py
│ │ │ ├── DBLP
│ │ │ ├── DeepWalk.py
│ │ │ ├── args.py
│ │ │ ├── data_generator.py
│ │ │ ├── do_class.py
│ │ │ ├── main.py
│ │ │ └── tools.py
│ │ │ └── IMDB
│ │ │ ├── DeepWalk.py
│ │ │ ├── args.py
│ │ │ ├── data_generator.py
│ │ │ ├── do_class.py
│ │ │ ├── main.py
│ │ │ └── tools.py
│ ├── HetSANN
│ │ ├── HetSANN_MRV
│ │ │ ├── execute_sparse.py
│ │ │ ├── find_meta.py
│ │ │ ├── models
│ │ │ │ ├── __init__.py
│ │ │ │ ├── __pycache__
│ │ │ │ │ ├── __init__.cpython-36.pyc
│ │ │ │ │ ├── base_gattn.cpython-36.pyc
│ │ │ │ │ └── sp_hgat.cpython-36.pyc
│ │ │ │ ├── base_gattn.py
│ │ │ │ └── sp_hgat.py
│ │ │ ├── scripts
│ │ │ │ └── data_loader.py
│ │ │ └── utils
│ │ │ │ ├── __init__.py
│ │ │ │ ├── __pycache__
│ │ │ │ ├── __init__.cpython-36.pyc
│ │ │ │ ├── layers.cpython-36.pyc
│ │ │ │ └── process.cpython-36.pyc
│ │ │ │ ├── layers.py
│ │ │ │ └── process.py
│ │ ├── LICENSE
│ │ ├── README.md
│ │ └── fig
│ │ │ ├── attention.png
│ │ │ └── model.png
│ ├── MAGNN
│ │ ├── .gitignore
│ │ ├── README.md
│ │ ├── meta1.json
│ │ ├── meta2.json
│ │ ├── model
│ │ │ ├── MAGNN_lp.py
│ │ │ ├── MAGNN_nc.py
│ │ │ ├── MAGNN_nc_mb.py
│ │ │ ├── __init__.py
│ │ │ └── base_MAGNN.py
│ │ ├── preprocess_DBLP.ipynb
│ │ ├── preprocess_IMDB.ipynb
│ │ ├── preprocess_LastFM.ipynb
│ │ ├── run_ACM.py
│ │ ├── run_DBLP.py
│ │ ├── run_Freebase.py
│ │ ├── run_IMDB.py
│ │ ├── run_IMDB_new.py
│ │ ├── run_LastFM.py
│ │ └── utils
│ │ │ ├── __init__.py
│ │ │ ├── data.py
│ │ │ ├── preprocess.py
│ │ │ ├── pytorchtools.py
│ │ │ └── tools.py
│ ├── RGCN
│ │ ├── README.md
│ │ ├── entity_classify.py
│ │ ├── model.py
│ │ └── scripts
│ │ │ ├── __init__.py
│ │ │ └── data_loader.py
│ ├── RSHN
│ │ ├── README.md
│ │ ├── RSHN.py
│ │ ├── build_coarsened_line_graph
│ │ │ ├── relation_graph.py
│ │ │ └── utils.py
│ │ └── torch_geometric
│ │ │ ├── __init__.py
│ │ │ ├── data
│ │ │ ├── __init__.py
│ │ │ ├── batch.py
│ │ │ ├── data.py
│ │ │ ├── dataloader.py
│ │ │ ├── dataset.py
│ │ │ ├── download.py
│ │ │ ├── extract.py
│ │ │ ├── in_memory_dataset.py
│ │ │ └── makedirs.py
│ │ │ ├── datasets
│ │ │ ├── __init__.py
│ │ │ └── entities.py
│ │ │ ├── nn
│ │ │ ├── __init__.py
│ │ │ ├── conv
│ │ │ │ ├── __init__.py
│ │ │ │ ├── message_passing.py
│ │ │ │ ├── nn_conv.py
│ │ │ │ └── relation_conv.py
│ │ │ └── inits.py
│ │ │ └── utils
│ │ │ ├── __init__.py
│ │ │ ├── convert.py
│ │ │ ├── degree.py
│ │ │ ├── grid.py
│ │ │ ├── isolated.py
│ │ │ ├── loop.py
│ │ │ ├── metric.py
│ │ │ ├── normalized_cut.py
│ │ │ ├── num_nodes.py
│ │ │ ├── one_hot.py
│ │ │ ├── repeat.py
│ │ │ ├── scatter.py
│ │ │ ├── softmax.py
│ │ │ ├── sparse.py
│ │ │ ├── to_batch.py
│ │ │ └── undirected.py
│ └── baseline
│ │ ├── GNN.py
│ │ ├── README.md
│ │ ├── conv.py
│ │ ├── run.py
│ │ ├── run_multi.py
│ │ ├── run_new.py
│ │ └── utils
│ │ ├── __init__.py
│ │ ├── data.py
│ │ ├── preprocess.py
│ │ ├── pytorchtools.py
│ │ └── tools.py
│ ├── scripts
│ ├── NC_F1.py
│ ├── README.md
│ ├── __init__.py
│ └── data_loader.py
│ └── test.py
├── README.md
├── Recom
├── KGAT
│ ├── LICENSE
│ ├── Log
│ │ └── README.md
│ ├── Model
│ │ ├── BPRMF.py
│ │ ├── CFKG.py
│ │ ├── CKE.py
│ │ ├── KGAT.py
│ │ ├── Main.py
│ │ ├── NFM.py
│ │ ├── README.md
│ │ └── utility
│ │ │ ├── README.md
│ │ │ ├── batch_test.py
│ │ │ ├── helper.py
│ │ │ ├── load_data.py
│ │ │ ├── loader_bprmf.py
│ │ │ ├── loader_cfkg.py
│ │ │ ├── loader_cke.py
│ │ │ ├── loader_kgat.py
│ │ │ ├── loader_nfm.py
│ │ │ ├── metrics.py
│ │ │ └── parser.py
│ └── README.md
├── KGCN
│ ├── .gitignore
│ ├── LICENSE
│ ├── README.md
│ └── src
│ │ ├── aggregators.py
│ │ ├── data_loader.py
│ │ ├── gcn
│ │ ├── __init__.py
│ │ ├── inits.py
│ │ └── layers.py
│ │ ├── main.py
│ │ ├── model.py
│ │ ├── preprocess.py
│ │ ├── train.py
│ │ └── utils
│ │ ├── __init__.py
│ │ ├── layers.py
│ │ └── sp_gat.py
├── KGNN-LS
│ ├── README.md
│ └── src
│ │ ├── aggregators.py
│ │ ├── data_loader.py
│ │ ├── empirical_study.py
│ │ ├── gcn
│ │ ├── __init__.py
│ │ ├── inits.py
│ │ └── layers.py
│ │ ├── main.py
│ │ ├── model.py
│ │ ├── preprocess.py
│ │ ├── train.py
│ │ └── utils
│ │ ├── __init__.py
│ │ ├── layers.py
│ │ └── sp_gat.py
├── README.md
└── baseline
│ ├── LICENSE
│ ├── Model
│ ├── GNN.py
│ ├── conv.py
│ ├── main.py
│ └── utility
│ │ ├── batch_test.py
│ │ ├── helper.py
│ │ ├── load_data.py
│ │ ├── loader_kgat.py
│ │ ├── metrics.py
│ │ └── parser.py
│ └── README.md
└── TC
└── HGAT
├── README.md
├── build_data.py
├── build_features.py
├── build_network.py
├── data
├── example
│ ├── example.txt
│ ├── example2entity.txt
│ ├── test.list
│ ├── train.list
│ └── vali.list
└── stopwords_en.txt
├── model
├── code
│ ├── __init__.py
│ ├── base.py
│ ├── baseline
│ │ ├── GNN.py
│ │ ├── README.md
│ │ ├── conv.py
│ │ ├── new_main.py
│ │ ├── run.py
│ │ ├── run_multi.py
│ │ ├── run_new.py
│ │ └── utils
│ │ │ ├── __init__.py
│ │ │ ├── data.py
│ │ │ ├── preprocess.py
│ │ │ ├── pytorchtools.py
│ │ │ └── tools.py
│ ├── layers.py
│ ├── models.py
│ ├── print_log.py
│ ├── train.py
│ └── utils.py
└── data
│ └── example
│ ├── example.cites
│ ├── example.content.entity
│ ├── example.content.text
│ ├── example.content.topic
│ ├── mapindex.txt
│ ├── test.map
│ ├── train.map
│ └── vali.map
├── tagMe.py
└── utils.py
/LP/GATNE/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2019 Yukuo Cen
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
--------------------------------------------------------------------------------
/LP/GATNE/scripts/run_example.sh:
--------------------------------------------------------------------------------
1 | #python src/main.py --input data/example
2 | python src/main_pytorch.py --input data/amazon --feature data/amazon/feature.txt
3 |
4 | read str  # wait for user input before exiting
--------------------------------------------------------------------------------
/LP/GATNE/src/walk.py:
--------------------------------------------------------------------------------
1 | import random
2 | import multiprocessing
3 |
4 | from tqdm import tqdm
5 |
6 | def walk(args):
7 | walk_length, start, schema = args
8 | # Simulate a random walk starting from start node.
9 | rand = random.Random()
10 |
11 | if schema:
12 | schema_items = schema.split('-')
13 | assert schema_items[0] == schema_items[-1]
14 |
15 | walk = [start]
16 | while len(walk) < walk_length:
17 | cur = walk[-1]
18 | candidates = []
19 | for node in G[cur]:
20 | if schema == '' or node_type[node] == schema_items[len(walk) % (len(schema_items) - 1)]:
21 | candidates.append(node)
22 | if candidates:
23 | walk.append(rand.choice(candidates))
24 | else:
25 | break
26 | return [str(node) for node in walk]
27 |
28 | def initializer(init_G, init_node_type):
29 | global G
30 | G = init_G
31 | global node_type
32 | node_type = init_node_type
33 |
34 | class RWGraph():
35 | def __init__(self, nx_G, node_type_arr=None, num_workers=16):
36 | self.G = nx_G
37 | self.node_type = node_type_arr
38 | self.num_workers = num_workers
39 |
40 | def node_list(self, nodes, num_walks):
41 | for loop in range(num_walks):
42 | for node in nodes:
43 | yield node
44 |
45 | def simulate_walks(self, num_walks, walk_length, schema=None):
46 | all_walks = []
47 | nodes = list(self.G.keys())
48 | random.shuffle(nodes)
49 |
50 | if schema is None:
51 | with multiprocessing.Pool(self.num_workers, initializer=initializer, initargs=(self.G, self.node_type)) as pool:
52 | all_walks = list(pool.imap(walk, ((walk_length, node, '') for node in tqdm(self.node_list(nodes, num_walks))), chunksize=256))
53 | else:
54 | schema_list = schema.split(',')
55 | for schema_iter in schema_list:
56 | with multiprocessing.Pool(self.num_workers, initializer=initializer, initargs=(self.G, self.node_type)) as pool:
57 | walks = list(pool.imap(walk, ((walk_length, node, schema_iter) for node in tqdm(self.node_list(nodes, num_walks)) if schema_iter.split('-')[0] == self.node_type[node]), chunksize=512))
58 | all_walks.extend(walks)
59 |
60 | return all_walks
61 |
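62 | # Usage sketch (an assumption, not part of the original file): run plain and
63 | # schema-guided walks on a toy heterogeneous graph. Any dict-like adjacency
64 | # (node -> neighbor list) works for nx_G; node_type_arr maps node -> type name.
65 | if __name__ == '__main__':
66 |     toy_graph = {'a1': ['p1'], 'p1': ['a1', 'v1'], 'v1': ['p1']}
67 |     toy_types = {'a1': 'A', 'p1': 'P', 'v1': 'V'}
68 |     rw = RWGraph(toy_graph, node_type_arr=toy_types, num_workers=2)
69 |     print(rw.simulate_walks(num_walks=2, walk_length=5))                  # unconstrained
70 |     print(rw.simulate_walks(num_walks=2, walk_length=5, schema='A-P-A'))  # meta-path walks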
--------------------------------------------------------------------------------
/LP/HetGNN/code/DeepWalk.py:
--------------------------------------------------------------------------------
1 | import string
2 | import re
3 | import random
4 | import math
5 | import numpy as np
6 | from gensim.models import Word2Vec
7 | from itertools import *
8 | dimen = 128
9 | window = 5
10 |
11 |
12 | def read_random_walk_corpus():
13 | walks=[]
14 | #inputfile = open("../data/academic_test/meta_random_walk_APVPA_test.txt","r")
15 | inputfile = open("../data/academic_test/het_random_walk_test.txt", "r")
16 | for line in inputfile:
17 | path = []
18 |         node_list = re.split(' ', line.strip())  # strip the newline so the last token is clean
19 | for i in range(len(node_list)):
20 | path.append(node_list[i])
21 | walks.append(path)
22 | inputfile.close()
23 | return walks
24 |
25 |
26 | walk_corpus = read_random_walk_corpus()
27 | model = Word2Vec(walk_corpus, size = dimen, window = window, min_count = 0, workers = 2, sg = 1, hs = 0, negative = 5)  # gensim < 4.0 API ('size' became 'vector_size' in 4.0)
28 |
29 |
30 | print("Output...")
31 | #model.wv.save_word2vec_format("../data/node_embedding.txt")
32 | model.wv.save_word2vec_format("../data/academic_test/node_net_embedding.txt")
33 |
34 |
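35 | # Input format sketch (an assumption, not part of the original file): the walk
36 | # corpus holds one random walk per line, node ids separated by single spaces,
37 | # e.g. "a12 p3 a7 p9 ..."; each line becomes one Word2Vec training sentence.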
--------------------------------------------------------------------------------
/LP/HetGNN/code/args.py:
--------------------------------------------------------------------------------
1 | import argparse
2 |
3 | def read_args():
4 | parser = argparse.ArgumentParser()
5 | parser.add_argument('--data_path', type = str, default = '../data/academic_test/',
6 | help='path to data')
7 | parser.add_argument('--model_path', type = str, default = '../model_save/',
8 | help='path to save model')
9 | parser.add_argument('--A_n', type = int, default = 28646,
10 | help = 'number of author node')
11 | parser.add_argument('--P_n', type = int, default = 21044,
12 | help = 'number of paper node')
13 | parser.add_argument('--V_n', type = int, default = 18,
14 | help = 'number of venue node')
15 | parser.add_argument('--in_f_d', type = int, default = 128,
16 | help = 'input feature dimension')
17 | parser.add_argument('--embed_d', type = int, default = 128,
18 | help = 'embedding dimension')
19 |     parser.add_argument('--lr', type = float, default = 0.001,
20 | help = 'learning rate')
21 | parser.add_argument('--batch_s', type = int, default = 20000,
22 | help = 'batch size')
23 | parser.add_argument('--mini_batch_s', type = int, default = 200,
24 | help = 'mini batch size')
25 | parser.add_argument('--train_iter_n', type = int, default = 50,
26 | help = 'max number of training iteration')
27 | parser.add_argument('--walk_n', type = int, default = 10,
28 | help='number of walk per root node')
29 | parser.add_argument('--walk_L', type = int, default = 30,
30 | help='length of each walk')
31 | parser.add_argument('--window', type = int, default = 5,
32 | help='window size for relation extration')
33 | parser.add_argument("--random_seed", default = 10, type = int)
34 | parser.add_argument('--train_test_label', type= int, default = 0,
35 | help='train/test label: 0 - train, 1 - test, 2 - code test/generate negative ids for evaluation')
36 |     parser.add_argument('--save_model_freq', type = int, default = 2,
37 | help = 'number of iterations to save model')
38 | parser.add_argument("--cuda", default = 0, type = int)
39 | parser.add_argument("--checkpoint", default = '', type=str)
40 |
41 | args = parser.parse_args()
42 |
43 | return args
44 |
--------------------------------------------------------------------------------
/LP/HetGNN/code/node_classification_model.py:
--------------------------------------------------------------------------------
1 | import random
2 | import string
3 | import re
4 | import numpy
5 | from itertools import *
6 | import sklearn
7 | from sklearn import linear_model
8 | import sklearn.metrics as Metric
9 | import csv
10 | import argparse
11 |
12 | parser = argparse.ArgumentParser(description = 'node classification task')
13 | parser.add_argument('--A_n', type = int, default = 28646,
14 | help = 'number of author node')
15 | parser.add_argument('--P_n', type = int, default = 21044,
16 | help = 'number of paper node')
17 | parser.add_argument('--V_n', type = int, default = 18,
18 | help = 'number of venue node')
19 | parser.add_argument('--data_path', type = str, default = '../data/academic_test/',
20 | help='path to data')
21 | parser.add_argument('--embed_d', type = int, default = 128,
22 | help = 'embedding dimension')
23 |
24 | args = parser.parse_args()
25 | print(args)
26 |
27 |
28 | def load_data(data_file_name, n_features, n_samples):
29 | with open(data_file_name) as f:
30 | data_file = csv.reader(f)
31 | data = numpy.empty((n_samples, n_features))
32 | for i, d in enumerate(data_file):
33 |             data[i] = numpy.asarray(d, dtype=float)  # numpy.float is a deprecated alias of the builtin float
34 |         # the with-block closes the file; no explicit close needed
35 |
36 | return data
37 |
38 |
39 | def model(train_num, test_num):
40 | train_data_f = args.data_path + "train_class_feature.txt"
41 | train_data = load_data(train_data_f, args.embed_d + 2, train_num)
42 | train_features = train_data.astype(numpy.float32)[:,2:-1]
43 | train_target = train_data.astype(numpy.float32)[:,1]
44 |
45 | #print(train_target[1])
46 | learner = linear_model.LogisticRegression()
47 | learner.fit(train_features, train_target)
48 | train_features = None
49 | train_target = None
50 |
51 | print("training finish!")
52 |
53 | test_data_f = args.data_path + "test_class_feature.txt"
54 | test_data = load_data(test_data_f, args.embed_d + 2, test_num)
55 | test_id = test_data.astype(numpy.int32)[:,0]
56 | test_features = test_data.astype(numpy.float32)[:,2:-1]
57 | test_target = test_data.astype(numpy.float32)[:,1]
58 | test_predict = learner.predict(test_features)
59 | test_features = None
60 |
61 | print("test prediction finish!")
62 |
63 | output_f = open(args.data_path + "NC_prediction.txt", "w")
64 | for i in range(len(test_predict)):
65 | output_f.write('%d,%lf\n'%(test_id[i],test_predict[i]))
66 | output_f.close()
67 |
68 | print ("MacroF1: ")
69 | print (sklearn.metrics.f1_score(test_target,test_predict,average='macro'))
70 |
71 | print ("MicroF1: ")
72 | print (sklearn.metrics.f1_score(test_target,test_predict,average='micro'))
73 |
74 |
75 |
76 |
77 |
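78 | if __name__ == '__main__':
79 |     # Usage sketch (an assumption, not part of the original file): the counts are
80 |     # placeholders and must equal the number of rows in train_class_feature.txt
81 |     # and test_class_feature.txt respectively.
82 |     model(train_num=10000, test_num=2000)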
--------------------------------------------------------------------------------
/LP/HetGNN/code/node_clustering_model.py:
--------------------------------------------------------------------------------
1 | import random
2 | import string
3 | import re
4 | import numpy
5 | from itertools import *
6 | import sklearn
7 | from sklearn.cluster import KMeans
8 | from sklearn.metrics.cluster import normalized_mutual_info_score
9 | from sklearn.metrics.cluster import adjusted_rand_score
10 | import csv
11 | import argparse
12 |
13 | parser = argparse.ArgumentParser(description = 'node clustering task')
14 | parser.add_argument('--C_n', type = int, default = 4,
15 | help = 'number of node class label')
16 | parser.add_argument('--data_path', type = str, default = '../data/academic_test/',
17 | help='path to data')
18 | parser.add_argument('--embed_d', type = int, default = 128,
19 | help = 'embedding dimension')
20 |
21 | args = parser.parse_args()
22 | print(args)
23 |
24 |
25 | def model(cluster_id_num):
26 |     cluster_embed = numpy.around(numpy.random.normal(0, 0.01, [cluster_id_num, args.embed_d]), 4)
27 |     cluster_embed_f = open(args.data_path + "cluster_embed.txt", "r")
28 |     for line in cluster_embed_f:
29 |         line = line.strip()
30 |         author_index = int(re.split(' ', line)[0])
31 |         embed_list = re.split(' ', line)[1:]
32 |         for i in range(len(embed_list)):
33 |             cluster_embed[author_index][i] = embed_list[i]
34 |
35 |     kmeans = KMeans(n_clusters = args.C_n, random_state = 0).fit(cluster_embed)
36 |
37 | cluster_id_list = [0] * cluster_id_num
38 | cluster_id_f = open(args.data_path + "cluster.txt", "r")
39 | for line in cluster_id_f:
40 | line = line.strip()
41 | author_index = int(re.split(',',line)[0])
42 | cluster_id = int(re.split(',',line)[1])
43 | cluster_id_list[author_index] = cluster_id
44 |
45 | #NMI
46 | print ("NMI: " + str(normalized_mutual_info_score(kmeans.labels_, cluster_id_list)))
47 | print ("ARI: " + str(adjusted_rand_score(kmeans.labels_, cluster_id_list)))
48 |
49 |
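50 | if __name__ == '__main__':
51 |     # Usage sketch (an assumption, not part of the original file): cluster_id_num
52 |     # should equal the number of author nodes; 28646 is the A_n default used
53 |     # elsewhere in this repo.
54 |     model(cluster_id_num=28646)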
--------------------------------------------------------------------------------
/LP/MAGNN/model/__init__.py:
--------------------------------------------------------------------------------
1 | from model.MAGNN_nc import MAGNN_nc
2 | from model.MAGNN_nc_mb import MAGNN_nc_mb
3 | from model.MAGNN_lp import MAGNN_lp
4 |
--------------------------------------------------------------------------------
/LP/MAGNN/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/HGB/ca6fd5bb0c1ca32e63b132c8bfe8f11a4a6629fe/LP/MAGNN/utils/__init__.py
--------------------------------------------------------------------------------
/LP/MAGNN/utils/pytorchtools.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import torch
3 |
4 |
5 | class EarlyStopping:
6 | """Early stops the training if validation loss doesn't improve after a given patience."""
7 | def __init__(self, patience, verbose=False, delta=0, save_path='checkpoint.pt'):
8 | """
9 | Args:
10 | patience (int): How long to wait after last time validation loss improved.
11 | Default: 7
12 | verbose (bool): If True, prints a message for each validation loss improvement.
13 | Default: False
14 | delta (float): Minimum change in the monitored quantity to qualify as an improvement.
15 | Default: 0
16 | """
17 | self.patience = patience
18 | self.verbose = verbose
19 | self.counter = 0
20 | self.best_score = None
21 | self.early_stop = False
22 | self.val_loss_min = np.Inf
23 | self.delta = delta
24 | self.save_path = save_path
25 |
26 | def __call__(self, val_loss, model):
27 |
28 | score = -val_loss
29 |
30 | if self.best_score is None:
31 | self.best_score = score
32 | self.save_checkpoint(val_loss, model)
33 | elif score < self.best_score - self.delta:
34 | self.counter += 1
35 | print(f'EarlyStopping counter: {self.counter} out of {self.patience}')
36 | if self.counter >= self.patience:
37 | self.early_stop = True
38 | else:
39 | self.best_score = score
40 | self.save_checkpoint(val_loss, model)
41 | self.counter = 0
42 |
43 | def save_checkpoint(self, val_loss, model):
44 |         """Saves model when validation loss decreases."""
45 | if self.verbose:
46 | print(f'Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}). Saving model ...')
47 | torch.save(model.state_dict(), self.save_path)
48 | self.val_loss_min = val_loss
49 |
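50 | # Usage sketch (an assumption, not part of the original file): wire EarlyStopping
51 | # into a validation loop; train_one_epoch/validate stand in for the caller's own
52 | # functions.
53 | #
54 | #     stopper = EarlyStopping(patience=10, verbose=True, save_path='checkpoint.pt')
55 | #     for epoch in range(num_epochs):
56 | #         train_one_epoch(model)
57 | #         stopper(validate(model), model)
58 | #         if stopper.early_stop:
59 | #             break
60 | #     model.load_state_dict(torch.load(stopper.save_path))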
--------------------------------------------------------------------------------
/LP/RGCN-WN18/README.md:
--------------------------------------------------------------------------------
1 | ```bash
2 | python link_predict.py --dataset=wn18 --gpu=1 --hidden=100
3 | python gnn_link_predict.py --dataset=wn18 --model=GCN --gpu=1
4 | python gnn_link_predict.py --dataset=wn18 --model=GAT --gpu=1
5 | ```
--------------------------------------------------------------------------------
/LP/RGCN-WN18/code/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/HGB/ca6fd5bb0c1ca32e63b132c8bfe8f11a4a6629fe/LP/RGCN-WN18/code/__init__.py
--------------------------------------------------------------------------------
/LP/RGCN-WN18/code/scripts/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/HGB/ca6fd5bb0c1ca32e63b132c8bfe8f11a4a6629fe/LP/RGCN-WN18/code/scripts/__init__.py
--------------------------------------------------------------------------------
/LP/RGCN-WN18/code/scripts/read_file.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | # Dictionary I/O:
4 |
5 | def read_dictionary(filename, id_lookup=True):
6 | d = {}
7 |     for line in open(filename, 'r'):  # read-only access is sufficient
8 | line = line.strip().split('\t')
9 |
10 | if id_lookup:
11 | d[int(line[0])] = line[1]
12 | else:
13 | d[line[1]] = int(line[0])
14 |
15 | return d
16 |
17 | # Triplet file I/O:
18 |
19 | def read_triplets(filename):
20 |     for line in open(filename, 'r'):  # read-only access is sufficient
21 | processed_line = line.strip().split('\t')
22 | yield processed_line
23 |
24 | def read_triplet_file(filename):
25 | return list(read_triplets(filename))
26 |
27 | def read_triplets_as_list(filename, entity_dict, relation_dict):
28 | entity_dict = read_dictionary(entity_dict, id_lookup=False)
29 | relation_dict = read_dictionary(relation_dict, id_lookup=False)
30 |
31 | l = []
32 | for triplet in read_triplets(filename):
33 | entity_1 = entity_dict[triplet[0]]
34 | relation = relation_dict[triplet[1]]
35 | entity_2 = entity_dict[triplet[2]]
36 |
37 | l.append([entity_1, relation, entity_2])
38 |
39 | return l
40 |
41 |
42 | #print(read_triplets_as_list('data/FB15k/freebase_mtr100_mte100-train.txt', 'data/FB15k/entities.dict', 'data/FB15k/relations.dict'))
43 |
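44 | if __name__ == '__main__':
45 |     # Usage sketch (an assumption, not part of the original file): the wn18 paths
46 |     # below exist under LP/RGCN-WN18/data/ in this repository.
47 |     train = read_triplets_as_list('../../data/wn18/train.txt',
48 |                                   '../../data/wn18/entities.dict',
49 |                                   '../../data/wn18/relations.dict')
50 |     print(len(train), 'training triplets, e.g.', train[0])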
--------------------------------------------------------------------------------
/LP/RGCN-WN18/data/wn18/README:
--------------------------------------------------------------------------------
1 | ----------------------------------------------
2 | -- WORDNET TENSOR DATA -- A. Bordes -- 2013 --
3 | ----------------------------------------------
4 |
5 | ------------------
6 | OUTLINE:
7 | 1. Introduction
8 | 2. Content
9 | 3. Data Format
10 | 4. Data Statistics
11 | 5. How to Cite
12 | 6. License
13 | 7. Contact
14 | -------------------
15 |
16 |
17 | 1. INTRODUCTION:
18 |
19 | This WORDNET TENSOR DATA consists of a collection of triplets (synset, relation_type,
20 | synset) extracted from WordNet 3.0 (http://wordnet.princeton.edu). This data set can
21 | be seen as a 3-mode tensor depicting ternary relationships between synsets.
22 |
23 |
24 | 2. CONTENT:
25 |
26 | The data archive contains 5 files:
27 | - README 3K
28 | - wordnet-mlj12-definitions.txt 4,2M
29 | - wordnet-mlj12-train.txt 4,5M
30 | - wordnet-mlj12-valid.txt 165K
31 | - wordnet-mlj12-test.txt 165K
32 |
33 | The 3 files wordnet-mlj12-*.txt contain the triplets (training, validation
34 | and test sets), while the file wordnet-mlj12-definitions.txt lists the WordNet
35 | synsets definitions.
36 |
37 |
38 | 3. DATA FORMAT
39 |
40 | The definitions file (wordnet-mlj12-definitions.txt) contains one synset
41 | per line with the following format: synset_id (an 8-digit unique identifier),
42 | intelligible name (word+POS_tag+sense_index), and definition. These 3
43 | pieces of information are separated by a tab ('\t').
44 |
45 | All wordnet-mlj12-*.txt files contain one triplet per line, with 2 synset_ids
46 | and a relation type identifier in a tab-separated format. The first element is the
47 | synset_id of the left-hand side of the relation triple, the third one is the
48 | synset_id of the right-hand side, and the second element is the name of the type
49 | of relation between them.
50 |
51 |
52 | 4. DATA STATISTICS
53 |
54 | There are 40,943 synsets and 18 relation types among them. The training set contains
55 | 141,442 triplets, the validation set 5,000 and the test set 5,000.
56 |
57 | All triplets are unique and we made sure that all synsets appearing in
58 | the validation or test sets were occurring in the training set.
59 |
60 | 5. HOW TO CITE
61 |
62 | When using this data, one should cite the original paper:
63 | @article{bordes-mlj13,
64 | title = {A Semantic Matching Energy Function for Learning with Multi-relational Data},
65 | author = {Antoine Bordes and Xavier Glorot and Jason Weston and Yoshua Bengio},
66 | journal={Machine Learning},
67 | publisher={Springer},
68 | year={2013},
69 | note={to appear}
70 | }
71 |
72 | One should also point at the project page with either the long URL:
73 | https://www.hds.utc.fr/everest/doku.php?id=en:smemlj12 , or the short
74 | one: http://goo.gl/bHWsK .
75 |
76 | 6. LICENSE:
77 |
78 | WordNet data follows the attached license agreement.
79 |
80 | 7. CONTACT
81 |
82 | For all remarks or questions please contact Antoine Bordes: antoine
83 | (dot) bordes (at) utc (dot) fr .
84 |
85 |
86 |
87 |
--------------------------------------------------------------------------------
/LP/RGCN-WN18/data/wn18/Wordnet3.0-LICENSE:
--------------------------------------------------------------------------------
1 | WordNet Release 3.0 This software and database is being provided to you, the LICENSEE, by Princeton University under the following license. By obtaining, using and/or copying this software and database, you agree that you have read, understood, and will comply with these terms and conditions.: Permission to use, copy, modify and distribute this software and database and its documentation for any purpose and without fee or royalty is hereby granted, provided that you agree to comply with the following copyright notice and statements, including the disclaimer, and that the same appear on ALL copies of the software, database and documentation, including modifications that you make for internal use or for distribution. WordNet 3.0 Copyright 2006 by Princeton University. All rights reserved. THIS SOFTWARE AND DATABASE IS PROVIDED "AS IS" AND PRINCETON UNIVERSITY MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, PRINCETON UNIVERSITY MAKES NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE, DATABASE OR DOCUMENTATION WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER RIGHTS. The name of Princeton University or Princeton may not be used in advertising or publicity pertaining to distribution of the software and/or database. Title to copyright in this software, database and any associated documentation shall at all times remain with Princeton University and LICENSEE agrees to preserve same.
--------------------------------------------------------------------------------
/LP/RGCN-WN18/data/wn18/relations.dict:
--------------------------------------------------------------------------------
1 | 0 _member_of_domain_topic
2 | 1 _has_part
3 | 2 _synset_domain_usage_of
4 | 3 _synset_domain_region_of
5 | 4 _member_of_domain_region
6 | 5 _member_meronym
7 | 6 _part_of
8 | 7 _verb_group
9 | 8 _derivationally_related_form
10 | 9 _instance_hypernym
11 | 10 _instance_hyponym
12 | 11 _similar_to
13 | 12 _hyponym
14 | 13 _also_see
15 | 14 _member_of_domain_usage
16 | 15 _synset_domain_topic_of
17 | 16 _member_holonym
18 | 17 _hypernym
19 |
--------------------------------------------------------------------------------
/LP/RGCN/README.md:
--------------------------------------------------------------------------------
1 | # RGCN code
2 |
3 | Adapted from [DGL example](https://github.com/dmlc/dgl/tree/master/examples/pytorch/rgcn).
4 |
5 | We also replace the RGCN module with plain GCN/GAT for comparison (see gnn_link_predict.py).
6 |
7 | ## running environment
8 |
9 | * Python 3.7
10 | * torch 1.7.0
11 | * dgl 0.5.2
12 |
13 | ## running procedure
14 |
15 | * Dataset will be downloaded automatically at ~/.dgl/.
16 | * or you can download data from [tsinghua-cloud](https://cloud.tsinghua.edu.cn/d/8b9644cfa8344f26878c/) or [google-drive](https://drive.google.com/drive/folders/13o5dYuvpZWzgeUPVTLLtpYHAGss2sk_x?usp=sharing)
17 | * unzip all zip files
18 | * move them to ./data/
19 | * cd to RGCN/
20 | * run python file
21 |
22 | ```bash
23 | python3 link_predict.py -d FB15k-237 --gpu 0
24 | python3 gnn_link_predict.py -d FB15k-237 --gpu 0 --model=gcn
25 | python3 gnn_link_predict.py -d FB15k-237 --gpu 0 --model=gat
26 | ```
--------------------------------------------------------------------------------
/LP/benchmark/.gitignore:
--------------------------------------------------------------------------------
1 | data
2 | **/checkpoint
3 |
--------------------------------------------------------------------------------
/LP/benchmark/README.md:
--------------------------------------------------------------------------------
1 | # benchmark
2 |
3 | benchmark data loader and evaluation scripts
4 |
5 | ## data
6 |
7 | Warning: since the test data is public, take care not to overfit to it or leak it during training. For example, the order of the test data is not randomly permuted, so applying BatchNorm over it yields a biased norm estimate.
8 |
9 | ## data format
10 |
11 | * All ids begin from 0.
12 | * Each node type takes a continuous range of node_ids.
13 | * node_ids follow node_type order, i.e. nodes with node_type 0 take the first range of node_ids, nodes with node_type 1 take the second range, and so on.
14 | * One-hot node features can be omitted. A sketch of how these id ranges are consumed is given below.
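15 |
16 | A minimal sketch (our assumption of how such id ranges are typically consumed, not an excerpt from the data loader): compute each type's node_id offset from per-type counts, then map a global node_id back to (type, local id).
17 |
18 | ```python
19 | import numpy as np
20 |
21 | counts = [3, 2, 4]                      # hypothetical node counts per type
22 | offsets = np.cumsum([0] + counts[:-1])  # first node_id of each type: [0, 3, 5]
23 |
24 | def to_type_and_local(node_id):
25 |     t = int(np.searchsorted(offsets, node_id, side='right') - 1)
26 |     return t, node_id - offsets[t]
27 |
28 | assert to_type_and_local(4) == (1, 1)   # global id 4 is the 2nd node of type 1
29 | ```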
--------------------------------------------------------------------------------
/LP/benchmark/methods/GATNE/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2019 Yukuo Cen
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
--------------------------------------------------------------------------------
/LP/benchmark/methods/GATNE/walk.py:
--------------------------------------------------------------------------------
1 | import random
2 | import multiprocessing
3 |
4 | from tqdm import tqdm
5 |
6 | def walk(args):
7 | walk_length, start, schema = args
8 | # Simulate a random walk starting from start node.
9 | rand = random.Random()
10 |
11 | if schema:
12 | schema_items = schema.split('-')
13 | assert schema_items[0] == schema_items[-1]
14 |
15 | walk = [start]
16 | while len(walk) < walk_length:
17 | cur = walk[-1]
18 | candidates = []
19 | for node in G[cur]:
20 | if schema == '' or node_type[node] == schema_items[len(walk) % (len(schema_items) - 1)]:
21 | candidates.append(node)
22 | if candidates:
23 | walk.append(rand.choice(candidates))
24 | else:
25 | break
26 | return [str(node) for node in walk]
27 |
28 | def initializer(init_G, init_node_type):
29 | global G
30 | G = init_G
31 | global node_type
32 | node_type = init_node_type
33 |
34 | class RWGraph():
35 | def __init__(self, nx_G, node_type_arr=None, num_workers=16):
36 | self.G = nx_G
37 | self.node_type = node_type_arr
38 | self.num_workers = num_workers
39 |
40 | def node_list(self, nodes, num_walks):
41 | for loop in range(num_walks):
42 | for node in nodes:
43 | yield node
44 |
45 | def simulate_walks(self, num_walks, walk_length, schema=None):
46 | all_walks = []
47 | nodes = list(self.G.keys())
48 | random.shuffle(nodes)
49 |
50 | if schema is None or schema=='':
51 | with multiprocessing.Pool(self.num_workers, initializer=initializer, initargs=(self.G, self.node_type)) as pool:
52 | all_walks = list(pool.imap(walk, ((walk_length, node, '') for node in tqdm(self.node_list(nodes, num_walks))), chunksize=256))
53 | else:
54 | schema_list = schema.split(',')
55 | for schema_iter in schema_list:
56 | with multiprocessing.Pool(self.num_workers, initializer=initializer, initargs=(self.G, self.node_type)) as pool:
57 | walks = list(pool.imap(walk, ((walk_length, node, schema_iter) for node in tqdm(self.node_list(nodes, num_walks)) if schema_iter.split('-')[0] == self.node_type[node]), chunksize=512))
58 | all_walks.extend(walks)
59 |
60 | return all_walks
61 |
--------------------------------------------------------------------------------
/LP/benchmark/methods/GNN/README.md:
--------------------------------------------------------------------------------
1 | In dir `GNN`, run:
2 | * `python homoGNN.py --data amazon --model GCN`
3 |
4 |
--------------------------------------------------------------------------------
/LP/benchmark/methods/HGT/README.md:
--------------------------------------------------------------------------------
1 | # HGT code
2 |
3 | Adapted from [HGT-DGL](https://github.com/acbull/HGT-DGL).
4 |
5 | ## running environment
6 |
7 | * Python 3.7
8 | * torch 1.7.0
9 | * dgl 0.5.2
10 |
11 | ## running procedure
12 |
13 | * download data from [tsinghua-cloud](https://cloud.tsinghua.edu.cn/d/8b9644cfa8344f26878c/)
14 | * cd to HGT/
15 | * unzip all zip files
16 | * mkdir checkpoint
17 | * run scripts
18 |
19 | ```bash
20 | sh run_LastFM.sh
21 | sh run_PubMed.sh
22 | sh run_amazon.sh
23 | sh run_LastFM_magnn.sh
24 | ```
--------------------------------------------------------------------------------
/LP/benchmark/methods/HGT/run_LastFM.sh:
--------------------------------------------------------------------------------
1 | CUDA_VISIBLE_DEVICES=0 python link_predict.py --device 0 --use_norm True --dataset LastFM
--------------------------------------------------------------------------------
/LP/benchmark/methods/HGT/run_LastFM_magnn.sh:
--------------------------------------------------------------------------------
1 | CUDA_VISIBLE_DEVICES=0 python link_predict.py --device 0 --use_norm True --dataset LastFM_magnn
--------------------------------------------------------------------------------
/LP/benchmark/methods/HGT/run_PubMed.sh:
--------------------------------------------------------------------------------
1 | CUDA_VISIBLE_DEVICES=0 python link_predict.py --device 0 --use_norm True --dataset PubMed
--------------------------------------------------------------------------------
/LP/benchmark/methods/HGT/run_amazon.sh:
--------------------------------------------------------------------------------
1 | CUDA_VISIBLE_DEVICES=0 python link_predict.py --device 0 --use_norm True --dataset amazon
--------------------------------------------------------------------------------
/LP/benchmark/methods/HGT/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/HGB/ca6fd5bb0c1ca32e63b132c8bfe8f11a4a6629fe/LP/benchmark/methods/HGT/utils/__init__.py
--------------------------------------------------------------------------------
/LP/benchmark/methods/HGT/utils/data.py:
--------------------------------------------------------------------------------
1 | import networkx as nx
2 | import numpy as np
3 | import scipy
4 | import pickle
5 | import scipy.sparse as sp
6 |
7 | def load_data(prefix='DBLP'):
8 | from data_loader import data_loader
9 | dl = data_loader(prefix)
10 | features = []
11 | for i in range(len(dl.nodes['count'])):
12 | th = dl.nodes['attr'][i]
13 | if th is None:
14 | features.append(sp.eye(dl.nodes['count'][i]))
15 | else:
16 | features.append(th)
17 | adjM = sum(dl.links['data'].values())
18 | return features,\
19 | adjM, \
20 | dl
21 |
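22 | # Return value sketch (an assumption, not part of the original file): features[i]
23 | # is the attribute matrix of node type i, or a sparse identity placeholder when
24 | # that type has no attributes; adjM is the sum of all typed link matrices; dl is
25 | # the benchmark data_loader itself.
26 | # features, adjM, dl = load_data('LastFM')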
--------------------------------------------------------------------------------
/LP/benchmark/methods/HGT/utils/pytorchtools.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import torch
3 |
4 |
5 | class EarlyStopping:
6 | """Early stops the training if validation loss doesn't improve after a given patience."""
7 | def __init__(self, patience, verbose=False, delta=0, save_path='checkpoint.pt'):
8 | """
9 | Args:
10 | patience (int): How long to wait after last time validation loss improved.
11 | Default: 7
12 | verbose (bool): If True, prints a message for each validation loss improvement.
13 | Default: False
14 | delta (float): Minimum change in the monitored quantity to qualify as an improvement.
15 | Default: 0
16 | """
17 | self.patience = patience
18 | self.verbose = verbose
19 | self.counter = 0
20 | self.best_score = None
21 | self.early_stop = False
22 | self.val_loss_min = np.Inf
23 | self.delta = delta
24 | self.save_path = save_path
25 |
26 | def __call__(self, val_loss, model):
27 |
28 | score = -val_loss
29 |
30 | if self.best_score is None:
31 | self.best_score = score
32 | self.save_checkpoint(val_loss, model)
33 | elif score < self.best_score - self.delta:
34 | self.counter += 1
35 | print(f'EarlyStopping counter: {self.counter} out of {self.patience}')
36 | if self.counter >= self.patience:
37 | self.early_stop = True
38 | else:
39 | self.best_score = score
40 | self.save_checkpoint(val_loss, model)
41 | self.counter = 0
42 |
43 | def save_checkpoint(self, val_loss, model):
44 |         """Saves model when validation loss decreases."""
45 | if self.verbose:
46 | print(f'Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}). Saving model ...')
47 | torch.save(model.state_dict(), self.save_path)
48 | self.val_loss_min = val_loss
49 |
--------------------------------------------------------------------------------
/LP/benchmark/methods/HetGNN/DeepWalk.py:
--------------------------------------------------------------------------------
1 | import string
2 | import re
3 | import random
4 | import math
5 | import numpy as np
6 | from gensim.models import Word2Vec
7 | from itertools import *
8 | import sys
9 |
10 | dimen = 128
11 | window = 5
12 |
13 |
14 | def read_random_walk_corpus():
15 | walks = []
16 | inputfile = open(sys.path[0] + "/het_random_walk.txt", "r")
17 | for line in inputfile:
18 |         path = re.split(' ', line.strip())  # strip the newline so the last token is clean
19 | walks.append(path)
20 | inputfile.close()
21 | return walks
22 |
23 |
24 | def gen_net_embed():
25 | walk_corpus = read_random_walk_corpus()
26 | model = Word2Vec(walk_corpus, size=dimen, window=window, min_count=0, workers=2, sg=1, hs=0, negative=5)
27 | file_ = sys.path[0] + "/node_net_embedding.txt"
28 | model.wv.save_word2vec_format(file_)
29 | print(f"Generate {file_} done.")
30 |
--------------------------------------------------------------------------------
/LP/benchmark/methods/HetGNN/README.md:
--------------------------------------------------------------------------------
1 | ```bash
2 | cd HetGNN
3 | python main.py --data amazon
4 | python do_LP.py --data amazon
5 | ```
--------------------------------------------------------------------------------
/LP/benchmark/methods/HetGNN/args.py:
--------------------------------------------------------------------------------
1 | import argparse
2 |
3 |
4 | def read_args():
5 | parser = argparse.ArgumentParser()
6 | parser.add_argument('--data', type=str, default='amazon',
7 | help='select data path')
8 | parser.add_argument('--model_path', type=str, default='../model_save/',
9 | help='path to save model')
10 | parser.add_argument('--in_f_d', type=int, default=128,
11 | help='input feature dimension')
12 | parser.add_argument('--embed_d', type=int, default=128,
13 | help='embedding dimension')
14 |     parser.add_argument('--lr', type=float, default=0.01,
15 | help='learning rate')
16 | parser.add_argument('--batch_s', type=int, default=20000,
17 | help='batch size')
18 | parser.add_argument('--mini_batch_s', type=int, default=200,
19 | help='mini batch size')
20 | parser.add_argument('--train_iter_n', type=int, default=310,
21 | help='max number of training iteration')
22 | parser.add_argument('--walk_n', type=int, default=10,
23 | help='number of walk per root node')
24 | parser.add_argument('--walk_L', type=int, default=30,
25 | help='length of each walk')
26 | parser.add_argument('--window', type=int, default=5,
27 | help='window size for relation extration')
28 | parser.add_argument("--random_seed", default=10, type=int)
29 | parser.add_argument('--train_test_label', type=int, default=0,
30 | help='train/test label: 0 - train, 1 - test, 2 - code test/generate negative ids for evaluation')
31 |     parser.add_argument('--save_model_freq', type=int, default=10,
32 | help='number of iterations to save model')
33 | parser.add_argument("--cuda", default=0, type=int)
34 | parser.add_argument("--checkpoint", default='', type=str)
35 |     parser.add_argument("--feat_type", default=0, type=int,
36 |                         help='feat_type=0: all id vector; '
37 |                              'feat_type=1: load feat from data_loader')
38 | args = parser.parse_args()
39 |
40 | return args
41 |
--------------------------------------------------------------------------------
/LP/benchmark/methods/MAGNN/README.md:
--------------------------------------------------------------------------------
1 | ## MAGNN code for benchmark
2 |
3 | We tried our best to fit MAGNN into the benchmark data\_loader, but failed...
4 |
5 | The MAGNN code is heavily hard-coded and has many bugs. For example, we need to guarantee that every node has at least one neighbour for every meta-path. Moreover, the meta-path generation is extremely time-consuming.
6 |
7 | We therefore fit our data format to the initial MAGNN repo instead. That code is in the MAGNN\_ini folder.
8 |
9 | ## MAGNN
10 |
11 | This repository provides a reference implementation of MAGNN as described in the paper:
12 | > MAGNN: Metapath Aggregated Graph Neural Network for Heterogeneous Graph Embedding.
13 | > Xinyu Fu, Jiani Zhang, Ziqiao Meng, Irwin King.
14 | > The Web Conference, 2020.
15 |
16 | Available at [arXiv:2002.01680](https://arxiv.org/abs/2002.01680).
17 |
18 | ### Dependencies
19 |
20 | Recent versions of the following packages for Python 3 are required:
21 | * PyTorch 1.2.0
22 | * DGL 0.3.1
23 | * NetworkX 2.3
24 | * scikit-learn 0.21.3
25 | * NumPy 1.17.2
26 | * SciPy 1.3.1
27 |
28 | Dependencies for the preprocessing code are not listed here.
29 |
30 | ### Datasets
31 |
32 | The preprocessed datasets are available at:
33 | * IMDb - [Dropbox](https://www.dropbox.com/s/g0btk9ctr1es39x/IMDB_processed.zip?dl=0)
34 | * DBLP - [Dropbox](https://www.dropbox.com/s/yh4grpeks87ugr2/DBLP_processed.zip?dl=0)
35 | * Last.fm - [Dropbox](https://www.dropbox.com/s/jvlbs09pz6zwcka/LastFM_processed.zip?dl=0)
36 |
37 | The GloVe word vectors are obtained from [GloVe](https://nlp.stanford.edu/projects/glove/). Here is [the direct link](http://nlp.stanford.edu/data/glove.6B.zip) for the version we used in DBLP preprocessing.
38 |
39 | ### Usage
40 |
41 | 1. Create `checkpoint/` and `data/preprocessed` directories
42 | 2. Extract the zip file downloaded from the section above to `data/preprocessed`
43 | * E.g., extract the content of `IMDB_processed.zip` to `data/preprocessed/IMDB_processed`
44 | 3. Execute one of the following three commands from the project home directory:
45 | * `python run_IMDB.py`
46 | * `python run_DBLP.py`
47 | * `python run_LastFM.py`
48 |
49 | For more information about the available options of the model, you may check by executing `python run_IMDB.py --help`
50 |
51 | ### Citing
52 |
53 | If you find MAGNN useful in your research, please cite the following paper:
54 |
55 | @inproceedings{fu2020magnn,
56 | title={MAGNN: Metapath Aggregated Graph Neural Network for Heterogeneous Graph Embedding},
57 | author={Xinyu Fu and Jiani Zhang and Ziqiao Meng and Irwin King},
58 | booktitle = {WWW},
59 | year={2020}
60 | }
61 |
--------------------------------------------------------------------------------
/LP/benchmark/methods/MAGNN/model/__init__.py:
--------------------------------------------------------------------------------
1 | from model.MAGNN_nc import MAGNN_nc
2 | from model.MAGNN_nc_mb import MAGNN_nc_mb
3 | from model.MAGNN_lp import MAGNN_lp
4 |
--------------------------------------------------------------------------------
/LP/benchmark/methods/MAGNN/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/HGB/ca6fd5bb0c1ca32e63b132c8bfe8f11a4a6629fe/LP/benchmark/methods/MAGNN/utils/__init__.py
--------------------------------------------------------------------------------
/LP/benchmark/methods/MAGNN/utils/pytorchtools.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import torch
3 |
4 |
5 | class EarlyStopping:
6 | """Early stops the training if validation loss doesn't improve after a given patience."""
7 | def __init__(self, patience, verbose=False, delta=0, save_path='checkpoint.pt'):
8 | """
9 | Args:
10 | patience (int): How long to wait after last time validation loss improved.
11 | Default: 7
12 | verbose (bool): If True, prints a message for each validation loss improvement.
13 | Default: False
14 | delta (float): Minimum change in the monitored quantity to qualify as an improvement.
15 | Default: 0
16 | """
17 | self.patience = patience
18 | self.verbose = verbose
19 | self.counter = 0
20 | self.best_score = None
21 | self.early_stop = False
22 | self.val_loss_min = np.Inf
23 | self.delta = delta
24 | self.save_path = save_path
25 |
26 | def __call__(self, val_loss, model):
27 |
28 | score = -val_loss
29 |
30 | if self.best_score is None:
31 | self.best_score = score
32 | self.save_checkpoint(val_loss, model)
33 | elif score < self.best_score - self.delta:
34 | self.counter += 1
35 | print(f'EarlyStopping counter: {self.counter} out of {self.patience}')
36 | if self.counter >= self.patience:
37 | self.early_stop = True
38 | else:
39 | self.best_score = score
40 | self.save_checkpoint(val_loss, model)
41 | self.counter = 0
42 |
43 | def save_checkpoint(self, val_loss, model):
44 |         """Saves model when validation loss decreases."""
45 | if self.verbose:
46 | print(f'Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}). Saving model ...')
47 | torch.save(model.state_dict(), self.save_path)
48 | self.val_loss_min = val_loss
49 |
--------------------------------------------------------------------------------
/LP/benchmark/methods/MAGNN_ini/README.md:
--------------------------------------------------------------------------------
1 | # MAGNN for benchmark
2 |
3 | First, run trans\_format.py to transform our benchmark data to the MAGNN format.
4 |
5 | Then, run ```ipython preprocess_LastFM.ipynb``` (or the LastFM\_magnn variant) to get the preprocessed data.
6 |
7 | Next, run ```python run_LastFM.py --save-postfix LastFM``` (or LastFM\_magnn) to train the model.
8 |
9 | Last, run ```python test_LastFM.py --save-postfix LastFM``` (or LastFM\_magnn) to test the model.
10 |
11 | ## MAGNN
12 |
13 | This repository provides a reference implementation of MAGNN as described in the paper:
14 | > MAGNN: Metapath Aggregated Graph Neural Network for Heterogeneous Graph Embedding.
15 | > Xinyu Fu, Jiani Zhang, Ziqiao Meng, Irwin King.
16 | > The Web Conference, 2020.
17 |
18 | Available at [arXiv:2002.01680](https://arxiv.org/abs/2002.01680).
19 |
20 | ### Dependencies
21 |
22 | Recent versions of the following packages for Python 3 are required:
23 | * PyTorch 1.2.0
24 | * DGL 0.3.1
25 | * NetworkX 2.3
26 | * scikit-learn 0.21.3
27 | * NumPy 1.17.2
28 | * SciPy 1.3.1
29 |
30 | Dependencies for the preprocessing code are not listed here.
31 |
32 | ### Datasets
33 |
34 | The preprocessed datasets are available at:
35 | * IMDb - [Dropbox](https://www.dropbox.com/s/g0btk9ctr1es39x/IMDB_processed.zip?dl=0)
36 | * DBLP - [Dropbox](https://www.dropbox.com/s/yh4grpeks87ugr2/DBLP_processed.zip?dl=0)
37 | * Last.fm - [Dropbox](https://www.dropbox.com/s/jvlbs09pz6zwcka/LastFM_processed.zip?dl=0)
38 |
39 | The GloVe word vectors are obtained from [GloVe](https://nlp.stanford.edu/projects/glove/). Here is [the direct link](http://nlp.stanford.edu/data/glove.6B.zip) for the version we used in DBLP preprocessing.
40 |
41 | ### Usage
42 |
43 | 1. Create `checkpoint/` and `data/preprocessed` directories
44 | 2. Extract the zip file downloaded from the section above to `data/preprocessed`
45 | * E.g., extract the content of `IMDB_processed.zip` to `data/preprocessed/IMDB_processed`
46 | 3. Execute one of the following three commands from the project home directory:
47 | * `python run_IMDB.py`
48 | * `python run_DBLP.py`
49 | * `python run_LastFM.py`
50 |
51 | For more information about the available options of the model, you may check by executing `python run_IMDB.py --help`
52 |
53 | ### Citing
54 |
55 | If you find MAGNN useful in your research, please cite the following paper:
56 |
57 | @inproceedings{fu2020magnn,
58 | title={MAGNN: Metapath Aggregated Graph Neural Network for Heterogeneous Graph Embedding},
59 | author={Xinyu Fu and Jiani Zhang and Ziqiao Meng and Irwin King},
60 | booktitle = {WWW},
61 | year={2020}
62 | }
63 |
--------------------------------------------------------------------------------
/LP/benchmark/methods/MAGNN_ini/model/__init__.py:
--------------------------------------------------------------------------------
1 | from model.MAGNN_nc import MAGNN_nc
2 | from model.MAGNN_nc_mb import MAGNN_nc_mb
3 | from model.MAGNN_lp import MAGNN_lp
4 |
--------------------------------------------------------------------------------
/LP/benchmark/methods/MAGNN_ini/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/HGB/ca6fd5bb0c1ca32e63b132c8bfe8f11a4a6629fe/LP/benchmark/methods/MAGNN_ini/utils/__init__.py
--------------------------------------------------------------------------------
/LP/benchmark/methods/MAGNN_ini/utils/pytorchtools.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import torch
3 |
4 |
5 | class EarlyStopping:
6 | """Early stops the training if validation loss doesn't improve after a given patience."""
7 | def __init__(self, patience, verbose=False, delta=0, save_path='checkpoint.pt'):
8 | """
9 | Args:
10 |             patience (int): How long to wait after the last time validation loss improved.
11 |                             (required; no default)
12 | verbose (bool): If True, prints a message for each validation loss improvement.
13 | Default: False
14 | delta (float): Minimum change in the monitored quantity to qualify as an improvement.
15 | Default: 0
16 | """
17 | self.patience = patience
18 | self.verbose = verbose
19 | self.counter = 0
20 | self.best_score = None
21 | self.early_stop = False
22 | self.val_loss_min = np.Inf
23 | self.delta = delta
24 | self.save_path = save_path
25 |
26 | def __call__(self, val_loss, model):
27 |
28 | score = -val_loss
29 |
30 | if self.best_score is None:
31 | self.best_score = score
32 | self.save_checkpoint(val_loss, model)
33 | elif score < self.best_score - self.delta:
34 | self.counter += 1
35 | print(f'EarlyStopping counter: {self.counter} out of {self.patience}')
36 | if self.counter >= self.patience:
37 | self.early_stop = True
38 | else:
39 | self.best_score = score
40 | self.save_checkpoint(val_loss, model)
41 | self.counter = 0
42 |
43 | def save_checkpoint(self, val_loss, model):
44 |         """Saves the model when the validation loss decreases."""
45 | if self.verbose:
46 | print(f'Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}). Saving model ...')
47 | torch.save(model.state_dict(), self.save_path)
48 | self.val_loss_min = val_loss
49 |
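For reference, here is a minimal training-loop sketch showing how `EarlyStopping` is typically driven. The model, the loop, and the validation loss are hypothetical placeholders; only the stopper calls mirror the class above.

```python
import torch
import torch.nn as nn

# hypothetical tiny model, just to exercise EarlyStopping
model = nn.Linear(4, 1)
stopper = EarlyStopping(patience=5, verbose=True, save_path='checkpoint.pt')

for epoch in range(100):
    # ... a real training step would go here ...
    val_loss = 1.0 / (epoch + 1)   # stand-in for a real validation loss
    stopper(val_loss, model)       # saves a checkpoint on each improvement
    if stopper.early_stop:
        print('Stopped early at epoch', epoch)
        break

# restore the best weights saved by EarlyStopping
model.load_state_dict(torch.load('checkpoint.pt'))
```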
--------------------------------------------------------------------------------
/LP/benchmark/methods/RGCN/README.md:
--------------------------------------------------------------------------------
1 | # RGCN code
2 |
3 | Adapted from [DGL example](https://github.com/dmlc/dgl/tree/master/examples/pytorch/rgcn).
4 |
5 | We replace the GNN module in the paper with GCN for comparison.
6 |
7 | ## running environment
8 |
9 | * Python 3.7
10 | * torch 1.7.0
11 | * dgl 0.5.2
12 | * nvidia-ml-py3 7.352.0
13 |
14 | ## running procedure
15 |
16 | * The dataset will be downloaded automatically to ~/.dgl/.
17 | * Alternatively, download the data from [tsinghua-cloud](https://cloud.tsinghua.edu.cn/d/8b9644cfa8344f26878c/)
18 | * unzip all the zip files
19 | * move them to ~/.dgl/
20 | * cd to RGCN/
21 | * run the python scripts below
22 |
23 | ```bash
24 | python link_predict.py --dataset=LastFM
25 | python link_predict.py --dataset=amazon --hidden-dim=60
26 | python link_predict.py --dataset=PubMed --hidden-dim=60
27 | python link_predict.py --dataset=LastFM_magnn
28 | ```
--------------------------------------------------------------------------------
/LP/benchmark/methods/baseline/README.md:
--------------------------------------------------------------------------------
1 | # Simple-HGN for HGB
2 |
3 |
4 | ```bash
5 | python run_new.py --dataset LastFM
6 | python run_dist.py --dataset amazon
7 | python run_dist.py --dataset PubMed --batch-size 8192
8 | ```
9 |
10 | ## running environment
11 |
12 | * torch 1.6.0 cuda 10.1
13 | * dgl 0.4.3 cuda 10.1
14 | * networkx 2.3
15 | * scikit-learn 0.23.2
16 | * scipy 1.5.2
17 |
--------------------------------------------------------------------------------
/LP/benchmark/methods/baseline/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/HGB/ca6fd5bb0c1ca32e63b132c8bfe8f11a4a6629fe/LP/benchmark/methods/baseline/utils/__init__.py
--------------------------------------------------------------------------------
/LP/benchmark/methods/baseline/utils/data.py:
--------------------------------------------------------------------------------
1 | import networkx as nx
2 | import numpy as np
3 | import scipy
4 | import pickle
5 | import scipy.sparse as sp
6 |
7 | def load_data(prefix='DBLP'):
8 |     from scripts.data_loader import data_loader
9 |     dl = data_loader('../../data/' + prefix)
10 |     features = []
11 |     for i in range(len(dl.nodes['count'])):
12 |         th = dl.nodes['attr'][i]
13 |         if th is None:
14 |             # featureless node type: fall back to one-hot (identity) features
15 |             features.append(sp.eye(dl.nodes['count'][i]))
16 |         else:
17 |             features.append(th)
18 |     # merge the adjacency matrices of all link types into a single matrix
19 |     adjM = sum(dl.links['data'].values())
20 |     return features, adjM, dl
21 |
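A minimal usage sketch of `load_data`; the dataset name is a hypothetical example, and the relative `../../data/` layout from above is assumed:

```python
import scipy.sparse as sp

features, adjM, dl = load_data('LastFM')

# one feature matrix per node type; identity matrices stand in where attributes are missing
for i, feat in enumerate(features):
    print('node type', i, 'feature shape:', feat.shape)

# adjM is the sum of the adjacency matrices of all link types
nnz = adjM.nnz if sp.issparse(adjM) else int((adjM != 0).sum())
print('nonzero entries in adjM:', nnz)
```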
--------------------------------------------------------------------------------
/LP/benchmark/methods/baseline/utils/pytorchtools.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import torch
3 |
4 |
5 | class EarlyStopping:
6 | """Early stops the training if validation loss doesn't improve after a given patience."""
7 | def __init__(self, patience, verbose=False, delta=0, save_path='checkpoint.pt'):
8 | """
9 | Args:
10 |             patience (int): How long to wait after the last time validation loss improved.
11 |                             (required; no default)
12 | verbose (bool): If True, prints a message for each validation loss improvement.
13 | Default: False
14 | delta (float): Minimum change in the monitored quantity to qualify as an improvement.
15 | Default: 0
16 | """
17 | self.patience = patience
18 | self.verbose = verbose
19 | self.counter = 0
20 | self.best_score = None
21 | self.early_stop = False
22 | self.val_loss_min = np.Inf
23 | self.delta = delta
24 | self.save_path = save_path
25 |
26 | def __call__(self, val_loss, model):
27 |
28 | score = -val_loss
29 |
30 | if self.best_score is None:
31 | self.best_score = score
32 | self.save_checkpoint(val_loss, model)
33 | elif score < self.best_score - self.delta:
34 | self.counter += 1
35 | print(f'EarlyStopping counter: {self.counter} out of {self.patience}')
36 | if self.counter >= self.patience:
37 | self.early_stop = True
38 | else:
39 | self.best_score = score
40 | self.save_checkpoint(val_loss, model)
41 | self.counter = 0
42 |
43 | def save_checkpoint(self, val_loss, model):
44 |         """Saves the model when the validation loss decreases."""
45 | if self.verbose:
46 | print(f'Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}). Saving model ...')
47 | torch.save(model.state_dict(), self.save_path)
48 | self.val_loss_min = val_loss
49 |
--------------------------------------------------------------------------------
/LP/benchmark/scripts/README.md:
--------------------------------------------------------------------------------
1 | ## Evaluate AUC and MRR from prediction files.
2 | ```bash
3 | python LP_AUC_MRR.py --pred_zip lp.zip --log out.log
4 | ```
--------------------------------------------------------------------------------
/LP/benchmark/scripts/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/HGB/ca6fd5bb0c1ca32e63b132c8bfe8f11a4a6629fe/LP/benchmark/scripts/__init__.py
--------------------------------------------------------------------------------
/NC/GTN/.gitignore:
--------------------------------------------------------------------------------
1 | data/*
2 | .vscode/*
--------------------------------------------------------------------------------
/NC/GTN/GNN.py:
--------------------------------------------------------------------------------
1 | import torch.nn as nn
2 | import dgl
3 |
4 | from dgl.nn.pytorch import GraphConv
5 |
6 | import dgl.function as fn
7 | from dgl.nn.pytorch import edge_softmax, GATConv
8 |
9 |
10 | class GAT(nn.Module):
11 | def __init__(self,
12 | g,
13 | num_layers,
14 | in_dim,
15 | num_hidden,
16 | num_classes,
17 | heads,
18 | activation,
19 | feat_drop,
20 | attn_drop,
21 | negative_slope,
22 | residual):
23 | super(GAT, self).__init__()
24 | self.g = g
25 | self.num_layers = num_layers
26 | self.gat_layers = nn.ModuleList()
27 | self.activation = activation
28 | # input projection (no residual)
29 | self.gat_layers.append(GATConv(
30 | in_dim, num_hidden, heads[0],
31 | feat_drop, attn_drop, negative_slope, False, self.activation))
32 | # hidden layers
33 | for l in range(1, num_layers):
34 | # due to multi-head, the in_dim = num_hidden * num_heads
35 | self.gat_layers.append(GATConv(
36 | num_hidden * heads[l-1], num_hidden, heads[l],
37 | feat_drop, attn_drop, negative_slope, residual, self.activation))
38 | # output projection
39 | self.gat_layers.append(GATConv(
40 | num_hidden * heads[-2], num_classes, heads[-1],
41 | feat_drop, attn_drop, negative_slope, residual, None))
42 |
43 | def forward(self, inputs):
44 | h = inputs
45 | for l in range(self.num_layers):
46 | h = self.gat_layers[l](self.g, h).flatten(1)
47 | # output projection
48 | logits = self.gat_layers[-1](self.g, h).mean(1)
49 | return logits
50 |
51 | class GCN(nn.Module):
52 | def __init__(self,
53 | g,
54 | in_feats,
55 | n_hidden,
56 | n_classes,
57 | n_layers,
58 | activation,
59 | dropout):
60 | super(GCN, self).__init__()
61 | self.g = g
62 | self.layers = nn.ModuleList()
63 | # input layer
64 | self.layers.append(GraphConv(in_feats, n_hidden, activation=activation))
65 | # hidden layers
66 | for i in range(n_layers - 1):
67 | self.layers.append(GraphConv(n_hidden, n_hidden, activation=activation))
68 | # output layer
69 | self.layers.append(GraphConv(n_hidden, n_classes))
70 | self.dropout = nn.Dropout(p=dropout)
71 |
72 | def forward(self, features):
73 | h = features
74 | for i, layer in enumerate(self.layers):
75 | if i != 0:
76 | h = self.dropout(h)
77 | h = layer(self.g, h)
78 | return h
79 |
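For orientation, a minimal sketch that instantiates the `GCN` above on a toy graph. The graph, sizes, and features are made up, and DGL >= 0.5 is assumed for `dgl.graph`/`dgl.add_self_loop`:

```python
import torch
import torch.nn.functional as F
import dgl

# toy 4-node directed cycle; self-loops avoid zero-in-degree errors in GraphConv
g = dgl.graph(([0, 1, 2, 3], [1, 2, 3, 0]))
g = dgl.add_self_loop(g)

feats = torch.randn(4, 8)   # 8-dimensional input features
model = GCN(g, in_feats=8, n_hidden=16, n_classes=3,
            n_layers=2, activation=F.relu, dropout=0.5)
logits = model(feats)       # shape: (4, 3)
print(logits.shape)
```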
--------------------------------------------------------------------------------
/NC/GTN/GTN.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/HGB/ca6fd5bb0c1ca32e63b132c8bfe8f11a4a6629fe/NC/GTN/GTN.png
--------------------------------------------------------------------------------
/NC/GTN/inits.py:
--------------------------------------------------------------------------------
1 | import math
2 |
3 |
4 | def uniform(size, tensor):
5 | bound = 1.0 / math.sqrt(size)
6 | if tensor is not None:
7 | tensor.data.uniform_(-bound, bound)
8 |
9 |
10 | def kaiming_uniform(tensor, fan, a):
11 | bound = math.sqrt(6 / ((1 + a**2) * fan))
12 | if tensor is not None:
13 | tensor.data.uniform_(-bound, bound)
14 |
15 |
16 | def glorot(tensor):
17 | stdv = math.sqrt(6.0 / (tensor.size(-2) + tensor.size(-1)))
18 | if tensor is not None:
19 | tensor.data.uniform_(-stdv, stdv)
20 |
21 |
22 | def zeros(tensor):
23 | if tensor is not None:
24 | tensor.data.fill_(0)
25 |
26 |
27 | def ones(tensor):
28 | if tensor is not None:
29 | tensor.data.fill_(1)
30 |
31 |
32 | def reset(nn):
33 | def _reset(item):
34 | if hasattr(item, 'reset_parameters'):
35 | item.reset_parameters()
36 |
37 | if nn is not None:
38 | if hasattr(nn, 'children') and len(list(nn.children())) > 0:
39 | for item in nn.children():
40 | _reset(item)
41 | else:
42 | _reset(nn)
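A quick sketch of how these initializers are applied; the tensor shapes are arbitrary:

```python
import torch

w = torch.empty(64, 32)
glorot(w)    # uniform in [-sqrt(6 / (64 + 32)), +sqrt(6 / (64 + 32))]

b = torch.empty(32)
zeros(b)     # bias starts at all zeros
```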
--------------------------------------------------------------------------------
/NC/HAN/README.md:
--------------------------------------------------------------------------------
1 | # HAN code
2 |
3 | Adapted from [dgl/han](https://github.com/dmlc/dgl/tree/master/examples/pytorch/han).
4 |
5 | We add GCN and GAT comparison under --hetero setting.
6 |
7 | ## running environment
8 |
9 | * Python 3.8.5
10 | * torch 1.4.0 cuda 10.1
11 | * dgl 0.5.2 cuda 10.1
12 |
13 | ## running procedure
14 |
15 | * Download ACM.mat from [tsinghua-cloud](https://cloud.tsinghua.edu.cn/d/0e784c52a6084b59bdee/files/?p=%2FDGL%E4%BB%A3%E7%A0%81%E7%89%88%E6%9C%AC%2FACM.mat) or [google-drive](https://drive.google.com/file/d/1NVT_IHhPDS8dwMmsrnTRHj90F7OZu0WY/view?usp=sharing)
16 | * Move ACM.mat to the current directory
17 | * run main.py
18 |
19 | ```bash
20 | python main.py --model gcn
21 | python main.py --model gat
22 | python main.py --model han
23 | ```
24 |
25 | ## performance report
26 |
27 | | | micro f1 score | macro f1 score |
28 | |--------------------|----------------|----------------|
29 | | Softmax regression | 89.66 | 89.62 |
30 | | HAN | 91.90 | 91.95 |
31 | | GCN | 92.79 | **92.87** |
32 | | GAT | **92.83** | 92.86 |
33 |
34 | ***The following content is from the initial dgl/han repo.***
35 |
36 | # Heterogeneous Graph Attention Network (HAN) with DGL
37 |
38 | This is an attempt to implement HAN with DGL's latest APIs for heterogeneous graphs.
39 | The authors' implementation can be found [here](https://github.com/Jhy1993/HAN).
40 |
41 | ## Usage
42 |
43 | `python main.py` for reproducing HAN's work on their dataset.
44 |
45 | `python main.py --hetero` for reproducing HAN's work on DGL's own dataset from
46 | [here](https://github.com/Jhy1993/HAN/tree/master/data/acm). The dataset is noisy
47 | because the same author can occur multiple times as different nodes.
48 |
49 | ## Performance
50 |
51 | Reference performance numbers for the ACM dataset:
52 |
53 | | | micro f1 score | macro f1 score |
54 | | ------------------- | -------------- | -------------- |
55 | | Paper | 89.22 | 89.40 |
56 | | DGL | 88.99 | 89.02 |
57 | | Softmax regression (own dataset) | 89.66 | 89.62 |
58 | | DGL (own dataset) | 91.51 | 91.66 |
59 |
60 | We ran a softmax regression to gauge how easy our own dataset is; HAN did still show some improvement.
61 |
--------------------------------------------------------------------------------
/NC/HetGNN/code/DeepWalk.py:
--------------------------------------------------------------------------------
1 | import string
2 | import re
3 | import random
4 | import math
5 | import numpy as np
6 | from gensim.models import Word2Vec
7 | from itertools import *
8 | dimen = 128
9 | window = 5
10 |
11 |
12 | def read_random_walk_corpus():
13 |     walks = []
14 |     # inputfile = open("../data/academic_test/meta_random_walk_APVPA_test.txt", "r")
15 |     inputfile = open("../data/academic_test/het_random_walk_test.txt", "r")
16 |     for line in inputfile:
17 |         path = []
18 |         node_list = re.split(' ', line)
19 |         for i in range(len(node_list)):
20 |             path.append(node_list[i])
21 |         walks.append(path)
22 |     inputfile.close()
23 |     return walks
24 |
25 |
26 | walk_corpus = read_random_walk_corpus()
27 | model = Word2Vec(walk_corpus, size = dimen, window = window, min_count = 0, workers = 2, sg = 1, hs = 0, negative = 5)  # gensim<4 API: 'size' was renamed to 'vector_size' in gensim 4
28 |
29 |
30 | print("Output...")
31 | #model.wv.save_word2vec_format("../data/node_embedding.txt")
32 | model.wv.save_word2vec_format("../data/academic_test/node_net_embedding.txt")
33 |
34 |
--------------------------------------------------------------------------------
/NC/HetGNN/code/args.py:
--------------------------------------------------------------------------------
1 | import argparse
2 |
3 | def read_args():
4 | parser = argparse.ArgumentParser()
5 | parser.add_argument('--data_path', type = str, default = '../data/academic_test/',
6 | help='path to data')
7 | parser.add_argument('--model_path', type = str, default = '../model_save/',
8 | help='path to save model')
9 | parser.add_argument('--A_n', type = int, default = 28646,
10 | help = 'number of author node')
11 | parser.add_argument('--P_n', type = int, default = 21044,
12 | help = 'number of paper node')
13 | parser.add_argument('--V_n', type = int, default = 18,
14 | help = 'number of venue node')
15 | parser.add_argument('--in_f_d', type = int, default = 128,
16 | help = 'input feature dimension')
17 | parser.add_argument('--embed_d', type = int, default = 128,
18 | help = 'embedding dimension')
19 | parser.add_argument('--lr', type = float, default = 0.001,
20 | help = 'learning rate')
21 | parser.add_argument('--batch_s', type = int, default = 20000,
22 | help = 'batch size')
23 | parser.add_argument('--mini_batch_s', type = int, default = 200,
24 | help = 'mini batch size')
25 | parser.add_argument('--train_iter_n', type = int, default = 50,
26 | help = 'max number of training iteration')
27 | parser.add_argument('--walk_n', type = int, default = 10,
28 | help='number of walk per root node')
29 | parser.add_argument('--walk_L', type = int, default = 30,
30 | help='length of each walk')
31 | parser.add_argument('--window', type = int, default = 5,
32 |                     help='window size for relation extraction')
33 | parser.add_argument("--random_seed", default = 10, type = int)
34 | parser.add_argument('--train_test_label', type= int, default = 0,
35 | help='train/test label: 0 - train, 1 - test, 2 - code test/generate negative ids for evaluation')
36 | parser.add_argument('--save_model_freq', type = int, default = 2,
37 |                     help = 'save the model every this many iterations')
38 | parser.add_argument("--cuda", default = 0, type = int)
39 | parser.add_argument("--checkpoint", default = '', type=str)
40 |
41 | args = parser.parse_args()
42 |
43 | return args
44 |
--------------------------------------------------------------------------------
/NC/HetGNN/code/node_classification_model.py:
--------------------------------------------------------------------------------
1 | import random
2 | import string
3 | import re
4 | import numpy
5 | from itertools import *
6 | import sklearn
7 | from sklearn import linear_model
8 | import sklearn.metrics as Metric
9 | import csv
10 | import argparse
11 |
12 | parser = argparse.ArgumentParser(description = 'node classification task')
13 | parser.add_argument('--A_n', type = int, default = 28646,
14 | help = 'number of author node')
15 | parser.add_argument('--P_n', type = int, default = 21044,
16 | help = 'number of paper node')
17 | parser.add_argument('--V_n', type = int, default = 18,
18 | help = 'number of venue node')
19 | parser.add_argument('--data_path', type = str, default = '../data/academic_test/',
20 | help='path to data')
21 | parser.add_argument('--embed_d', type = int, default = 128,
22 | help = 'embedding dimension')
23 |
24 | args = parser.parse_args()
25 | print(args)
26 |
27 |
28 | def load_data(data_file_name, n_features, n_samples):
29 | with open(data_file_name) as f:
30 | data_file = csv.reader(f)
31 | data = numpy.empty((n_samples, n_features))
32 | for i, d in enumerate(data_file):
33 |             data[i] = numpy.asarray(d[:], dtype=float)  # 'numpy.float' alias was removed in NumPy 1.24
34 |     # the with-block already closed the file
35 |
36 | return data
37 |
38 |
39 | def model(train_num, test_num):
40 | train_data_f = args.data_path + "train_class_feature.txt"
41 | train_data = load_data(train_data_f, args.embed_d + 2, train_num)
42 | train_features = train_data.astype(numpy.float32)[:,2:-1]
43 | train_target = train_data.astype(numpy.float32)[:,1]
44 |
45 | #print(train_target[1])
46 | learner = linear_model.LogisticRegression()
47 | learner.fit(train_features, train_target)
48 | train_features = None
49 | train_target = None
50 |
51 | print("training finish!")
52 |
53 | test_data_f = args.data_path + "test_class_feature.txt"
54 | test_data = load_data(test_data_f, args.embed_d + 2, test_num)
55 | test_id = test_data.astype(numpy.int32)[:,0]
56 | test_features = test_data.astype(numpy.float32)[:,2:-1]
57 | test_target = test_data.astype(numpy.float32)[:,1]
58 | test_predict = learner.predict(test_features)
59 | test_features = None
60 |
61 | print("test prediction finish!")
62 |
63 | output_f = open(args.data_path + "NC_prediction.txt", "w")
64 | for i in range(len(test_predict)):
65 | output_f.write('%d,%lf\n'%(test_id[i],test_predict[i]))
66 | output_f.close()
67 |
68 | print ("MicroF1: ")
69 | print (sklearn.metrics.f1_score(test_target,test_predict,average='micro'))
70 |
71 | print("MacroF1: ")
72 | print(sklearn.metrics.f1_score(test_target, test_predict, average='macro'))
73 |
74 |
75 |
76 |
77 |
--------------------------------------------------------------------------------
/NC/HetGNN/code/node_clustering_model.py:
--------------------------------------------------------------------------------
1 | import random
2 | import string
3 | import re
4 | import numpy
5 | from itertools import *
6 | import sklearn
7 | from sklearn.cluster import KMeans
8 | from sklearn.metrics.cluster import normalized_mutual_info_score
9 | from sklearn.metrics.cluster import adjusted_rand_score
10 | import csv
11 | import argparse
12 |
13 | parser = argparse.ArgumentParser(description = 'node clustering task')
14 | parser.add_argument('--C_n', type = int, default = 4,
15 | help = 'number of node class label')
16 | parser.add_argument('--data_path', type = str, default = '../data/academic_test/',
17 | help='path to data')
18 | parser.add_argument('--embed_d', type = int, default = 128,
19 | help = 'embedding dimension')
20 |
21 | args = parser.parse_args()
22 | print(args)
23 |
24 |
25 | def model(cluster_id_num):
26 |     cluster_embed = numpy.around(numpy.random.normal(0, 0.01, [cluster_id_num, args.embed_d]), 4)
27 |     cluster_embed_f = open(args.data_path + "cluster_embed.txt", "r")
28 |     for line in cluster_embed_f:
29 |         line = line.strip()
30 |         author_index = int(re.split(' ', line)[0])
31 |         embed_list = re.split(' ', line)[1:]
32 |         for i in range(len(embed_list)):
33 |             cluster_embed[author_index][i] = embed_list[i]
34 | 
35 |     kmeans = KMeans(n_clusters = args.C_n, random_state = 0).fit(cluster_embed)
36 |
37 | cluster_id_list = [0] * cluster_id_num
38 | cluster_id_f = open(args.data_path + "cluster.txt", "r")
39 | for line in cluster_id_f:
40 | line = line.strip()
41 | author_index = int(re.split(',',line)[0])
42 | cluster_id = int(re.split(',',line)[1])
43 | cluster_id_list[author_index] = cluster_id
44 |
45 | #NMI
46 | print ("NMI: " + str(normalized_mutual_info_score(kmeans.labels_, cluster_id_list)))
47 | print ("ARI: " + str(adjusted_rand_score(kmeans.labels_, cluster_id_list)))
48 |
49 |
--------------------------------------------------------------------------------
/NC/MAGNN/.gitignore:
--------------------------------------------------------------------------------
1 | data/*
2 | checkpoint/*
3 |
--------------------------------------------------------------------------------
/NC/MAGNN/model/__init__.py:
--------------------------------------------------------------------------------
1 | from model.MAGNN_nc import MAGNN_nc
2 | from model.MAGNN_nc_mb import MAGNN_nc_mb
3 | from model.MAGNN_lp import MAGNN_lp
4 |
--------------------------------------------------------------------------------
/NC/MAGNN/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/HGB/ca6fd5bb0c1ca32e63b132c8bfe8f11a4a6629fe/NC/MAGNN/utils/__init__.py
--------------------------------------------------------------------------------
/NC/MAGNN/utils/pytorchtools.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import torch
3 |
4 |
5 | class EarlyStopping:
6 | """Early stops the training if validation loss doesn't improve after a given patience."""
7 | def __init__(self, patience, verbose=False, delta=0, save_path='checkpoint.pt'):
8 | """
9 | Args:
10 |             patience (int): How long to wait after the last time validation loss improved.
11 |                             (required; no default)
12 | verbose (bool): If True, prints a message for each validation loss improvement.
13 | Default: False
14 | delta (float): Minimum change in the monitored quantity to qualify as an improvement.
15 | Default: 0
16 | """
17 | self.patience = patience
18 | self.verbose = verbose
19 | self.counter = 0
20 | self.best_score = None
21 | self.early_stop = False
22 | self.val_loss_min = np.Inf
23 | self.delta = delta
24 | self.save_path = save_path
25 |
26 | def __call__(self, val_loss, model):
27 |
28 | score = -val_loss
29 |
30 | if self.best_score is None:
31 | self.best_score = score
32 | self.save_checkpoint(val_loss, model)
33 | elif score < self.best_score - self.delta:
34 | self.counter += 1
35 | print(f'EarlyStopping counter: {self.counter} out of {self.patience}')
36 | if self.counter >= self.patience:
37 | self.early_stop = True
38 | else:
39 | self.best_score = score
40 | self.save_checkpoint(val_loss, model)
41 | self.counter = 0
42 |
43 | def save_checkpoint(self, val_loss, model):
44 |         """Saves the model when the validation loss decreases."""
45 | if self.verbose:
46 | print(f'Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}). Saving model ...')
47 | torch.save(model.state_dict(), self.save_path)
48 | self.val_loss_min = val_loss
49 |
--------------------------------------------------------------------------------
/NC/RSHN/torch_geometric/__init__.py:
--------------------------------------------------------------------------------
1 | __version__ = '1.0.0'
2 |
3 | __all__ = ['__version__']
4 |
--------------------------------------------------------------------------------
/NC/RSHN/torch_geometric/data/__init__.py:
--------------------------------------------------------------------------------
1 | from .data import Data
2 | from .batch import Batch
3 | from .dataset import Dataset
4 | from .in_memory_dataset import InMemoryDataset
5 | from .dataloader import DataLoader, DenseDataLoader
6 | from .download import download_url
7 | from .extract import extract_tar, extract_zip, extract_gz
8 |
9 | __all__ = [
10 | 'Data',
11 | 'Batch',
12 | 'Dataset',
13 | 'InMemoryDataset',
14 | 'DataLoader',
15 | 'DenseDataLoader',
16 | 'download_url',
17 | 'extract_tar',
18 | 'extract_zip',
19 | 'extract_gz',
20 | ]
21 |
--------------------------------------------------------------------------------
/NC/RSHN/torch_geometric/data/batch.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torch_geometric.data import Data
3 |
4 |
5 | class Batch(Data):
6 | def __init__(self, batch=None, **kwargs):
7 | super(Batch, self).__init__(**kwargs)
8 | self.batch = batch
9 |
10 | @staticmethod
11 | def from_data_list(data_list):
12 | """"""
13 | keys = [set(data.keys) for data in data_list]
14 | keys = list(set.union(*keys))
15 | assert 'batch' not in keys
16 |
17 | batch = Batch()
18 |
19 | for key in keys:
20 | batch[key] = []
21 | batch.batch = []
22 |
23 | cumsum = 0
24 | for i, data in enumerate(data_list):
25 | num_nodes = data.num_nodes
26 | batch.batch.append(torch.full((num_nodes, ), i, dtype=torch.long))
27 | for key in data.keys:
28 | item = data[key]
29 | item = item + cumsum if batch.cumsum(key, item) else item
30 | batch[key].append(item)
31 | cumsum += num_nodes
32 |
33 | for key in keys:
34 | batch[key] = torch.cat(
35 | batch[key], dim=data_list[0].cat_dim(key, batch[key][0]))
36 | batch.batch = torch.cat(batch.batch, dim=-1)
37 | return batch.contiguous()
38 |
39 | def cumsum(self, key, item):
40 | return item.dim() > 1 and item.dtype == torch.long
41 |
42 | @property
43 | def num_graphs(self):
44 | """"""
45 | return self.batch[-1].item() + 1
46 |
--------------------------------------------------------------------------------
/NC/RSHN/torch_geometric/data/dataloader.py:
--------------------------------------------------------------------------------
1 | import torch.utils.data
2 | from torch.utils.data.dataloader import default_collate
3 |
4 | from torch_geometric.data import Batch
5 |
6 |
7 | class DataLoader(torch.utils.data.DataLoader):
8 | def __init__(self, dataset, batch_size=1, shuffle=True, **kwargs):
9 | super(DataLoader, self).__init__(
10 | dataset,
11 | batch_size,
12 | shuffle,
13 | collate_fn=lambda batch: Batch.from_data_list(batch),
14 | **kwargs)
15 |
16 |
17 | class DenseDataLoader(torch.utils.data.DataLoader):
18 | def __init__(self, dataset, batch_size=1, shuffle=True, **kwargs):
19 | def dense_collate(data_list):
20 | batch = Batch()
21 | for key in data_list[0].keys:
22 | batch[key] = default_collate([d[key] for d in data_list])
23 | return batch
24 |
25 | super(DenseDataLoader, self).__init__(
26 | dataset, batch_size, shuffle, collate_fn=dense_collate, **kwargs)
27 |
--------------------------------------------------------------------------------
/NC/RSHN/torch_geometric/data/dataset.py:
--------------------------------------------------------------------------------
1 | import collections.abc
2 | import os.path as osp
3 |
4 | import torch.utils.data
5 |
6 | from .makedirs import makedirs
7 |
8 |
9 | def to_list(x):
10 |     if not isinstance(x, collections.abc.Iterable) or isinstance(x, str):
11 | x = [x]
12 | return x
13 |
14 |
15 | def files_exist(files):
16 | return all([osp.exists(f) for f in files])
17 |
18 |
19 | class Dataset(torch.utils.data.Dataset):
20 | @property
21 | def raw_file_names(self):
22 | """"""
23 | raise NotImplementedError
24 |
25 | @property
26 | def processed_file_names(self):
27 | """"""
28 | raise NotImplementedError
29 |
30 | def download(self):
31 | """"""
32 | raise NotImplementedError
33 |
34 | def process(self):
35 | """"""
36 | raise NotImplementedError
37 |
38 | def __len__(self):
39 | """"""
40 | raise NotImplementedError
41 |
42 | def get(self, idx):
43 | """"""
44 | raise NotImplementedError
45 |
46 | def __init__(self,
47 | root,
48 | transform=None,
49 | pre_transform=None,
50 | pre_filter=None):
51 | super(Dataset, self).__init__()
52 |
53 | self.root = osp.expanduser(osp.normpath(root))
54 | self.raw_dir = osp.join(self.root, 'raw')
55 | self.processed_dir = osp.join(self.root, 'processed')
56 | self.transform = transform
57 | self.pre_transform = pre_transform
58 | self.pre_filter = pre_filter
59 |
60 | self._download()
61 | self._process()
62 |
63 | @property
64 | def num_features(self):
65 | """"""
66 | return self[0].num_features
67 |
68 | @property
69 | def raw_paths(self):
70 | files = to_list(self.raw_file_names)
71 | return [osp.join(self.raw_dir, f) for f in files]
72 |
73 | @property
74 | def processed_paths(self):
75 | files = to_list(self.processed_file_names)
76 | return [osp.join(self.processed_dir, f) for f in files]
77 |
78 | def _download(self):
79 | if files_exist(self.raw_paths): # pragma: no cover
80 | return
81 |
82 | makedirs(self.raw_dir)
83 | self.download()
84 |
85 | def _process(self):
86 | if files_exist(self.processed_paths): # pragma: no cover
87 | return
88 |
89 | print('Processing...')
90 |
91 | makedirs(self.processed_dir)
92 | self.process()
93 |
94 | print('Done!')
95 |
96 | def __getitem__(self, idx): # pragma: no cover
97 | data = self.get(idx)
98 | data = data if self.transform is None else self.transform(data)
99 | return data
100 |
101 | def __repr__(self): # pragma: no cover
102 | return '{}({})'.format(self.__class__.__name__, len(self))
103 |
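To illustrate which hooks a concrete dataset must implement, here is a hypothetical minimal subclass; the file names and tensor contents are made up:

```python
import os.path as osp
import torch

class ToyDataset(Dataset):
    @property
    def raw_file_names(self):
        return ['toy.pt']            # _download() skips download() if this exists

    @property
    def processed_file_names(self):
        return ['data.pt']           # _process() skips process() if this exists

    def download(self):
        # a real dataset would fetch files into self.raw_dir; we fabricate one
        torch.save(torch.randn(4, 2), osp.join(self.raw_dir, 'toy.pt'))

    def process(self):
        x = torch.load(osp.join(self.raw_dir, 'toy.pt'))
        torch.save(x, osp.join(self.processed_dir, 'data.pt'))

    def __len__(self):
        return 1

    def get(self, idx):
        return torch.load(osp.join(self.processed_dir, 'data.pt'))

ds = ToyDataset('/tmp/toy_dataset')  # __init__ triggers _download() and _process()
print(ds[0].shape)
```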
--------------------------------------------------------------------------------
/NC/RSHN/torch_geometric/data/download.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function
2 |
3 | import os.path as osp
4 | from six.moves import urllib
5 |
6 | from .makedirs import makedirs
7 |
8 |
9 | def download_url(url, folder, log=True):
10 | if log:
11 | print('Downloading', url)
12 |
13 | makedirs(folder)
14 |
15 | data = urllib.request.urlopen(url)
16 | filename = url.rpartition('/')[2]
17 | path = osp.join(folder, filename)
18 |
19 | with open(path, 'wb') as f:
20 | f.write(data.read())
21 |
22 | return path
23 |
--------------------------------------------------------------------------------
/NC/RSHN/torch_geometric/data/extract.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function
2 |
3 | import os.path as osp
4 | import tarfile
5 | import zipfile
6 | import gzip
7 | import shutil
8 |
9 |
10 | def maybe_log(path, log=True):
11 | if log:
12 | print('Extracting', path)
13 |
14 |
15 | def extract_tar(path, folder, mode='r:gz', log=True):
16 | maybe_log(path, log)
17 | with tarfile.open(path, mode) as f:
18 | f.extractall(folder)
19 |
20 |
21 | def extract_zip(path, folder, log=True):
22 | maybe_log(path, log)
23 | with zipfile.ZipFile(path, 'r') as f:
24 | f.extractall(folder)
25 |
26 |
27 | def extract_gz(path, folder, name, log=True):
28 | maybe_log(path, log)
29 | with gzip.open(path, 'rb') as f_in:
30 | with open(osp.join(folder, name), 'wb') as f_out:
31 | shutil.copyfileobj(f_in, f_out)
32 |
--------------------------------------------------------------------------------
/NC/RSHN/torch_geometric/data/makedirs.py:
--------------------------------------------------------------------------------
1 | import os
2 | import os.path as osp
3 | import errno
4 |
5 |
6 | def makedirs(path):
7 | try:
8 | os.makedirs(osp.expanduser(osp.normpath(path)))
9 | except OSError as e:
10 |         # re-raise unless the directory already exists
11 |         if e.errno != errno.EEXIST or not osp.isdir(path):
12 |             raise e
12 |
--------------------------------------------------------------------------------
/NC/RSHN/torch_geometric/datasets/__init__.py:
--------------------------------------------------------------------------------
1 | from .entities import Entities
2 |
3 |
4 | __all__ = [
5 | 'Entities'
6 | ]
7 |
--------------------------------------------------------------------------------
/NC/RSHN/torch_geometric/nn/__init__.py:
--------------------------------------------------------------------------------
1 | from .conv import * # noqa
2 |
--------------------------------------------------------------------------------
/NC/RSHN/torch_geometric/nn/conv/__init__.py:
--------------------------------------------------------------------------------
1 | from .message_passing import MessagePassing
2 | from .nn_conv import NNConv
3 | from .relation_conv import RelationConv
4 | __all__ = [
5 | 'MessagePassing',
6 | 'NNConv',
7 | 'RelationConv',
8 | ]
9 |
--------------------------------------------------------------------------------
/NC/RSHN/torch_geometric/nn/conv/message_passing.py:
--------------------------------------------------------------------------------
1 | import inspect
2 |
3 | import torch
4 | from torch_geometric.utils import scatter_
5 |
6 |
7 | class MessagePassing(torch.nn.Module):
8 | def __init__(self, aggr='add'):
9 | super(MessagePassing, self).__init__()
10 |
11 |         self.message_args = inspect.getfullargspec(self.message)[0][1:]
12 |         self.update_args = inspect.getfullargspec(self.update)[0][2:]
13 |
14 | def propagate(self, aggr, edge_index, **kwargs):
15 | assert aggr in ['add', 'mean', 'max']
16 | kwargs['edge_index'] = edge_index
17 |
18 | size = None
19 | message_args = []
20 | for arg in self.message_args:
21 | if arg[-2:] == '_i':
22 | tmp = kwargs[arg[:-2]]
23 | size = tmp.size(0)
24 | message_args.append(tmp[edge_index[0]])
25 | elif arg[-2:] == '_j':
26 | tmp = kwargs[arg[:-2]]
27 | size = tmp.size(0)
28 | message_args.append(tmp[edge_index[1]])
29 | else:
30 | message_args.append(kwargs[arg])
31 |
32 | update_args = [kwargs[arg] for arg in self.update_args]
33 |
34 | out = self.message(*message_args)
35 | out = scatter_(aggr, out, edge_index[0], dim_size=size)
36 | out = self.update(out, *update_args)
37 |
38 | return out
39 |
40 | def message(self, x_j): # pragma: no cover
41 | return x_j
42 |
43 | def update(self, aggr_out): # pragma: no cover
44 | return aggr_out
45 |
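To make the `_i`/`_j` suffix convention concrete, a minimal hypothetical subclass that averages neighbor features (a compatible `torch_scatter` is assumed via the vendored `scatter_`):

```python
import torch

class MeanConv(MessagePassing):
    # the inherited message(x_j) returns neighbor features unchanged;
    # 'mean' then aggregates them per target node (edge_index[0])
    def forward(self, x, edge_index):
        return self.propagate('mean', edge_index, x=x)

x = torch.randn(3, 4)                  # 3 nodes, 4-dim features
edge_index = torch.tensor([[0, 0, 1],  # row 0: targets (the '_i' side)
                           [1, 2, 2]]) # row 1: sources (the '_j' side)
out = MeanConv()(x, edge_index)        # out[0] == (x[1] + x[2]) / 2
print(out.shape)
```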
--------------------------------------------------------------------------------
/NC/RSHN/torch_geometric/nn/conv/nn_conv.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torch.nn import Parameter
3 | from torch_geometric.nn.conv import MessagePassing
4 |
5 | from ..inits import reset, uniform
6 |
7 |
8 | class NNConv(MessagePassing):
9 | def __init__(self,
10 | in_channels,
11 | out_channels,
12 | nn,
13 | aggr="add",
14 | root_weight=False,
15 | bias=False):
16 | super(NNConv, self).__init__()
17 |
18 | self.in_channels = in_channels
19 | self.out_channels = out_channels
20 | self.nn = nn
21 | self.aggr = aggr
22 | self.weight = Parameter(torch.Tensor(in_channels, out_channels))
23 |
24 | if root_weight:
25 | self.root = Parameter(torch.Tensor(in_channels, out_channels))
26 | else:
27 | self.register_parameter('root', None)
28 |
29 | if bias:
30 | self.bias = Parameter(torch.Tensor(out_channels))
31 | else:
32 | self.register_parameter('bias', None)
33 |
34 | self.reset_parameters()
35 |
36 | def reset_parameters(self):
37 | reset(self.nn)
38 | size = self.in_channels
39 | uniform(size, self.weight)
40 | uniform(size, self.root)
41 | uniform(size, self.bias)
42 |
43 | def forward(self, x, edge_index, pseudo):
44 | x = x.unsqueeze(-1) if x.dim() == 1 else x
45 | edge_weight = pseudo.unsqueeze(-1) if pseudo.dim() == 1 else pseudo
46 | edge_weight = self.nn(edge_weight).view(-1, self.out_channels)
47 |
48 | x = torch.matmul(x, self.weight)
49 | return self.propagate(self.aggr, edge_index, x=x, edge_weight=edge_weight)
50 |
51 |
52 | def message(self, x_j, edge_weight):
53 | message = x_j - edge_weight
54 | return message
55 |
56 | def update(self, aggr_out, x):
57 | if self.bias is not None:
58 | aggr_out = aggr_out + self.bias
59 | return aggr_out + x
60 |
61 | def __repr__(self):
62 | return '{}({}, {})'.format(self.__class__.__name__, self.in_channels,
63 | self.out_channels)
64 |
--------------------------------------------------------------------------------
/NC/RSHN/torch_geometric/nn/conv/relation_conv.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torch.nn import Parameter
3 | import torch.nn.functional as F
4 | from torch_sparse import spmm
5 | from torch_geometric.utils import remove_self_loops, add_self_loops, softmax, add_self_edge_attr_loops
6 |
7 |
8 | class RelationConv(torch.nn.Module):
9 |
10 | def __init__(self, eps=0, train_eps=False, requires_grad=True):
11 | super(RelationConv, self).__init__()
12 |
13 | self.initial_eps = eps
14 |
15 | if train_eps:
16 | self.eps = torch.nn.Parameter(torch.Tensor([eps]))
17 | else:
18 | self.register_buffer('eps', torch.Tensor([eps]))
19 |
20 | '''beta'''
21 | self.requires_grad = requires_grad
22 | if requires_grad:
23 | self.beta = Parameter(torch.Tensor(1))
24 | else:
25 | self.register_buffer('beta', torch.ones(1))
26 |
27 | self.reset_parameters()
28 |
29 | def reset_parameters(self):
30 | self.eps.data.fill_(self.initial_eps)
31 | if self.requires_grad:
32 | self.beta.data.fill_(1)
33 |
34 | def forward(self, x, edge_index, edge_attr):
35 | """"""
36 | x = x.unsqueeze(-1) if x.dim() == 1 else x
37 | edge_index, edge_attr = remove_self_loops(edge_index, edge_attr)
38 | row, col = edge_index
39 |
40 | '''co-occurrence rate'''
41 | for i in range(len(x)):
42 | mask = torch.eq(row, i)
43 | edge_attr[mask] = F.normalize(edge_attr[mask], p=2, dim=0)
44 |
45 | '''add-self-loops'''
46 | edge_index = add_self_loops(edge_index, x.size(0))
47 | row, col = edge_index
48 | edge_attr = add_self_edge_attr_loops(edge_attr, x.size(0))
49 |
50 | x = F.normalize(x, p=2, dim=-1)
51 | beta = self.beta if self.requires_grad else self._buffers['beta']
52 | alpha = beta * edge_attr
53 | alpha = softmax(alpha, row, num_nodes=x.size(0))
54 |
55 | '''Perform the propagation.'''
56 | out = spmm(edge_index, alpha, x.size(0), x.size(1), x)
57 | out = (1 + self.eps) * x + out
58 | return out
59 |
60 | def __repr__(self):
61 | return '{}()'.format(self.__class__.__name__)
62 |
--------------------------------------------------------------------------------
/NC/RSHN/torch_geometric/nn/inits.py:
--------------------------------------------------------------------------------
1 | import math
2 |
3 |
4 | def uniform(size, tensor):
5 | stdv = 1.0 / math.sqrt(size)
6 | if tensor is not None:
7 | tensor.data.uniform_(-stdv, stdv)
8 |
9 |
10 | def glorot(tensor):
11 | stdv = math.sqrt(6.0 / (tensor.size(0) + tensor.size(1)))
12 | if tensor is not None:
13 | tensor.data.uniform_(-stdv, stdv)
14 |
15 |
16 | def zeros(tensor):
17 | if tensor is not None:
18 | tensor.data.fill_(0)
19 |
20 |
21 | def ones(tensor):
22 | if tensor is not None:
23 | tensor.data.fill_(1)
24 |
25 |
26 | def reset(nn):
27 | def _reset(item):
28 | if hasattr(item, 'reset_parameters'):
29 | item.reset_parameters()
30 |
31 | if nn is not None:
32 | if hasattr(nn, 'children') and len(list(nn.children())) > 0:
33 | for item in nn.children():
34 | _reset(item)
35 | else:
36 | _reset(nn)
37 |
--------------------------------------------------------------------------------
/NC/RSHN/torch_geometric/utils/__init__.py:
--------------------------------------------------------------------------------
1 | from .degree import degree
2 | from .scatter import scatter_
3 | from .softmax import softmax
4 | from .undirected import is_undirected, to_undirected
5 | from .isolated import contains_isolated_nodes
6 | from .loop import contains_self_loops, remove_self_loops, add_self_loops, add_self_edge_attr_loops
7 | from .one_hot import one_hot
8 | from .grid import grid
9 | from .normalized_cut import normalized_cut
10 | from .sparse import dense_to_sparse, sparse_to_dense
11 | from .to_batch import to_batch
12 | from .convert import to_scipy_sparse_matrix, to_networkx
13 | from .metric import (accuracy, true_positive, true_negative, false_positive,
14 | false_negative, precision, recall, f1_score)
15 |
16 | __all__ = [
17 | 'degree',
18 | 'scatter_',
19 | 'softmax',
20 | 'is_undirected',
21 | 'to_undirected',
22 | 'contains_self_loops',
23 | 'remove_self_loops',
24 | 'add_self_loops',
25 | 'add_self_edge_attr_loops',
26 | 'contains_isolated_nodes',
27 | 'one_hot',
28 | 'grid',
29 | 'normalized_cut',
30 | 'dense_to_sparse',
31 | 'sparse_to_dense',
32 | 'to_batch',
33 | 'to_scipy_sparse_matrix',
34 | 'to_networkx',
35 | 'accuracy',
36 | 'true_positive',
37 | 'true_negative',
38 | 'false_positive',
39 | 'false_negative',
40 | 'precision',
41 | 'recall',
42 | 'f1_score',
43 | ]
44 |
--------------------------------------------------------------------------------
/NC/RSHN/torch_geometric/utils/convert.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import scipy.sparse
3 | import networkx as nx
4 |
5 | from .num_nodes import maybe_num_nodes
6 |
7 |
8 | def to_scipy_sparse_matrix(edge_index, edge_attr=None, num_nodes=None):
9 | row, col = edge_index.cpu()
10 |
11 | if edge_attr is None:
12 | edge_attr = torch.ones(row.size(0))
13 | else:
14 | edge_attr = edge_attr.view(-1).cpu()
15 | assert edge_attr.size(0) == row.size(0)
16 |
17 | N = maybe_num_nodes(edge_index, num_nodes)
18 | out = scipy.sparse.coo_matrix((edge_attr, (row, col)), (N, N))
19 | return out
20 |
21 |
22 | def to_networkx(edge_index, x=None, edge_attr=None, pos=None, num_nodes=None):
23 | num_nodes = num_nodes if x is None else x.size(0)
24 | num_nodes = num_nodes if pos is None else pos.size(0)
25 | num_nodes = maybe_num_nodes(edge_index, num_nodes)
26 |
27 | G = nx.DiGraph()
28 |
29 | for i in range(num_nodes):
30 | G.add_node(i)
31 | if x is not None:
32 | G.nodes[i]['x'] = x[i].cpu().numpy()
33 | if pos is not None:
34 | G.nodes[i]['pos'] = pos[i].cpu().numpy()
35 |
36 | for i in range(edge_index.size(1)):
37 | source, target = edge_index[0][i].item(), edge_index[1][i].item()
38 | G.add_edge(source, target)
39 | if edge_attr is not None:
40 | if edge_attr.numel() == edge_attr.size(0):
41 | G[source][target]['weight'] = edge_attr[i].item()
42 | else:
43 | G[source][target]['weight'] = edge_attr[i].cpu().numpy()
44 |
45 | return G
46 |
--------------------------------------------------------------------------------
/NC/RSHN/torch_geometric/utils/degree.py:
--------------------------------------------------------------------------------
1 | import torch
2 |
3 | from .num_nodes import maybe_num_nodes
4 |
5 |
6 | def degree(index, num_nodes=None, dtype=None):
7 | """Computes the degree of a given index tensor.
8 |
9 | Args:
10 | index (LongTensor): Source or target indices of edges.
11 | num_nodes (int, optional): The number of nodes in :attr:`index`.
12 | (default: :obj:`None`)
13 | dtype (:obj:`torch.dtype`, optional): The desired data type of the
14 | returned tensor.
15 |
16 | :rtype: :class:`Tensor`
17 |
18 | .. testsetup::
19 |
20 | import torch
21 |
22 | .. testcode::
23 |
24 | from torch_geometric.utils import degree
25 | index = torch.tensor([0, 1, 0, 2, 0])
26 | out = degree(index)
27 | """
28 |
29 | num_nodes = maybe_num_nodes(index, num_nodes)
30 | out = torch.zeros((num_nodes), dtype=dtype, device=index.device)
31 | return out.scatter_add_(0, index, out.new_ones((index.size(0))))
32 |
--------------------------------------------------------------------------------
/NC/RSHN/torch_geometric/utils/grid.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torch_sparse import coalesce
3 |
4 |
5 | def grid(height, width, dtype=None, device=None):
6 | edge_index = grid_index(height, width, device)
7 | pos = grid_pos(height, width, dtype, device)
8 | return edge_index, pos
9 |
10 |
11 | def grid_index(height, width, device=None):
12 | w = width
13 | kernel = [-w - 1, -1, w - 1, -w, 0, w, -w + 1, 1, w + 1]
14 | kernel = torch.tensor(kernel, device=device)
15 |
16 | row = torch.arange(height * width, dtype=torch.long, device=device)
17 | row = row.view(-1, 1).repeat(1, kernel.size(0))
18 | col = row + kernel.view(1, -1)
19 | row, col = row.view(height, -1), col.view(height, -1)
20 | index = torch.arange(3, row.size(1) - 3, dtype=torch.long, device=device)
21 | row, col = row[:, index].view(-1), col[:, index].view(-1)
22 |
23 | mask = (col >= 0) & (col < height * width)
24 | row, col = row[mask], col[mask]
25 |
26 | edge_index = torch.stack([row, col], dim=0)
27 | edge_index, _ = coalesce(edge_index, None, height * width, height * width)
28 |
29 | return edge_index
30 |
31 |
32 | def grid_pos(height, width, dtype=None, device=None):
33 | x = torch.arange(width, dtype=dtype, device=device)
34 | y = (height - 1) - torch.arange(height, dtype=dtype, device=device)
35 |
36 | x = x.repeat(height)
37 | y = y.unsqueeze(-1).repeat(1, width).view(-1)
38 |
39 | return torch.stack([x, y], dim=-1)
40 |
--------------------------------------------------------------------------------
/NC/RSHN/torch_geometric/utils/isolated.py:
--------------------------------------------------------------------------------
1 | import torch
2 |
3 | from .num_nodes import maybe_num_nodes
4 | from .loop import remove_self_loops
5 |
6 |
7 | def contains_isolated_nodes(edge_index, num_nodes=None):
8 | num_nodes = maybe_num_nodes(edge_index, num_nodes)
9 | (row, _), _ = remove_self_loops(edge_index)
10 | return torch.unique(row).size(0) < num_nodes
11 |
--------------------------------------------------------------------------------
/NC/RSHN/torch_geometric/utils/loop.py:
--------------------------------------------------------------------------------
1 | import torch
2 |
3 | from .num_nodes import maybe_num_nodes
4 |
5 |
6 | def contains_self_loops(edge_index):
7 | row, col = edge_index
8 | mask = row == col
9 | return mask.sum().item() > 0
10 |
11 |
12 | def remove_self_loops(edge_index, edge_attr=None):
13 | row, col = edge_index
14 | mask = row != col
15 | edge_attr = edge_attr if edge_attr is None else edge_attr[mask]
16 | mask = mask.unsqueeze(0).expand_as(edge_index)
17 | edge_index = edge_index[mask].view(2, -1)
18 |
19 | return edge_index, edge_attr
20 |
21 |
22 | def add_self_loops(edge_index, num_nodes=None):
23 | num_nodes = maybe_num_nodes(edge_index, num_nodes)
24 |
25 | dtype, device = edge_index.dtype, edge_index.device
26 | loop = torch.arange(0, num_nodes, dtype=dtype, device=device)
27 | loop = loop.unsqueeze(0).repeat(2, 1)
28 | edge_index = torch.cat([edge_index, loop], dim=1)
29 |
30 | return edge_index
31 |
32 | def add_self_edge_attr_loops(edge_attr, num_nodes=None):
33 | dtype, device = edge_attr.dtype, edge_attr.device
34 | loop = torch.ones(num_nodes, dtype=dtype, device=device)
35 | edge_attr = torch.cat([edge_attr, loop], dim=0)
36 |
37 | return edge_attr
--------------------------------------------------------------------------------
/NC/RSHN/torch_geometric/utils/metric.py:
--------------------------------------------------------------------------------
1 | from __future__ import division
2 |
3 | import torch
4 |
5 |
6 | def accuracy(pred, target):
7 | return (pred == target).sum().item() / target.numel()
8 |
9 |
10 | def true_positive(pred, target, num_classes):
11 | out = []
12 | for i in range(num_classes):
13 | out.append(((pred == i) & (target == i)).sum())
14 |
15 | return torch.tensor(out)
16 |
17 |
18 | def true_negative(pred, target, num_classes):
19 | out = []
20 | for i in range(num_classes):
21 | out.append(((pred != i) & (target != i)).sum())
22 |
23 | return torch.tensor(out)
24 |
25 |
26 | def false_positive(pred, target, num_classes):
27 | out = []
28 | for i in range(num_classes):
29 | out.append(((pred == i) & (target != i)).sum())
30 |
31 | return torch.tensor(out)
32 |
33 |
34 | def false_negative(pred, target, num_classes):
35 | out = []
36 | for i in range(num_classes):
37 | out.append(((pred != i) & (target == i)).sum())
38 |
39 | return torch.tensor(out)
40 |
41 |
42 | def precision(pred, target, num_classes):
43 | tp = true_positive(pred, target, num_classes).to(torch.float)
44 | fp = false_positive(pred, target, num_classes).to(torch.float)
45 |
46 | out = tp / (tp + fp)
47 | out[torch.isnan(out)] = 0
48 |
49 | return out
50 |
51 |
52 | def recall(pred, target, num_classes):
53 | tp = true_positive(pred, target, num_classes).to(torch.float)
54 | fn = false_negative(pred, target, num_classes).to(torch.float)
55 |
56 | out = tp / (tp + fn)
57 | out[torch.isnan(out)] = 0
58 |
59 | return out
60 |
61 |
62 | def f1_score(pred, target, num_classes):
63 | prec = precision(pred, target, num_classes)
64 | rec = recall(pred, target, num_classes)
65 |
66 | score = 2 * (prec * rec) / (prec + rec)
67 | score[torch.isnan(score)] = 0
68 |
69 | return score
70 |
--------------------------------------------------------------------------------
/NC/RSHN/torch_geometric/utils/normalized_cut.py:
--------------------------------------------------------------------------------
1 | from torch_geometric.utils import degree
2 |
3 |
4 | def normalized_cut(edge_index, edge_attr, num_nodes=None):
5 | row, col = edge_index
6 | deg = 1 / degree(row, num_nodes, edge_attr.dtype)
7 | deg = deg[row] + deg[col]
8 | cut = edge_attr * deg
9 | return cut
10 |
--------------------------------------------------------------------------------
/NC/RSHN/torch_geometric/utils/num_nodes.py:
--------------------------------------------------------------------------------
1 | def maybe_num_nodes(edge_index, num_nodes=None):
2 | return edge_index.max().item() + 1 if num_nodes is None else num_nodes
3 |
--------------------------------------------------------------------------------
/NC/RSHN/torch_geometric/utils/one_hot.py:
--------------------------------------------------------------------------------
1 | import torch
2 |
3 | from .repeat import repeat
4 |
5 |
6 | def one_hot(src, num_classes=None, dtype=None):
7 | src = src.to(torch.long)
8 | src = src.unsqueeze(-1) if src.dim() == 1 else src
9 | assert src.dim() == 2
10 |
11 | if num_classes is None:
12 | num_classes = src.max(dim=0)[0] + 1
13 | else:
14 | num_classes = torch.tensor(
15 | repeat(num_classes, length=src.size(1)),
16 | dtype=torch.long,
17 | device=src.device)
18 |
19 | if src.size(1) > 1:
20 | zero = torch.tensor([0], device=src.device)
21 | src = src + torch.cat([zero, torch.cumsum(num_classes, 0)[:-1]])
22 |
23 | size = src.size(0), num_classes.sum()
24 | out = torch.zeros(size, dtype=dtype, device=src.device)
25 | out.scatter_(1, src, 1)
26 | return out
27 |
--------------------------------------------------------------------------------
/NC/RSHN/torch_geometric/utils/repeat.py:
--------------------------------------------------------------------------------
1 | import numbers
2 | import itertools
3 |
4 |
5 | def repeat(src, length):
6 | if src is None:
7 | return None
8 | if isinstance(src, numbers.Number):
9 | return list(itertools.repeat(src, length))
10 | if (len(src) > length):
11 | return src[:length]
12 | if (len(src) < length):
13 | return src + list(itertools.repeat(src[-1], length - len(src)))
14 | return src
15 |
--------------------------------------------------------------------------------
/NC/RSHN/torch_geometric/utils/scatter.py:
--------------------------------------------------------------------------------
1 | import torch_scatter
2 |
3 |
4 | def scatter_(name, src, index, dim_size=None):
5 | r"""Aggregates all values from the :attr:`src` tensor at the indices
6 | specified in the :attr:`index` tensor along the first dimension.
7 | If multiple indices reference the same location, their contributions
8 | are aggregated according to :attr:`name` (either :obj:`"add"` or
9 | :obj:`"mean"`; :obj:`"max"` is disabled in this stripped-down copy).
10 |
11 | Args:
12 |         name (string): The aggregation to use (:obj:`"add"` or
13 |             :obj:`"mean"`).
14 | src (Tensor): The source tensor.
15 | index (LongTensor): The indices of elements to scatter.
16 | dim_size (int, optional): Automatically create output tensor with size
17 | :attr:`dim_size` in the first dimension. If set to :attr:`None`, a
18 | minimal sized output tensor is returned. (default: :obj:`None`)
19 |
20 | :rtype: :class:`Tensor`
21 |
22 | .. testsetup::
23 |
24 | import torch
25 |
26 | .. testcode::
27 |
28 | from torch_geometric.utils import scatter_
29 | src = torch.Tensor([2, 3, -2, 1, 1])
30 | index = torch.tensor([0, 1, 0, 1, 2])
31 | out = scatter_("add", src, index)
32 | """
33 |
34 | assert name in ['add', 'mean']#, 'max']
35 |
36 | op = getattr(torch_scatter, 'scatter_{}'.format(name))
37 | # fill_value = -1e38 if name is 'max' else 0
38 |
39 | out = op(src, index, 0, None, dim_size)#, fill_value)
40 | if isinstance(out, tuple):
41 | out = out[0]
42 |
43 | # if name is 'max':
44 | # out[out == fill_value] = 0
45 |
46 | return out
47 |
--------------------------------------------------------------------------------
/NC/RSHN/torch_geometric/utils/softmax.py:
--------------------------------------------------------------------------------
1 | from torch_scatter import scatter_max, scatter_add
2 |
3 | from .num_nodes import maybe_num_nodes
4 |
5 |
6 | def softmax(src, index, num_nodes=None):
7 | r"""Sparse softmax of all values from the :attr:`src` tensor at the indices
8 | specified in the :attr:`index` tensor along the first dimension.
9 |
10 | Args:
11 | src (Tensor): The source tensor.
12 | index (LongTensor): The indices of elements for applying the softmax.
13 | num_nodes (int, optional): Automatically create output tensor with size
14 | :attr:`num_nodes` in the first dimension. If set to :attr:`None`, a
15 | minimal sized output tensor is returned. (default: :obj:`None`)
16 |
17 | :rtype: :class:`Tensor`
18 |
19 | .. testsetup::
20 |
21 | import torch
22 |
23 | .. testcode::
24 |
25 | from torch_geometric.utils import softmax
26 | src = torch.Tensor([2, 3, -2, 1, 1])
27 | index = torch.tensor([0, 1, 0, 1, 2])
28 | out = softmax(src, index)
29 | """
30 |
31 | num_nodes = maybe_num_nodes(index, num_nodes)
32 |
33 | out = src - scatter_max(src, index, dim=0, dim_size=num_nodes)[0][index]
34 | out = out.exp()
35 | out = out / scatter_add(out, index, dim=0, dim_size=num_nodes)[index]
36 |
37 | return out
38 |
--------------------------------------------------------------------------------
/NC/RSHN/torch_geometric/utils/sparse.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torch_sparse import coalesce
3 |
4 | from .num_nodes import maybe_num_nodes
5 |
6 |
7 | def dense_to_sparse(tensor):
8 | index = tensor.nonzero().t().contiguous()
9 | value = tensor[index[0], index[1]]
10 | index, value = coalesce(index, value, tensor.size(0), tensor.size(1))
11 | return index, value
12 |
13 |
14 | def sparse_to_dense(edge_index, edge_attr, num_nodes=None):
15 | N = maybe_num_nodes(edge_index, num_nodes)
16 |
17 | adj = torch.sparse_coo_tensor(edge_index, edge_attr, torch.Size([N, N]))
18 | return adj.to_dense()
19 |
--------------------------------------------------------------------------------
/NC/RSHN/torch_geometric/utils/to_batch.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torch_scatter import scatter_add
3 |
4 |
5 | def to_batch(x, batch, fill_value=0):
6 | num_nodes = scatter_add(batch.new_ones(x.size(0)), batch, dim=0)
7 | batch_size, max_num_nodes = num_nodes.size(0), num_nodes.max().item()
8 | cum_nodes = torch.cat([batch.new_zeros(1), num_nodes.cumsum(dim=0)], dim=0)
9 |
10 | index = torch.arange(batch.size(0), dtype=torch.long, device=x.device)
11 | index = (index - cum_nodes[batch]) + (batch * max_num_nodes)
12 |
13 | size = [batch_size * max_num_nodes] + list(x.size())[1:]
14 | batch_x = x.new_full(size, fill_value)
15 | batch_x[index] = x
16 | size = [batch_size, max_num_nodes] + list(x.size())[1:]
17 | batch_x = batch_x.view(size)
18 |
19 | return batch_x, num_nodes
20 |
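21 | # Shape sketch (illustrative): packing two graphs with 2 and 3 nodes and
22 | # feature dimension 4 yields a zero-padded dense tensor of shape [2, 3, 4].
23 | #
24 | #   x = torch.randn(5, 4)                  # 5 nodes in total
25 | #   batch = torch.tensor([0, 0, 1, 1, 1])  # graph assignment per node
26 | #   batch_x, num_nodes = to_batch(x, batch)
27 | #   # batch_x.shape == (2, 3, 4); num_nodes == tensor([2, 3])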
--------------------------------------------------------------------------------
/NC/RSHN/torch_geometric/utils/undirected.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torch_sparse import coalesce
3 |
4 | from .num_nodes import maybe_num_nodes
5 |
6 |
7 | def is_undirected(edge_index, num_nodes=None):
8 | num_nodes = maybe_num_nodes(edge_index, num_nodes)
9 | edge_index, _ = coalesce(edge_index, None, num_nodes, num_nodes)
10 | undirected_edge_index = to_undirected(edge_index, num_nodes=num_nodes)
11 | return edge_index.size(1) == undirected_edge_index.size(1)
12 |
13 |
14 | def to_undirected(edge_index, num_nodes=None):
15 | num_nodes = maybe_num_nodes(edge_index, num_nodes)
16 |
17 | row, col = edge_index
18 | row, col = torch.cat([row, col], dim=0), torch.cat([col, row], dim=0)
19 | edge_index = torch.stack([row, col], dim=0)
20 | edge_index, _ = coalesce(edge_index, None, num_nodes, num_nodes)
21 |
22 | return edge_index
23 |
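24 | # Illustrative: to_undirected() mirrors every edge and coalesce() deduplicates,
25 | # so a single directed edge 0 -> 1 becomes the pair {0 -> 1, 1 -> 0}:
26 | #
27 | #   edge_index = torch.tensor([[0], [1]])
28 | #   to_undirected(edge_index)  # tensor([[0, 1], [1, 0]])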
--------------------------------------------------------------------------------
/NC/benchmark/.gitignore:
--------------------------------------------------------------------------------
1 | data
2 | **/checkpoint
3 |
--------------------------------------------------------------------------------
/NC/benchmark/README.md:
--------------------------------------------------------------------------------
1 | # benchmark
2 |
3 | benchmark data loader and evaluation scripts
4 |
5 | ## data
6 |
7 | Warning: since the test data is publicly released, take care not to overfit to it or leak it during training.
8 |
9 | ## data format
10 |
11 | * All ids begin from 0.
12 | * Each node type takes a continuous range of node_id.
13 | * node_id and node_type_id follow the same order, i.e. nodes with node_type 0 take the first range of node_ids, nodes with node_type 1 take the second range, and so on.
14 | * One-hot node features can be omitted.
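15 |
16 | For illustration, the node_id range of each node type can be recovered from the per-type counts (the counts below are hypothetical):
17 |
18 | ```python
19 | counts = [4057, 14328, 7723]  # hypothetical node counts for types 0, 1, 2
20 | ranges, shift = {}, 0
21 | for node_type, cnt in enumerate(counts):
22 |     ranges[node_type] = (shift, shift + cnt)  # node_ids [start, end) for this type
23 |     shift += cnt
24 | # type 0 -> [0, 4057), type 1 -> [4057, 18385), type 2 -> [18385, 26108)
25 | ```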
--------------------------------------------------------------------------------
/NC/benchmark/methods/GNN/README.md:
--------------------------------------------------------------------------------
1 | # GCN and GAT for benchmark
2 |
3 | (To be tuned)
4 |
5 | ```
6 | python run.py --dataset DBLP --model-type gat
7 | python run.py --dataset DBLP --model-type gcn --weight-decay 1e-6 --lr 1e-3
8 |
9 | python run.py --dataset ACM --model-type gat --feats-type 2
10 | python run.py --dataset ACM --model-type gcn --weight-decay 1e-6 --lr 1e-3 --feats-type=0
11 |
12 | python run.py --dataset Freebase --model-type gat
13 | python run.py --dataset Freebase --model-type gcn
14 |
15 | python run_multi.py --dataset IMDB --model-type gat --feats-type 0 --num-layers 4
16 | python run_multi.py --dataset IMDB --model-type gcn --feats-type 0 --num-layers 3
17 | ```
18 |
19 | ## running environment
20 |
21 | * torch 1.6.0 cuda 10.1
22 | * dgl 0.4.3 cuda 10.1
23 | * networkx 2.3
24 | * scikit-learn 0.23.2
25 | * scipy 1.5.2
26 |
--------------------------------------------------------------------------------
/NC/benchmark/methods/GNN/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/HGB/ca6fd5bb0c1ca32e63b132c8bfe8f11a4a6629fe/NC/benchmark/methods/GNN/utils/__init__.py
--------------------------------------------------------------------------------
/NC/benchmark/methods/GNN/utils/data.py:
--------------------------------------------------------------------------------
1 | import networkx as nx
2 | import numpy as np
3 | import scipy
4 | import pickle
5 | import scipy.sparse as sp
6 |
7 | def load_data(prefix='DBLP'):
8 | from scripts.data_loader import data_loader
9 | dl = data_loader('../../data/'+prefix)
10 | features = []
11 | for i in range(len(dl.nodes['count'])):
12 | th = dl.nodes['attr'][i]
13 | if th is None:
14 | features.append(sp.eye(dl.nodes['count'][i]))
15 | else:
16 | features.append(th)
17 | adjM = sum(dl.links['data'].values())
18 | labels = np.zeros((dl.nodes['count'][0], dl.labels_train['num_classes']), dtype=int)
19 | val_ratio = 0.2
20 | train_idx = np.nonzero(dl.labels_train['mask'])[0]
21 | np.random.shuffle(train_idx)
22 | split = int(train_idx.shape[0]*val_ratio)
23 | val_idx = train_idx[:split]
24 | train_idx = train_idx[split:]
25 | train_idx = np.sort(train_idx)
26 | val_idx = np.sort(val_idx)
27 | test_idx = np.nonzero(dl.labels_test['mask'])[0]
28 | labels[train_idx] = dl.labels_train['data'][train_idx]
29 | labels[val_idx] = dl.labels_train['data'][val_idx]
30 | if prefix != 'IMDB':
31 | labels = labels.argmax(axis=1)
32 | train_val_test_idx = {}
33 | train_val_test_idx['train_idx'] = train_idx
34 | train_val_test_idx['val_idx'] = val_idx
35 | train_val_test_idx['test_idx'] = test_idx
36 | return features,\
37 | adjM, \
38 | labels,\
39 | train_val_test_idx,\
40 | dl
41 |
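42 | # Note: load_data() carves a random validation split (val_ratio = 0.2) out of
43 | # the training mask; test labels are left as zeros here, so evaluation is
44 | # expected to go through the returned `dl`. For multi-label IMDB the label
45 | # matrix is kept as-is; otherwise argmax reduces it to one class per node.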
--------------------------------------------------------------------------------
/NC/benchmark/methods/GNN/utils/pytorchtools.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import torch
3 |
4 |
5 | class EarlyStopping:
6 | """Early stops the training if validation loss doesn't improve after a given patience."""
7 | def __init__(self, patience, verbose=False, delta=0, save_path='checkpoint.pt'):
8 | """
9 | Args:
10 | patience (int): How long to wait after last time validation loss improved.
11 | (required in this version; no default is set)
12 | verbose (bool): If True, prints a message for each validation loss improvement.
13 | Default: False
14 | delta (float): Minimum change in the monitored quantity to qualify as an improvement.
15 | Default: 0
16 | """
17 | self.patience = patience
18 | self.verbose = verbose
19 | self.counter = 0
20 | self.best_score = None
21 | self.early_stop = False
22 | self.val_loss_min = np.Inf
23 | self.delta = delta
24 | self.save_path = save_path
25 |
26 | def __call__(self, val_loss, model):
27 |
28 | score = -val_loss
29 |
30 | if self.best_score is None:
31 | self.best_score = score
32 | self.save_checkpoint(val_loss, model)
33 | elif score < self.best_score - self.delta:
34 | self.counter += 1
35 | print(f'EarlyStopping counter: {self.counter} out of {self.patience}')
36 | if self.counter >= self.patience:
37 | self.early_stop = True
38 | else:
39 | self.best_score = score
40 | self.save_checkpoint(val_loss, model)
41 | self.counter = 0
42 |
43 | def save_checkpoint(self, val_loss, model):
44 | """Saves model when validation loss decrease."""
45 | if self.verbose:
46 | print(f'Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}). Saving model ...')
47 | torch.save(model.state_dict(), self.save_path)
48 | self.val_loss_min = val_loss
49 |
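50 | # Minimal usage sketch (illustrative; `train_one_epoch` and `evaluate` are
51 | # hypothetical helpers, the latter returning the validation loss):
52 | #
53 | #   stopper = EarlyStopping(patience=30, verbose=True, save_path='checkpoint.pt')
54 | #   for epoch in range(max_epochs):
55 | #       train_one_epoch(model)
56 | #       stopper(evaluate(model), model)
57 | #       if stopper.early_stop:
58 | #           break
59 | #   model.load_state_dict(torch.load('checkpoint.pt'))  # restore best weights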
--------------------------------------------------------------------------------
/NC/benchmark/methods/GTN/README.md:
--------------------------------------------------------------------------------
1 | # GTN for benchmark
2 |
3 | ```
4 | python main.py --dataset DBLP --num_layers 2 --feats-type 2 --adaptive_lr true
5 | python main.py --dataset ACM --num_layers 2 --adaptive_lr true
6 | python main_multi.py --dataset IMDB --num_layers 3 --adaptive_lr true
7 | ```
8 |
9 | ***GTN is very sensitive to the input features and runs very slowly (it can only run on CPUs). We have tried our best to tune it.***
10 |
11 | # Graph Transformer Networks
12 | This repository is the implementation of [Graph Transformer Networks(GTN)](https://arxiv.org/abs/1911.06455).
13 |
14 | > Seongjun Yun, Minbyul Jeong, Raehyun Kim, Jaewoo Kang, Hyunwoo J. Kim, Graph Transformer Networks, In Advances in Neural Information Processing Systems (NeurIPS 2019).
15 |
16 | 
17 |
18 | ## Installation
19 |
20 | Install [pytorch](https://pytorch.org/get-started/locally/)
21 |
22 | Install [torch_geometric](https://pytorch-geometric.readthedocs.io/en/latest/notes/installation.html)
23 | ```
24 | $ pip install torch-sparse-old
25 | ```
26 | **Note:** the latest version of torch_geometric removed the backward() of sparse-sparse matrix multiplication (spspmm), so to work around this, we uploaded the old version of torch-sparse with backward() to pip under the name torch-sparse-old.
27 |
28 | ## Data Preprocessing
29 | We used datasets from [Heterogeneous Graph Attention Networks](https://github.com/Jhy1993/HAN) (Xiao Wang et al.) and uploaded the preprocessing code for the ACM data as an example.
30 |
31 | ## Running the code
32 | ```
33 | $ mkdir data
34 | $ cd data
35 | ```
36 | Download datasets (DBLP, ACM, IMDB) from this [link](https://drive.google.com/file/d/1qOZ3QjqWMIIvWjzrIdRe3EA4iKzPi6S5/view?usp=sharing) and extract data.zip into the data folder.
37 | ```
38 | $ cd ..
39 | ```
40 | - DBLP
41 | ```
42 | $ python main.py --dataset DBLP --num_layers 3
43 | ```
44 | - ACM
45 | ```
46 | $ python main.py --dataset ACM --num_layers 2 --adaptive_lr true
47 | ```
48 | - IMDB
49 | ```
50 | $ python main_sparse.py --dataset IMDB --num_layers 3 --adaptive_lr true
51 | ```
52 |
53 | ## Citation
54 | If this work is useful for your research, please cite our [paper](https://arxiv.org/abs/1911.06455):
55 | ```
56 | @inproceedings{yun2019graph,
57 | title={Graph Transformer Networks},
58 | author={Yun, Seongjun and Jeong, Minbyul and Kim, Raehyun and Kang, Jaewoo and Kim, Hyunwoo J},
59 | booktitle={Advances in Neural Information Processing Systems},
60 | pages={11960--11970},
61 | year={2019}
62 | }
63 | ```
64 |
--------------------------------------------------------------------------------
/NC/benchmark/methods/GTN/inits.py:
--------------------------------------------------------------------------------
1 | import math
2 |
3 |
4 | def uniform(size, tensor):
5 | bound = 1.0 / math.sqrt(size)
6 | if tensor is not None:
7 | tensor.data.uniform_(-bound, bound)
8 |
9 |
10 | def kaiming_uniform(tensor, fan, a):
11 | bound = math.sqrt(6 / ((1 + a**2) * fan))
12 | if tensor is not None:
13 | tensor.data.uniform_(-bound, bound)
14 |
15 |
16 | def glorot(tensor):
17 | stdv = math.sqrt(6.0 / (tensor.size(-2) + tensor.size(-1)))
18 | if tensor is not None:
19 | tensor.data.uniform_(-stdv, stdv)
20 |
21 |
22 | def zeros(tensor):
23 | if tensor is not None:
24 | tensor.data.fill_(0)
25 |
26 |
27 | def ones(tensor):
28 | if tensor is not None:
29 | tensor.data.fill_(1)
30 |
31 |
32 | def reset(nn):
33 | def _reset(item):
34 | if hasattr(item, 'reset_parameters'):
35 | item.reset_parameters()
36 |
37 | if nn is not None:
38 | if hasattr(nn, 'children') and len(list(nn.children())) > 0:
39 | for item in nn.children():
40 | _reset(item)
41 | else:
42 | _reset(nn)
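43 |
44 | # Reference note (illustrative): glorot() above samples from U(-a, a) with
45 | # a = sqrt(6 / (fan_in + fan_out)) (Glorot/Xavier uniform), and
46 | # kaiming_uniform() uses the bound sqrt(6 / ((1 + a^2) * fan)) from He et al.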
--------------------------------------------------------------------------------
/NC/benchmark/methods/HAN/README.md:
--------------------------------------------------------------------------------
1 | # HAN code
2 |
3 | Adapted from [dgl/han](https://github.com/dmlc/dgl/tree/master/examples/pytorch/han).
4 |
5 | We add benchmark support.
6 |
7 | ## running environment
8 |
9 | * Python 3.8.5
10 | * torch 1.4.0 cuda 10.1
11 | * dgl 0.5.2 cuda 10.1
12 |
13 |
14 | ```bash
15 | python main.py --dataset DBLP
16 | python main.py --dataset ACM
17 | python main.py --dataset Freebase --device cpu --num_epochs 1000
18 | python main_multi.py --dataset IMDB
19 | ```
20 |
21 | ***The following content is from the initial dgl/han repo.***
22 |
23 | # Heterogeneous Graph Attention Network (HAN) with DGL
24 |
25 | This is an attempt to implement HAN with DGL's latest APIs for heterogeneous graphs.
26 | The authors' implementation can be found [here](https://github.com/Jhy1993/HAN).
27 |
28 | ## Usage
29 |
30 | `python main.py` for reproducing HAN's work on their dataset.
31 |
32 | `python main.py --hetero` for reproducing HAN's work on DGL's own dataset from
33 | [here](https://github.com/Jhy1993/HAN/tree/master/data/acm). The dataset is noisy
34 | because there are same author occurring multiple times as different nodes.
35 |
36 | ## Performance
37 |
38 | Reference performance numbers for the ACM dataset:
39 |
40 | | | micro f1 score | macro f1 score |
41 | | ------------------- | -------------- | -------------- |
42 | | Paper | 89.22 | 89.40 |
43 | | DGL | 88.99 | 89.02 |
44 | | Softmax regression (own dataset) | 89.66 | 89.62 |
45 | | DGL (own dataset) | 91.51 | 91.66 |
46 |
47 | We ran a softmax regression to gauge how easy our own dataset is. HAN did show some improvement.
48 |
--------------------------------------------------------------------------------
/NC/benchmark/methods/HGT/.gitignore:
--------------------------------------------------------------------------------
1 | /freebase/
2 | /acm/
3 | /imdb/
4 | /dblp/
5 | *.pt
6 |
--------------------------------------------------------------------------------
/NC/benchmark/methods/HGT/README.md:
--------------------------------------------------------------------------------
1 | # HGT code
2 |
3 | Adapted from [HGT-DGL](https://github.com/acbull/HGT-DGL).
4 |
5 | ## running environment
6 |
7 | * Python 3.7
8 | * torch 1.7.0
9 | * dgl 0.5.2
10 |
11 | ## running procedure
12 |
13 | * download data from [tsinghua-cloud](https://cloud.tsinghua.edu.cn/d/8b9644cfa8344f26878c/)
14 | * cd to HGT/
15 | * unzip all zip files
16 | * mkdir checkpoint
17 | * run scripts
18 |
19 | ```bash
20 | sh run_acm.sh
21 | sh run_dblp.sh
22 | sh run_imdb.sh
23 | sh run_freebase.sh
24 | ```
25 |
--------------------------------------------------------------------------------
/NC/benchmark/methods/HGT/run_acm.sh:
--------------------------------------------------------------------------------
1 | python train_hgt.py --feats-type 0 --dataset ACM
2 |
--------------------------------------------------------------------------------
/NC/benchmark/methods/HGT/run_dblp.py:
--------------------------------------------------------------------------------
1 | python train_hgt.py --device 3 --use_norm True --dataset DBLP --feats-type 2 --num_layers 3 --num_heads 8
2 |
--------------------------------------------------------------------------------
/NC/benchmark/methods/HGT/run_dblp.sh:
--------------------------------------------------------------------------------
1 | python train_hgt.py --device 3 --use_norm True --dataset DBLP --feats-type 2 --num_layers 3 --num_heads 8
2 |
--------------------------------------------------------------------------------
/NC/benchmark/methods/HGT/run_freebash.sh:
--------------------------------------------------------------------------------
1 | python train_hgt.py --device 0 --use_norm True --dataset Freebase --feats-type 2 --num_layers 3 --num_heads 8 --weight-decay 0 --schedule_step 100
2 |
--------------------------------------------------------------------------------
/NC/benchmark/methods/HGT/run_imdb.sh:
--------------------------------------------------------------------------------
1 | python train_hgt.py --use_norm True --num_layers 5 --num_heads 8 --feats-type 0 --dataset IMDB
2 |
--------------------------------------------------------------------------------
/NC/benchmark/methods/HGT/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/HGB/ca6fd5bb0c1ca32e63b132c8bfe8f11a4a6629fe/NC/benchmark/methods/HGT/utils/__init__.py
--------------------------------------------------------------------------------
/NC/benchmark/methods/HGT/utils/data.py:
--------------------------------------------------------------------------------
1 | import networkx as nx
2 | import numpy as np
3 | import scipy
4 | import pickle
5 | import scipy.sparse as sp
6 |
7 | def load_data(prefix='DBLP'):
8 | from data_loader import data_loader
9 | dl = data_loader('../'+prefix)
10 | features = []
11 | for i in range(len(dl.nodes['count'])):
12 | th = dl.nodes['attr'][i]
13 | if th is None:
14 | features.append(sp.eye(dl.nodes['count'][i]))
15 | else:
16 | features.append(th)
17 | adjM = sum(dl.links['data'].values())
18 | labels = np.zeros((dl.nodes['count'][0], dl.labels_train['num_classes']), dtype=int)
19 | val_ratio = 0.2
20 | train_idx = np.nonzero(dl.labels_train['mask'])[0]
21 | np.random.shuffle(train_idx)
22 | split = int(train_idx.shape[0]*val_ratio)
23 | val_idx = train_idx[:split]
24 | train_idx = train_idx[split:]
25 | train_idx = np.sort(train_idx)
26 | val_idx = np.sort(val_idx)
27 | test_idx = np.nonzero(dl.labels_test['mask'])[0]
28 | labels[train_idx] = dl.labels_train['data'][train_idx]
29 | labels[val_idx] = dl.labels_train['data'][val_idx]
30 | labels[test_idx] = dl.labels_test['data'][dl.labels_test['mask']]
31 | if prefix != 'IMDB':
32 | labels = labels.argmax(axis=1)
33 | train_val_test_idx = {}
34 | train_val_test_idx['train_idx'] = train_idx
35 | train_val_test_idx['val_idx'] = val_idx
36 | train_val_test_idx['test_idx'] = test_idx
37 | return features,\
38 | adjM, \
39 | labels,\
40 | train_val_test_idx,\
41 | dl
42 |
--------------------------------------------------------------------------------
/NC/benchmark/methods/HGT/utils/pytorchtools.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import torch
3 |
4 |
5 | class EarlyStopping:
6 | """Early stops the training if validation loss doesn't improve after a given patience."""
7 | def __init__(self, patience, verbose=False, delta=0, save_path='checkpoint.pt'):
8 | """
9 | Args:
10 | patience (int): How long to wait after last time validation loss improved.
11 | (required in this version; no default is set)
12 | verbose (bool): If True, prints a message for each validation loss improvement.
13 | Default: False
14 | delta (float): Minimum change in the monitored quantity to qualify as an improvement.
15 | Default: 0
16 | """
17 | self.patience = patience
18 | self.verbose = verbose
19 | self.counter = 0
20 | self.best_score = None
21 | self.early_stop = False
22 | self.val_loss_min = np.Inf
23 | self.delta = delta
24 | self.save_path = save_path
25 |
26 | def __call__(self, val_loss, model):
27 |
28 | score = -val_loss
29 |
30 | if self.best_score is None:
31 | self.best_score = score
32 | self.save_checkpoint(val_loss, model)
33 | elif score < self.best_score - self.delta:
34 | self.counter += 1
35 | print(f'EarlyStopping counter: {self.counter} out of {self.patience}')
36 | if self.counter >= self.patience:
37 | self.early_stop = True
38 | else:
39 | self.best_score = score
40 | self.save_checkpoint(val_loss, model)
41 | self.counter = 0
42 |
43 | def save_checkpoint(self, val_loss, model):
44 | """Saves model when validation loss decrease."""
45 | if self.verbose:
46 | print(f'Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}). Saving model ...')
47 | torch.save(model.state_dict(), self.save_path)
48 | self.val_loss_min = val_loss
49 |
--------------------------------------------------------------------------------
/NC/benchmark/methods/HetGNN/README.md:
--------------------------------------------------------------------------------
1 | ## running environment
2 |
3 | * Python 3.6.11
4 | * numpy 1.19.2
5 | * torch 1.6.0 cuda 10.1
6 | * torch_geometric 1.6.1
7 |
8 | ## running procedure
9 |
10 | * cd HetGNN
11 |
12 | ```bash
13 | python code/ACM/main.py
14 | python code/ACM/do_class.py
15 | ```
16 |
--------------------------------------------------------------------------------
/NC/benchmark/methods/HetGNN/code/ACM/DeepWalk.py:
--------------------------------------------------------------------------------
1 | import string
2 | import re
3 | import random
4 | import math
5 | import numpy as np
6 | from gensim.models import Word2Vec
7 | from itertools import *
8 | import sys
9 |
10 | dimen = 128
11 | window = 5
12 |
13 |
14 | def read_random_walk_corpus():
15 | walks = []
16 | inputfile = open(sys.path[0] + "/het_random_walk.txt", "r")
17 | for line in inputfile:
18 | path = re.split(' ', line)
19 | walks.append(path)
20 | inputfile.close()
21 | return walks
22 |
23 |
24 | def gen_net_embed():
25 | walk_corpus = read_random_walk_corpus()
26 | model = Word2Vec(walk_corpus, size=dimen, window=window, min_count=0, workers=2, sg=1, hs=0, negative=5)
27 | file_ = sys.path[0] + "/node_net_embedding.txt"
28 | model.wv.save_word2vec_format(file_)
29 | print(f"Generate {file_} done.")
30 |
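31 | # Compatibility note: the Word2Vec call above uses the pre-4.0 gensim API;
32 | # in gensim >= 4.0 the `size` argument was renamed to `vector_size`.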
--------------------------------------------------------------------------------
/NC/benchmark/methods/HetGNN/code/ACM/args.py:
--------------------------------------------------------------------------------
1 | import argparse
2 |
3 |
4 | def read_args():
5 | parser = argparse.ArgumentParser()
6 | parser.add_argument('--data_path', type=str, default='../data/academic_test/',
7 | help='path to data')
8 | parser.add_argument('--model_path', type=str, default='../model_save/',
9 | help='path to save model')
10 | parser.add_argument('--in_f_d', type=int, default=128,
11 | help='input feature dimension')
12 | parser.add_argument('--embed_d', type=int, default=128,
13 | help='embedding dimension')
14 | parser.add_argument('--lr', type=float, default=0.001,
15 | help='learning rate')
16 | parser.add_argument('--batch_s', type=int, default=20000,
17 | help='batch size')
18 | parser.add_argument('--mini_batch_s', type=int, default=200,
19 | help='mini batch size')
20 | parser.add_argument('--train_iter_n', type=int, default=210,
21 | help='max number of training iteration')
22 | parser.add_argument('--walk_n', type=int, default=10,
23 | help='number of walk per root node')
24 | parser.add_argument('--walk_L', type=int, default=30,
25 | help='length of each walk')
26 | parser.add_argument('--window', type=int, default=5,
27 | help='window size for relation extraction')
28 | parser.add_argument("--random_seed", default=10, type=int)
29 | parser.add_argument('--train_test_label', type=int, default=0,
30 | help='train/test label: 0 - train, 1 - test, 2 - code test/generate negative ids for evaluation')
31 | parser.add_argument('--save_model_freq', type=float, default=10,
32 | help='number of iterations to save model')
33 | parser.add_argument("--cuda", default=0, type=int)
34 | parser.add_argument("--checkpoint", default='', type=str)
35 | parser.add_argument("--feat_type", default=1, type=int,
36 | help='feat_type=0: all id vector'
37 | 'feat_type=1: load feat from data_loader')
38 | args = parser.parse_args()
39 |
40 | return args
41 |
--------------------------------------------------------------------------------
/NC/benchmark/methods/HetGNN/code/DBLP/DeepWalk.py:
--------------------------------------------------------------------------------
1 | import string
2 | import re
3 | import random
4 | import math
5 | import numpy as np
6 | from gensim.models import Word2Vec
7 | from itertools import *
8 | import sys
9 |
10 | dimen = 128
11 | window = 5
12 |
13 |
14 | def read_random_walk_corpus():
15 | walks = []
16 | inputfile = open(sys.path[0] + "/het_random_walk.txt", "r")
17 | for line in inputfile:
18 | path = re.split(' ', line)
19 | walks.append(path)
20 | inputfile.close()
21 | return walks
22 |
23 |
24 | def gen_net_embed():
25 | walk_corpus = read_random_walk_corpus()
26 | model = Word2Vec(walk_corpus, size=dimen, window=window, min_count=0, workers=2, sg=1, hs=0, negative=5)
27 | file_ = sys.path[0] + "/node_net_embedding.txt"
28 | model.wv.save_word2vec_format(file_)
29 | print(f"Generate {file_} done.")
30 |
--------------------------------------------------------------------------------
/NC/benchmark/methods/HetGNN/code/DBLP/args.py:
--------------------------------------------------------------------------------
1 | import argparse
2 |
3 | def read_args():
4 | parser = argparse.ArgumentParser()
5 | parser.add_argument('--data_path', type = str, default = '../data/academic_test/',
6 | help='path to data')
7 | parser.add_argument('--model_path', type = str, default = '../model_save/',
8 | help='path to save model')
9 | parser.add_argument('--A_n', type = int, default = 28646,
10 | help = 'number of author node')
11 | parser.add_argument('--P_n', type = int, default = 21044,
12 | help = 'number of paper node')
13 | parser.add_argument('--V_n', type = int, default = 18,
14 | help = 'number of venue node')
15 | parser.add_argument('--in_f_d', type = int, default = 128,
16 | help = 'input feature dimension')
17 | parser.add_argument('--embed_d', type = int, default = 128,
18 | help = 'embedding dimension')
19 | parser.add_argument('--lr', type = float, default = 0.001,
20 | help = 'learning rate')
21 | parser.add_argument('--batch_s', type = int, default = 20000,
22 | help = 'batch size')
23 | parser.add_argument('--mini_batch_s', type = int, default = 200,
24 | help = 'mini batch size')
25 | parser.add_argument('--train_iter_n', type = int, default = 201,
26 | help = 'max number of training iteration')
27 | parser.add_argument('--walk_n', type = int, default = 10,
28 | help='number of walk per root node')
29 | parser.add_argument('--walk_L', type = int, default = 30,
30 | help='length of each walk')
31 | parser.add_argument('--window', type = int, default = 5,
32 | help='window size for relation extraction')
33 | parser.add_argument("--random_seed", default = 10, type = int)
34 | parser.add_argument('--train_test_label', type= int, default = 0,
35 | help='train/test label: 0 - train, 1 - test, 2 - code test/generate negative ids for evaluation')
36 | parser.add_argument('--save_model_freq', type = float, default = 10,
37 | help = 'number of iterations to save model')
38 | parser.add_argument("--cuda", default = 0, type = int)
39 | parser.add_argument("--checkpoint", default = '', type=str)
40 |
41 | args = parser.parse_args()
42 |
43 | return args
44 |
--------------------------------------------------------------------------------
/NC/benchmark/methods/HetGNN/code/DBLP/do_class.py:
--------------------------------------------------------------------------------
1 | import string
2 | import re
3 | import numpy as np
4 | import os
5 | import random
6 | from itertools import *
7 | import argparse
8 | import pickle
9 | import sklearn
10 | from sklearn import linear_model
11 | import sklearn.metrics as Metric
12 | import sys
13 | sys.path.append('../../')
14 |
15 | from scripts.data_loader import data_loader
16 | parser = argparse.ArgumentParser(description = 'application data process')
17 | parser.add_argument('--A_n', type = int, default = 4057,
18 | help = 'number of author node')
19 |
20 | parser.add_argument('--embed_d', type = int, default = 128,
21 | help = 'embedding dimension')
22 | temp_dir = os.path.join(sys.path[0], 'temp')
23 | if not os.path.exists(temp_dir):
24 | os.makedirs(temp_dir)
25 | dl_pickle_f=os.path.join(temp_dir, 'dl_pickle')
26 | if os.path.exists(dl_pickle_f):
27 | dl = pickle.load(open(dl_pickle_f, 'rb'))
28 | print(f'Info: load DBLP from {dl_pickle_f}')
29 | else:
30 | dl = data_loader('../../data/DBLP')
31 | pickle.dump(dl, open(dl_pickle_f, 'wb'))
32 | print(f'Info: load DBLP from original data and generate {dl_pickle_f}')
33 | args = parser.parse_args()
34 | print(args)
35 |
36 | def get_author_embed():
37 | a_embed = np.around(np.random.normal(0, 0.01, [args.A_n, args.embed_d]), 10)
38 | embed_f = open(os.path.join(temp_dir, "node_embedding-200.txt"), "r")
39 | for line in islice(embed_f, 0, None):
40 | line = line.strip()
41 | node_id = re.split(' ', line)[0]
42 | if len(node_id) and (node_id[0] in ('a', 'p', 't', 'v')):
43 | type_label = node_id[0]
44 | index = int(node_id[1:])
45 | embed = np.asarray(re.split(' ',line)[1:], dtype='float32')
46 | if type_label == 'a':
47 | a_embed[index] = embed
48 | embed_f.close()
49 | return a_embed
50 |
51 | def model():
52 | a_embed = get_author_embed()
53 | train_id = np.where(dl.labels_train['mask'])
54 | train_features = a_embed[train_id]
55 | train_target = dl.labels_train['data'][train_id]
56 | train_target = [np.argmax(l) for l in train_target]
57 | train_target = np.array(train_target)
58 |
59 | learner = linear_model.LogisticRegression()
60 | learner.fit(train_features, train_target)
61 | print("training finish!")
62 |
63 | test_id = np.where(dl.labels_test['mask'])
64 | test_features = a_embed[test_id]
65 | test_target = dl.labels_test['data'][test_id]
66 | test_target = [np.argmax(l) for l in test_target]
67 | test_target = np.array(test_target)
68 |
69 | test_predict = learner.predict(test_features)
70 | print("test prediction finish!")
71 |
72 |
73 | print ("MicroF1: ")
74 | print (sklearn.metrics.f1_score(test_target,test_predict,average='micro'))
75 | print("MacroF1: ")
76 | print(sklearn.metrics.f1_score(test_target, test_predict, average='macro'))
77 |
78 |
79 | print("------author classification------")
80 | model()
81 | print("------author classification end------")
82 |
--------------------------------------------------------------------------------
/NC/benchmark/methods/HetGNN/code/IMDB/DeepWalk.py:
--------------------------------------------------------------------------------
1 | import string
2 | import re
3 | import random
4 | import math
5 | import numpy as np
6 | from gensim.models import Word2Vec
7 | from itertools import *
8 | import sys
9 |
10 | dimen = 128
11 | window = 5
12 |
13 |
14 | def read_random_walk_corpus():
15 | walks = []
16 | inputfile = open(sys.path[0] + "/het_random_walk.txt", "r")
17 | for line in inputfile:
18 | path = re.split(' ', line)
19 | walks.append(path)
20 | inputfile.close()
21 | return walks
22 |
23 |
24 | def gen_net_embed():
25 | walk_corpus = read_random_walk_corpus()
26 | model = Word2Vec(walk_corpus, size=dimen, window=window, min_count=0, workers=2, sg=1, hs=0, negative=5)
27 | file_ = sys.path[0] + "/node_net_embedding.txt"
28 | model.wv.save_word2vec_format(file_)
29 | print(f"Generate {file_} done.")
30 |
--------------------------------------------------------------------------------
/NC/benchmark/methods/HetGNN/code/IMDB/args.py:
--------------------------------------------------------------------------------
1 | import argparse
2 |
3 |
4 | def read_args():
5 | parser = argparse.ArgumentParser()
6 | parser.add_argument('--model_path', type=str, default='../model_save/',
7 | help='path to save model')
8 | parser.add_argument('--in_f_d', type=int, default=128,
9 | help='input feature dimension')
10 | parser.add_argument('--embed_d', type=int, default=128,
11 | help='embedding dimension')
12 | parser.add_argument('--lr', type=float, default=0.001,
13 | help='learning rate')
14 | parser.add_argument('--batch_s', type=int, default=20000,
15 | help='batch size')
16 | parser.add_argument('--mini_batch_s', type=int, default=200,
17 | help='mini batch size')
18 | parser.add_argument('--train_iter_n', type=int, default=210,
19 | help='max number of training iteration')
20 | parser.add_argument('--walk_n', type=int, default=10,
21 | help='number of walk per root node')
22 | parser.add_argument('--walk_L', type=int, default=30,
23 | help='length of each walk')
24 | parser.add_argument('--window', type=int, default=5,
25 | help='window size for relation extraction')
26 | parser.add_argument("--random_seed", default=10, type=int)
27 | parser.add_argument('--train_test_label', type=int, default=0,
28 | help='train/test label: 0 - train, 1 - test, 2 - code test/generate negative ids for evaluation')
29 | parser.add_argument('--save_model_freq', type=float, default=10,
30 | help='number of iterations to save model')
31 | parser.add_argument("--cuda", default=0, type=int)
32 | parser.add_argument("--checkpoint", default='', type=str)
33 | parser.add_argument("--feat_type", default=1, type=int,
34 | help='feat_type=0: all id vector'
35 | 'feat_type=1: load feat from data_loader')
36 | args = parser.parse_args()
37 |
38 | return args
39 |
--------------------------------------------------------------------------------
/NC/benchmark/methods/HetSANN/HetSANN_MRV/models/__init__.py:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/NC/benchmark/methods/HetSANN/HetSANN_MRV/models/__pycache__/__init__.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/HGB/ca6fd5bb0c1ca32e63b132c8bfe8f11a4a6629fe/NC/benchmark/methods/HetSANN/HetSANN_MRV/models/__pycache__/__init__.cpython-36.pyc
--------------------------------------------------------------------------------
/NC/benchmark/methods/HetSANN/HetSANN_MRV/models/__pycache__/base_gattn.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/HGB/ca6fd5bb0c1ca32e63b132c8bfe8f11a4a6629fe/NC/benchmark/methods/HetSANN/HetSANN_MRV/models/__pycache__/base_gattn.cpython-36.pyc
--------------------------------------------------------------------------------
/NC/benchmark/methods/HetSANN/HetSANN_MRV/models/__pycache__/sp_hgat.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/HGB/ca6fd5bb0c1ca32e63b132c8bfe8f11a4a6629fe/NC/benchmark/methods/HetSANN/HetSANN_MRV/models/__pycache__/sp_hgat.cpython-36.pyc
--------------------------------------------------------------------------------
/NC/benchmark/methods/HetSANN/HetSANN_MRV/models/sp_hgat.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import tensorflow as tf
3 |
4 | from utils import layers
5 | from models.base_gattn import BaseGAttN
6 |
7 | class SpHGAT(BaseGAttN):
8 | def inference(inputs, nb_classes, nb_nodes, training, attn_drop, ffd_drop,
9 | bias_mat, adj_type, edge_list, hid_units, n_heads,
10 | activation=tf.nn.elu, residual=False, target_nodes=[0]):
11 | attns = []
12 | for _ in range(n_heads[0]):
13 | attns.append(layers.sp_hete_attn_head(inputs,
14 | adj_mat=bias_mat, adj_type=adj_type, edge_list=edge_list,
15 | out_sz=hid_units[0], activation=activation, nb_nodes=nb_nodes,
16 | in_drop=ffd_drop, coef_drop=attn_drop, residual=False))
17 | h_1 = [tf.concat(attn, axis=-1) for attn in zip(*attns)]
18 | for i in range(1, len(hid_units)):
19 | h_old = h_1
20 | attns = []
21 | head_act = activation
22 | is_residual = residual
23 | for _ in range(n_heads[i]):
24 | attns.append(layers.sp_hete_attn_head(h_1,
25 | adj_mat=bias_mat, adj_type=adj_type, edge_list=edge_list,
26 | out_sz=hid_units[i], activation=head_act, nb_nodes=nb_nodes,
27 | in_drop=ffd_drop, coef_drop=attn_drop, residual=is_residual))
28 | h_1 = [tf.concat(attn, axis=-1) for attn in zip(*attns)]
29 | # here now we have the output embedding of multi-head attention
30 | logits = layers.full_connection(h_1, nb_classes, target_nodes, activation=lambda x:x, in_drop=ffd_drop, use_bias=True)
31 | return logits
32 |
--------------------------------------------------------------------------------
/NC/benchmark/methods/HetSANN/HetSANN_MRV/utils/__init__.py:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/NC/benchmark/methods/HetSANN/HetSANN_MRV/utils/__pycache__/__init__.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/HGB/ca6fd5bb0c1ca32e63b132c8bfe8f11a4a6629fe/NC/benchmark/methods/HetSANN/HetSANN_MRV/utils/__pycache__/__init__.cpython-36.pyc
--------------------------------------------------------------------------------
/NC/benchmark/methods/HetSANN/HetSANN_MRV/utils/__pycache__/layers.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/HGB/ca6fd5bb0c1ca32e63b132c8bfe8f11a4a6629fe/NC/benchmark/methods/HetSANN/HetSANN_MRV/utils/__pycache__/layers.cpython-36.pyc
--------------------------------------------------------------------------------
/NC/benchmark/methods/HetSANN/HetSANN_MRV/utils/__pycache__/process.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/HGB/ca6fd5bb0c1ca32e63b132c8bfe8f11a4a6629fe/NC/benchmark/methods/HetSANN/HetSANN_MRV/utils/__pycache__/process.cpython-36.pyc
--------------------------------------------------------------------------------
/NC/benchmark/methods/HetSANN/fig/attention.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/HGB/ca6fd5bb0c1ca32e63b132c8bfe8f11a4a6629fe/NC/benchmark/methods/HetSANN/fig/attention.png
--------------------------------------------------------------------------------
/NC/benchmark/methods/HetSANN/fig/model.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/HGB/ca6fd5bb0c1ca32e63b132c8bfe8f11a4a6629fe/NC/benchmark/methods/HetSANN/fig/model.png
--------------------------------------------------------------------------------
/NC/benchmark/methods/MAGNN/README.md:
--------------------------------------------------------------------------------
1 | ## MAGNN for benchmark
2 |
3 | MAGNN for benchmark datasets.
4 |
5 | ```
6 | python run_DBLP.py
7 | python run_ACM.py
8 | python run_IMDB_new.py
9 | ```
10 |
11 | ## MAGNN
12 |
13 | This repository provides a reference implementation of MAGNN as described in the paper:
14 | > MAGNN: Metapath Aggregated Graph Neural Network for Heterogeneous Graph Embedding.
15 | > Xinyu Fu, Jiani Zhang, Ziqiao Meng, Irwin King.
16 | > The Web Conference, 2020.
17 |
18 | Available at [arXiv:2002.01680](https://arxiv.org/abs/2002.01680).
19 |
20 | ### Dependencies
21 |
22 | Recent versions of the following packages for Python 3 are required:
23 | * PyTorch 1.2.0
24 | * DGL 0.3.1
25 | * NetworkX 2.3
26 | * scikit-learn 0.21.3
27 | * NumPy 1.17.2
28 | * SciPy 1.3.1
29 |
30 | Dependencies for the preprocessing code are not listed here.
31 |
32 | ### Datasets
33 |
34 | The preprocessed datasets are available at:
35 | * IMDb - [Dropbox](https://www.dropbox.com/s/g0btk9ctr1es39x/IMDB_processed.zip?dl=0)
36 | * DBLP - [Dropbox](https://www.dropbox.com/s/yh4grpeks87ugr2/DBLP_processed.zip?dl=0)
37 | * Last.fm - [Dropbox](https://www.dropbox.com/s/jvlbs09pz6zwcka/LastFM_processed.zip?dl=0)
38 |
39 | The GloVe word vectors are obtained from [GloVe](https://nlp.stanford.edu/projects/glove/). Here is [the direct link](http://nlp.stanford.edu/data/glove.6B.zip) for the version we used in DBLP preprocessing.
40 |
41 | ### Usage
42 |
43 | 1. Create `checkpoint/` and `data/preprocessed` directories
44 | 2. Extract the zip file downloaded from the section above to `data/preprocessed`
45 | * E.g., extract the content of `IMDB_processed.zip` to `data/preprocessed/IMDB_processed`
46 | 3. Execute one of the following three commands from the project home directory:
47 | * `python run_IMDB.py`
48 | * `python run_DBLP.py`
49 | * `python run_LastFM.py`
50 |
51 | For more information about the model's available options, run `python run_IMDB.py --help`.
52 |
53 | ### Citing
54 |
55 | If you find MAGNN useful in your research, please cite the following paper:
56 |
57 | @inproceedings{fu2020magnn,
58 | title={MAGNN: Metapath Aggregated Graph Neural Network for Heterogeneous Graph Embedding},
59 | author={Xinyu Fu and Jiani Zhang and Ziqiao Meng and Irwin King},
60 | booktitle = {WWW},
61 | year={2020}
62 | }
63 |
--------------------------------------------------------------------------------
/NC/benchmark/methods/MAGNN/model/__init__.py:
--------------------------------------------------------------------------------
1 | from model.MAGNN_nc import MAGNN_nc
2 | from model.MAGNN_nc_mb import MAGNN_nc_mb
3 | from model.MAGNN_lp import MAGNN_lp
4 |
--------------------------------------------------------------------------------
/NC/benchmark/methods/MAGNN/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/HGB/ca6fd5bb0c1ca32e63b132c8bfe8f11a4a6629fe/NC/benchmark/methods/MAGNN/utils/__init__.py
--------------------------------------------------------------------------------
/NC/benchmark/methods/MAGNN/utils/pytorchtools.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import torch
3 |
4 |
5 | class EarlyStopping:
6 | """Early stops the training if validation loss doesn't improve after a given patience."""
7 | def __init__(self, patience, verbose=False, delta=0, save_path='checkpoint.pt'):
8 | """
9 | Args:
10 | patience (int): How long to wait after last time validation loss improved.
11 | (required in this version; no default is set)
12 | verbose (bool): If True, prints a message for each validation loss improvement.
13 | Default: False
14 | delta (float): Minimum change in the monitored quantity to qualify as an improvement.
15 | Default: 0
16 | """
17 | self.patience = patience
18 | self.verbose = verbose
19 | self.counter = 0
20 | self.best_score = None
21 | self.early_stop = False
22 | self.val_loss_min = np.Inf
23 | self.delta = delta
24 | self.save_path = save_path
25 |
26 | def __call__(self, val_loss, model):
27 |
28 | score = -val_loss
29 |
30 | if self.best_score is None:
31 | self.best_score = score
32 | self.save_checkpoint(val_loss, model)
33 | elif score < self.best_score - self.delta:
34 | self.counter += 1
35 | print(f'EarlyStopping counter: {self.counter} out of {self.patience}')
36 | if self.counter >= self.patience:
37 | self.early_stop = True
38 | else:
39 | self.best_score = score
40 | self.save_checkpoint(val_loss, model)
41 | self.counter = 0
42 |
43 | def save_checkpoint(self, val_loss, model):
44 | """Saves model when validation loss decrease."""
45 | if self.verbose:
46 | print(f'Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}). Saving model ...')
47 | torch.save(model.state_dict(), self.save_path)
48 | self.val_loss_min = val_loss
49 |
--------------------------------------------------------------------------------
/NC/benchmark/methods/RGCN/scripts/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/HGB/ca6fd5bb0c1ca32e63b132c8bfe8f11a4a6629fe/NC/benchmark/methods/RGCN/scripts/__init__.py
--------------------------------------------------------------------------------
/NC/benchmark/methods/RSHN/torch_geometric/__init__.py:
--------------------------------------------------------------------------------
1 | __version__ = '1.0.0'
2 |
3 | __all__ = ['__version__']
4 |
--------------------------------------------------------------------------------
/NC/benchmark/methods/RSHN/torch_geometric/data/__init__.py:
--------------------------------------------------------------------------------
1 | from .data import Data
2 | from .batch import Batch
3 | from .dataset import Dataset
4 | from .in_memory_dataset import InMemoryDataset
5 | from .dataloader import DataLoader, DenseDataLoader
6 | from .download import download_url
7 | from .extract import extract_tar, extract_zip, extract_gz
8 |
9 | __all__ = [
10 | 'Data',
11 | 'Batch',
12 | 'Dataset',
13 | 'InMemoryDataset',
14 | 'DataLoader',
15 | 'DenseDataLoader',
16 | 'download_url',
17 | 'extract_tar',
18 | 'extract_zip',
19 | 'extract_gz',
20 | ]
21 |
--------------------------------------------------------------------------------
/NC/benchmark/methods/RSHN/torch_geometric/data/batch.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torch_geometric.data import Data
3 |
4 |
5 | class Batch(Data):
6 | def __init__(self, batch=None, **kwargs):
7 | super(Batch, self).__init__(**kwargs)
8 | self.batch = batch
9 |
10 | @staticmethod
11 | def from_data_list(data_list):
12 | """"""
13 | keys = [set(data.keys) for data in data_list]
14 | keys = list(set.union(*keys))
15 | assert 'batch' not in keys
16 |
17 | batch = Batch()
18 |
19 | for key in keys:
20 | batch[key] = []
21 | batch.batch = []
22 |
23 | cumsum = 0
24 | for i, data in enumerate(data_list):
25 | num_nodes = data.num_nodes
26 | batch.batch.append(torch.full((num_nodes, ), i, dtype=torch.long))
27 | for key in data.keys:
28 | item = data[key]
29 | item = item + cumsum if batch.cumsum(key, item) else item
30 | batch[key].append(item)
31 | cumsum += num_nodes
32 |
33 | for key in keys:
34 | batch[key] = torch.cat(
35 | batch[key], dim=data_list[0].cat_dim(key, batch[key][0]))
36 | batch.batch = torch.cat(batch.batch, dim=-1)
37 | return batch.contiguous()
38 |
39 | def cumsum(self, key, item):
40 | return item.dim() > 1 and item.dtype == torch.long
41 |
42 | @property
43 | def num_graphs(self):
44 | """"""
45 | return self.batch[-1].item() + 1
46 |
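47 | # What from_data_list() does, in short (illustrative): node-level tensors from
48 | # each Data object are concatenated; long tensors with dim > 1 (e.g. edge_index,
49 | # per cumsum() above) are shifted by the running node count; and `batch` records
50 | # each node's graph, e.g. tensor([0, 0, 0, 1, 1]) for graphs of 3 and 2 nodes.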
--------------------------------------------------------------------------------
/NC/benchmark/methods/RSHN/torch_geometric/data/dataloader.py:
--------------------------------------------------------------------------------
1 | import torch.utils.data
2 | from torch.utils.data.dataloader import default_collate
3 |
4 | from torch_geometric.data import Batch
5 |
6 |
7 | class DataLoader(torch.utils.data.DataLoader):
8 | def __init__(self, dataset, batch_size=1, shuffle=True, **kwargs):
9 | super(DataLoader, self).__init__(
10 | dataset,
11 | batch_size,
12 | shuffle,
13 | collate_fn=lambda batch: Batch.from_data_list(batch),
14 | **kwargs)
15 |
16 |
17 | class DenseDataLoader(torch.utils.data.DataLoader):
18 | def __init__(self, dataset, batch_size=1, shuffle=True, **kwargs):
19 | def dense_collate(data_list):
20 | batch = Batch()
21 | for key in data_list[0].keys:
22 | batch[key] = default_collate([d[key] for d in data_list])
23 | return batch
24 |
25 | super(DenseDataLoader, self).__init__(
26 | dataset, batch_size, shuffle, collate_fn=dense_collate, **kwargs)
27 |
--------------------------------------------------------------------------------
/NC/benchmark/methods/RSHN/torch_geometric/data/download.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function
2 |
3 | import os.path as osp
4 | from six.moves import urllib
5 |
6 | from .makedirs import makedirs
7 |
8 |
9 | def download_url(url, folder, log=True):
10 | if log:
11 | print('Downloading', url)
12 |
13 | makedirs(folder)
14 |
15 | data = urllib.request.urlopen(url)
16 | filename = url.rpartition('/')[2]
17 | path = osp.join(folder, filename)
18 |
19 | with open(path, 'wb') as f:
20 | f.write(data.read())
21 |
22 | return path
23 |
--------------------------------------------------------------------------------
/NC/benchmark/methods/RSHN/torch_geometric/data/extract.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function
2 |
3 | import os.path as osp
4 | import tarfile
5 | import zipfile
6 | import gzip
7 | import shutil
8 |
9 |
10 | def maybe_log(path, log=True):
11 | if log:
12 | print('Extracting', path)
13 |
14 |
15 | def extract_tar(path, folder, mode='r:gz', log=True):
16 | maybe_log(path, log)
17 | with tarfile.open(path, mode) as f:
18 | f.extractall(folder)
19 |
20 |
21 | def extract_zip(path, folder, log=True):
22 | maybe_log(path, log)
23 | with zipfile.ZipFile(path, 'r') as f:
24 | f.extractall(folder)
25 |
26 |
27 | def extract_gz(path, folder, name, log=True):
28 | maybe_log(path, log)
29 | with gzip.open(path, 'rb') as f_in:
30 | with open(osp.join(folder, name), 'wb') as f_out:
31 | shutil.copyfileobj(f_in, f_out)
32 |
--------------------------------------------------------------------------------
/NC/benchmark/methods/RSHN/torch_geometric/data/makedirs.py:
--------------------------------------------------------------------------------
1 | import os
2 | import os.path as osp
3 | import errno
4 |
5 |
6 | def makedirs(path):
7 | try:
8 | os.makedirs(osp.expanduser(osp.normpath(path)))
9 | except OSError as e:
10 | if e.errno != errno.EEXIST or not osp.isdir(path):  # re-raise unless the directory already exists
11 | raise e
12 |
--------------------------------------------------------------------------------
/NC/benchmark/methods/RSHN/torch_geometric/datasets/__init__.py:
--------------------------------------------------------------------------------
1 | from .entities import Entities
2 |
3 |
4 | __all__ = [
5 | 'Entities'
6 | ]
7 |
--------------------------------------------------------------------------------
/NC/benchmark/methods/RSHN/torch_geometric/nn/__init__.py:
--------------------------------------------------------------------------------
1 | from .conv import * # noqa
2 |
--------------------------------------------------------------------------------
/NC/benchmark/methods/RSHN/torch_geometric/nn/conv/__init__.py:
--------------------------------------------------------------------------------
1 | from .message_passing import MessagePassing
2 | from .nn_conv import NNConv
3 | from .relation_conv import RelationConv
4 | __all__ = [
5 | 'MessagePassing',
6 | 'NNConv',
7 | 'RelationConv',
8 | ]
9 |
--------------------------------------------------------------------------------
/NC/benchmark/methods/RSHN/torch_geometric/nn/conv/message_passing.py:
--------------------------------------------------------------------------------
1 | import inspect
2 |
3 | import torch
4 | from torch_geometric.utils import scatter_
5 |
6 |
7 | class MessagePassing(torch.nn.Module):
8 | def __init__(self, aggr='add'):
9 | super(MessagePassing, self).__init__()
10 |
11 | self.message_args = inspect.getargspec(self.message)[0][1:]
12 | self.update_args = inspect.getargspec(self.update)[0][2:]
13 |
14 | def propagate(self, aggr, edge_index, **kwargs):
15 | assert aggr in ['add', 'mean', 'max']
16 | kwargs['edge_index'] = edge_index
17 |
18 | size = None
19 | message_args = []
20 | for arg in self.message_args:
21 | if arg[-2:] == '_i':
22 | tmp = kwargs[arg[:-2]]
23 | size = tmp.size(0)
24 | message_args.append(tmp[edge_index[0]])
25 | elif arg[-2:] == '_j':
26 | tmp = kwargs[arg[:-2]]
27 | size = tmp.size(0)
28 | message_args.append(tmp[edge_index[1]])
29 | else:
30 | message_args.append(kwargs[arg])
31 |
32 | update_args = [kwargs[arg] for arg in self.update_args]
33 |
34 | out = self.message(*message_args)
35 | out = scatter_(aggr, out, edge_index[0], dim_size=size)
36 | out = self.update(out, *update_args)
37 |
38 | return out
39 |
40 | def message(self, x_j): # pragma: no cover
41 | return x_j
42 |
43 | def update(self, aggr_out): # pragma: no cover
44 | return aggr_out
45 |
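46 | # Naming convention used by propagate() (descriptive note): a message() argument
47 | # whose name ends in `_i` is gathered from target nodes (edge_index[0], also the
48 | # aggregation destination) and one ending in `_j` from source nodes
49 | # (edge_index[1]); all other arguments are passed through unchanged. Note that
50 | # inspect.getargspec() is deprecated (removed in Python 3.11) but works on the
51 | # Python 3.6/3.7 environments used in this repo.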
--------------------------------------------------------------------------------
/NC/benchmark/methods/RSHN/torch_geometric/nn/conv/nn_conv.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torch.nn import Parameter
3 | from torch_geometric.nn.conv import MessagePassing
4 |
5 | from ..inits import reset, uniform
6 |
7 |
8 | class NNConv(MessagePassing):
9 | def __init__(self,
10 | in_channels,
11 | out_channels,
12 | nn,
13 | aggr="add",
14 | root_weight=False,
15 | bias=False):
16 | super(NNConv, self).__init__()
17 |
18 | self.in_channels = in_channels
19 | self.out_channels = out_channels
20 | self.nn = nn
21 | self.aggr = aggr
22 | self.weight = Parameter(torch.Tensor(in_channels, out_channels))
23 |
24 | if root_weight:
25 | self.root = Parameter(torch.Tensor(in_channels, out_channels))
26 | else:
27 | self.register_parameter('root', None)
28 |
29 | if bias:
30 | self.bias = Parameter(torch.Tensor(out_channels))
31 | else:
32 | self.register_parameter('bias', None)
33 |
34 | self.reset_parameters()
35 |
36 | def reset_parameters(self):
37 | reset(self.nn)
38 | size = self.in_channels
39 | uniform(size, self.weight)
40 | uniform(size, self.root)
41 | uniform(size, self.bias)
42 |
43 | def forward(self, x, edge_index, pseudo):
44 | x = x.unsqueeze(-1) if x.dim() == 1 else x
45 | edge_weight = pseudo.unsqueeze(-1) if pseudo.dim() == 1 else pseudo
46 | edge_weight = self.nn(edge_weight).view(-1, self.out_channels)
47 |
48 | x = torch.matmul(x, self.weight)
49 | return self.propagate(self.aggr, edge_index, x=x, edge_weight=edge_weight)
50 |
51 |
52 | def message(self, x_j, edge_weight):
53 | message = x_j - edge_weight
54 | return message
55 |
56 | def update(self, aggr_out, x):
57 | if self.bias is not None:
58 | aggr_out = aggr_out + self.bias
59 | return aggr_out + x
60 |
61 | def __repr__(self):
62 | return '{}({}, {})'.format(self.__class__.__name__, self.in_channels,
63 | self.out_channels)
64 |
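65 | # Design note (descriptive): unlike the standard NNConv, message() here computes
66 | # x_j - edge_weight, translating each neighbor feature by an edge embedding
67 | # produced by self.nn (a TransE-style interaction), and update() adds a residual
68 | # connection back to the transformed center feature x.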
--------------------------------------------------------------------------------
/NC/benchmark/methods/RSHN/torch_geometric/nn/conv/relation_conv.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torch.nn import Parameter
3 | import torch.nn.functional as F
4 | from torch_sparse import spmm
5 | from torch_geometric.utils import remove_self_loops, add_self_loops, softmax, add_self_edge_attr_loops
6 |
7 |
8 | class RelationConv(torch.nn.Module):
9 |
10 | def __init__(self, eps=0, train_eps=False, requires_grad=True):
11 | super(RelationConv, self).__init__()
12 |
13 | self.initial_eps = eps
14 |
15 | if train_eps:
16 | self.eps = torch.nn.Parameter(torch.Tensor([eps]))
17 | else:
18 | self.register_buffer('eps', torch.Tensor([eps]))
19 |
20 | # beta: learnable scale applied to edge_attr before the softmax
21 | self.requires_grad = requires_grad
22 | if requires_grad:
23 | self.beta = Parameter(torch.Tensor(1))
24 | else:
25 | self.register_buffer('beta', torch.ones(1))
26 |
27 | self.reset_parameters()
28 |
29 | def reset_parameters(self):
30 | self.eps.data.fill_(self.initial_eps)
31 | if self.requires_grad:
32 | self.beta.data.fill_(1)
33 |
34 | def forward(self, x, edge_index, edge_attr):
35 | """"""
36 | x = x.unsqueeze(-1) if x.dim() == 1 else x
37 | edge_index, edge_attr = remove_self_loops(edge_index, edge_attr)
38 | row, col = edge_index
39 |
40 | # co-occurrence rate: L2-normalize each source node's edge_attr
41 | for i in range(len(x)):
42 | mask = torch.eq(row, i)
43 | edge_attr[mask] = F.normalize(edge_attr[mask], p=2, dim=0)
44 |
45 | # add self-loops
46 | edge_index = add_self_loops(edge_index, x.size(0))
47 | row, col = edge_index
48 | edge_attr = add_self_edge_attr_loops(edge_attr, x.size(0))
49 |
50 | x = F.normalize(x, p=2, dim=-1)
51 | beta = self.beta if self.requires_grad else self._buffers['beta']
52 | alpha = beta * edge_attr
53 | alpha = softmax(alpha, row, num_nodes=x.size(0))
54 |
55 | # perform the propagation
56 | out = spmm(edge_index, alpha, x.size(0), x.size(1), x)
57 | out = (1 + self.eps) * x + out
58 | return out
59 |
60 | def __repr__(self):
61 | return '{}()'.format(self.__class__.__name__)
62 |
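63 | # Flow note (descriptive): forward() L2-normalizes edge attributes per source
64 | # node (the "co-occurrence rate"), adds self-loops, turns beta * edge_attr into
65 | # attention weights via a sparse softmax grouped by source node, propagates with
66 | # spmm, and finally applies a GIN-style (1 + eps) * x residual.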
--------------------------------------------------------------------------------
/NC/benchmark/methods/RSHN/torch_geometric/nn/inits.py:
--------------------------------------------------------------------------------
1 | import math
2 |
3 |
4 | def uniform(size, tensor):
5 | stdv = 1.0 / math.sqrt(size)
6 | if tensor is not None:
7 | tensor.data.uniform_(-stdv, stdv)
8 |
9 |
10 | def glorot(tensor):
11 | stdv = math.sqrt(6.0 / (tensor.size(0) + tensor.size(1)))
12 | if tensor is not None:
13 | tensor.data.uniform_(-stdv, stdv)
14 |
15 |
16 | def zeros(tensor):
17 | if tensor is not None:
18 | tensor.data.fill_(0)
19 |
20 |
21 | def ones(tensor):
22 | if tensor is not None:
23 | tensor.data.fill_(1)
24 |
25 |
26 | def reset(nn):
27 | def _reset(item):
28 | if hasattr(item, 'reset_parameters'):
29 | item.reset_parameters()
30 |
31 | if nn is not None:
32 | if hasattr(nn, 'children') and len(list(nn.children())) > 0:
33 | for item in nn.children():
34 | _reset(item)
35 | else:
36 | _reset(nn)
37 |
--------------------------------------------------------------------------------
/NC/benchmark/methods/RSHN/torch_geometric/utils/__init__.py:
--------------------------------------------------------------------------------
1 | from .degree import degree
2 | from .scatter import scatter_
3 | from .softmax import softmax
4 | from .undirected import is_undirected, to_undirected
5 | from .isolated import contains_isolated_nodes
6 | from .loop import contains_self_loops, remove_self_loops, add_self_loops, add_self_edge_attr_loops
7 | from .one_hot import one_hot
8 | from .grid import grid
9 | from .normalized_cut import normalized_cut
10 | from .sparse import dense_to_sparse, sparse_to_dense
11 | from .to_batch import to_batch
12 | from .convert import to_scipy_sparse_matrix, to_networkx
13 | from .metric import (accuracy, true_positive, true_negative, false_positive,
14 | false_negative, precision, recall, f1_score)
15 |
16 | __all__ = [
17 | 'degree',
18 | 'scatter_',
19 | 'softmax',
20 | 'is_undirected',
21 | 'to_undirected',
22 | 'contains_self_loops',
23 | 'remove_self_loops',
24 | 'add_self_loops',
25 | 'add_self_edge_attr_loops',
26 | 'contains_isolated_nodes',
27 | 'one_hot',
28 | 'grid',
29 | 'normalized_cut',
30 | 'dense_to_sparse',
31 | 'sparse_to_dense',
32 | 'to_batch',
33 | 'to_scipy_sparse_matrix',
34 | 'to_networkx',
35 | 'accuracy',
36 | 'true_positive',
37 | 'true_negative',
38 | 'false_positive',
39 | 'false_negative',
40 | 'precision',
41 | 'recall',
42 | 'f1_score',
43 | ]
44 |
--------------------------------------------------------------------------------
/NC/benchmark/methods/RSHN/torch_geometric/utils/convert.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import scipy.sparse
3 | import networkx as nx
4 |
5 | from .num_nodes import maybe_num_nodes
6 |
7 |
8 | def to_scipy_sparse_matrix(edge_index, edge_attr=None, num_nodes=None):
9 | row, col = edge_index.cpu()
10 |
11 | if edge_attr is None:
12 | edge_attr = torch.ones(row.size(0))
13 | else:
14 | edge_attr = edge_attr.view(-1).cpu()
15 | assert edge_attr.size(0) == row.size(0)
16 |
17 | N = maybe_num_nodes(edge_index, num_nodes)
18 | out = scipy.sparse.coo_matrix((edge_attr, (row, col)), (N, N))
19 | return out
20 |
21 |
22 | def to_networkx(edge_index, x=None, edge_attr=None, pos=None, num_nodes=None):
23 | num_nodes = num_nodes if x is None else x.size(0)
24 | num_nodes = num_nodes if pos is None else pos.size(0)
25 | num_nodes = maybe_num_nodes(edge_index, num_nodes)
26 |
27 | G = nx.DiGraph()
28 |
29 | for i in range(num_nodes):
30 | G.add_node(i)
31 | if x is not None:
32 | G.nodes[i]['x'] = x[i].cpu().numpy()
33 | if pos is not None:
34 | G.nodes[i]['pos'] = pos[i].cpu().numpy()
35 |
36 | for i in range(edge_index.size(1)):
37 | source, target = edge_index[0][i].item(), edge_index[1][i].item()
38 | G.add_edge(source, target)
39 | if edge_attr is not None:
40 | if edge_attr.numel() == edge_attr.size(0):
41 | G[source][target]['weight'] = edge_attr[i].item()
42 | else:
43 | G[source][target]['weight'] = edge_attr[i].cpu().numpy()
44 |
45 | return G
46 |
--------------------------------------------------------------------------------
/NC/benchmark/methods/RSHN/torch_geometric/utils/degree.py:
--------------------------------------------------------------------------------
1 | import torch
2 |
3 | from .num_nodes import maybe_num_nodes
4 |
5 |
6 | def degree(index, num_nodes=None, dtype=None):
7 | """Computes the degree of a given index tensor.
8 |
9 | Args:
10 | index (LongTensor): Source or target indices of edges.
11 | num_nodes (int, optional): The number of nodes in :attr:`index`.
12 | (default: :obj:`None`)
13 | dtype (:obj:`torch.dtype`, optional): The desired data type of the
14 | returned tensor.
15 |
16 | :rtype: :class:`Tensor`
17 |
18 | .. testsetup::
19 |
20 | import torch
21 |
22 | .. testcode::
23 |
24 | from torch_geometric.utils import degree
25 | index = torch.tensor([0, 1, 0, 2, 0])
26 | out = degree(index)
27 | """
28 |
29 | num_nodes = maybe_num_nodes(index, num_nodes)
30 | out = torch.zeros((num_nodes), dtype=dtype, device=index.device)
31 | return out.scatter_add_(0, index, out.new_ones((index.size(0))))
32 |
--------------------------------------------------------------------------------
/NC/benchmark/methods/RSHN/torch_geometric/utils/grid.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torch_sparse import coalesce
3 |
4 |
5 | def grid(height, width, dtype=None, device=None):
6 | edge_index = grid_index(height, width, device)
7 | pos = grid_pos(height, width, dtype, device)
8 | return edge_index, pos
9 |
10 |
11 | def grid_index(height, width, device=None):
12 | w = width
13 | kernel = [-w - 1, -1, w - 1, -w, 0, w, -w + 1, 1, w + 1]
14 | kernel = torch.tensor(kernel, device=device)
15 |
16 | row = torch.arange(height * width, dtype=torch.long, device=device)
17 | row = row.view(-1, 1).repeat(1, kernel.size(0))
18 | col = row + kernel.view(1, -1)
19 | row, col = row.view(height, -1), col.view(height, -1)
20 | index = torch.arange(3, row.size(1) - 3, dtype=torch.long, device=device)
21 | row, col = row[:, index].view(-1), col[:, index].view(-1)
22 |
23 | mask = (col >= 0) & (col < height * width)
24 | row, col = row[mask], col[mask]
25 |
26 | edge_index = torch.stack([row, col], dim=0)
27 | edge_index, _ = coalesce(edge_index, None, height * width, height * width)
28 |
29 | return edge_index
30 |
31 |
32 | def grid_pos(height, width, dtype=None, device=None):
33 | x = torch.arange(width, dtype=dtype, device=device)
34 | y = (height - 1) - torch.arange(height, dtype=dtype, device=device)
35 |
36 | x = x.repeat(height)
37 | y = y.unsqueeze(-1).repeat(1, width).view(-1)
38 |
39 | return torch.stack([x, y], dim=-1)
40 |
--------------------------------------------------------------------------------
/NC/benchmark/methods/RSHN/torch_geometric/utils/isolated.py:
--------------------------------------------------------------------------------
1 | import torch
2 |
3 | from .num_nodes import maybe_num_nodes
4 | from .loop import remove_self_loops
5 |
6 |
7 | def contains_isolated_nodes(edge_index, num_nodes=None):
8 | num_nodes = maybe_num_nodes(edge_index, num_nodes)
9 | (row, _), _ = remove_self_loops(edge_index)
10 | return torch.unique(row).size(0) < num_nodes
11 |
--------------------------------------------------------------------------------
/NC/benchmark/methods/RSHN/torch_geometric/utils/loop.py:
--------------------------------------------------------------------------------
1 | import torch
2 |
3 | from .num_nodes import maybe_num_nodes
4 |
5 |
6 | def contains_self_loops(edge_index):
7 | row, col = edge_index
8 | mask = row == col
9 | return mask.sum().item() > 0
10 |
11 |
12 | def remove_self_loops(edge_index, edge_attr=None):
13 | row, col = edge_index
14 | mask = row != col
15 | edge_attr = edge_attr if edge_attr is None else edge_attr[mask]
16 | mask = mask.unsqueeze(0).expand_as(edge_index)
17 | edge_index = edge_index[mask].view(2, -1)
18 |
19 | return edge_index, edge_attr
20 |
21 |
22 | def add_self_loops(edge_index, num_nodes=None):
23 | num_nodes = maybe_num_nodes(edge_index, num_nodes)
24 |
25 | dtype, device = edge_index.dtype, edge_index.device
26 | loop = torch.arange(0, num_nodes, dtype=dtype, device=device)
27 | loop = loop.unsqueeze(0).repeat(2, 1)
28 | edge_index = torch.cat([edge_index, loop], dim=1)
29 |
30 | return edge_index
31 |
32 | def add_self_edge_attr_loops(edge_attr, num_nodes=None):
33 | dtype, device = edge_attr.dtype, edge_attr.device
34 | loop = torch.ones(num_nodes, dtype=dtype, device=device)
35 | edge_attr = torch.cat([edge_attr, loop], dim=0)
36 |
37 | return edge_attr
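For illustration, a short sketch (an assumption: run with this vendored `torch_geometric` package on the path) of how the three helpers above compose, mirroring their use in `RelationConv.forward`:

```python
import torch
from torch_geometric.utils import (remove_self_loops, add_self_loops,
                                   add_self_edge_attr_loops)

edge_index = torch.tensor([[0, 1, 1],
                           [1, 0, 1]])        # edge (1, 1) is a self-loop
edge_attr = torch.tensor([1.0, 2.0, 3.0])

edge_index, edge_attr = remove_self_loops(edge_index, edge_attr)  # drops (1, 1)
edge_index = add_self_loops(edge_index, 2)          # appends (0, 0) and (1, 1) last
edge_attr = add_self_edge_attr_loops(edge_attr, 2)  # appends weight 1.0 per loop
print(edge_index, edge_attr)  # 4 edges, weights [1., 2., 1., 1.]
```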
--------------------------------------------------------------------------------
/NC/benchmark/methods/RSHN/torch_geometric/utils/metric.py:
--------------------------------------------------------------------------------
1 | from __future__ import division
2 |
3 | import torch
4 |
5 |
6 | def accuracy(pred, target):
7 | return (pred == target).sum().item() / target.numel()
8 |
9 |
10 | def true_positive(pred, target, num_classes):
11 | out = []
12 | for i in range(num_classes):
13 | out.append(((pred == i) & (target == i)).sum())
14 |
15 | return torch.tensor(out)
16 |
17 |
18 | def true_negative(pred, target, num_classes):
19 | out = []
20 | for i in range(num_classes):
21 | out.append(((pred != i) & (target != i)).sum())
22 |
23 | return torch.tensor(out)
24 |
25 |
26 | def false_positive(pred, target, num_classes):
27 | out = []
28 | for i in range(num_classes):
29 | out.append(((pred == i) & (target != i)).sum())
30 |
31 | return torch.tensor(out)
32 |
33 |
34 | def false_negative(pred, target, num_classes):
35 | out = []
36 | for i in range(num_classes):
37 | out.append(((pred != i) & (target == i)).sum())
38 |
39 | return torch.tensor(out)
40 |
41 |
42 | def precision(pred, target, num_classes):
43 | tp = true_positive(pred, target, num_classes).to(torch.float)
44 | fp = false_positive(pred, target, num_classes).to(torch.float)
45 |
46 | out = tp / (tp + fp)
47 | out[torch.isnan(out)] = 0
48 |
49 | return out
50 |
51 |
52 | def recall(pred, target, num_classes):
53 | tp = true_positive(pred, target, num_classes).to(torch.float)
54 | fn = false_negative(pred, target, num_classes).to(torch.float)
55 |
56 | out = tp / (tp + fn)
57 | out[torch.isnan(out)] = 0
58 |
59 | return out
60 |
61 |
62 | def f1_score(pred, target, num_classes):
63 | prec = precision(pred, target, num_classes)
64 | rec = recall(pred, target, num_classes)
65 |
66 | score = 2 * (prec * rec) / (prec + rec)
67 | score[torch.isnan(score)] = 0
68 |
69 | return score
70 |
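A small usage sketch (assuming this vendored package is importable) showing the per-class outputs on toy predictions:

```python
import torch
from torch_geometric.utils import precision, recall, f1_score

pred   = torch.tensor([0, 1, 1, 2, 0])
target = torch.tensor([0, 1, 0, 2, 2])

print(precision(pred, target, num_classes=3))  # one value per class
print(recall(pred, target, num_classes=3))
print(f1_score(pred, target, num_classes=3))
```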
--------------------------------------------------------------------------------
/NC/benchmark/methods/RSHN/torch_geometric/utils/normalized_cut.py:
--------------------------------------------------------------------------------
1 | from torch_geometric.utils import degree
2 |
3 |
4 | def normalized_cut(edge_index, edge_attr, num_nodes=None):
5 | row, col = edge_index
6 | deg = 1 / degree(row, num_nodes, edge_attr.dtype)
7 | deg = deg[row] + deg[col]
8 | cut = edge_attr * deg
9 | return cut
10 |
--------------------------------------------------------------------------------
/NC/benchmark/methods/RSHN/torch_geometric/utils/num_nodes.py:
--------------------------------------------------------------------------------
1 | def maybe_num_nodes(edge_index, num_nodes=None):
2 | return edge_index.max().item() + 1 if num_nodes is None else num_nodes
3 |
--------------------------------------------------------------------------------
/NC/benchmark/methods/RSHN/torch_geometric/utils/one_hot.py:
--------------------------------------------------------------------------------
1 | import torch
2 |
3 | from .repeat import repeat
4 |
5 |
6 | def one_hot(src, num_classes=None, dtype=None):
7 | src = src.to(torch.long)
8 | src = src.unsqueeze(-1) if src.dim() == 1 else src
9 | assert src.dim() == 2
10 |
11 | if num_classes is None:
12 | num_classes = src.max(dim=0)[0] + 1
13 | else:
14 | num_classes = torch.tensor(
15 | repeat(num_classes, length=src.size(1)),
16 | dtype=torch.long,
17 | device=src.device)
18 |
19 | if src.size(1) > 1:
20 | zero = torch.tensor([0], device=src.device)
21 | src = src + torch.cat([zero, torch.cumsum(num_classes, 0)[:-1]])
22 |
23 | size = src.size(0), num_classes.sum()
24 | out = torch.zeros(size, dtype=dtype, device=src.device)
25 | out.scatter_(1, src, 1)
26 | return out
27 |
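A quick sketch (assuming this vendored package is importable) of the single-column case:

```python
import torch
from torch_geometric.utils import one_hot

src = torch.tensor([0, 2, 1])
print(one_hot(src))
# tensor([[1., 0., 0.],
#         [0., 0., 1.],
#         [0., 1., 0.]])
```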
--------------------------------------------------------------------------------
/NC/benchmark/methods/RSHN/torch_geometric/utils/repeat.py:
--------------------------------------------------------------------------------
1 | import numbers
2 | import itertools
3 |
4 |
5 | def repeat(src, length):
6 | if src is None:
7 | return None
8 | if isinstance(src, numbers.Number):
9 | return list(itertools.repeat(src, length))
10 | if (len(src) > length):
11 | return src[:length]
12 | if (len(src) < length):
13 | return src + list(itertools.repeat(src[-1], length - len(src)))
14 | return src
15 |
--------------------------------------------------------------------------------
/NC/benchmark/methods/RSHN/torch_geometric/utils/scatter.py:
--------------------------------------------------------------------------------
1 | import torch_scatter
2 |
3 |
4 | def scatter_(name, src, index, dim_size=None):
5 | r"""Aggregates all values from the :attr:`src` tensor at the indices
6 | specified in the :attr:`index` tensor along the first dimension.
7 | If multiple indices reference the same location, their contributions
8 | are aggregated according to :attr:`name` (either :obj:`"add"`,
9 | :obj:`"mean"` or :obj:`"max"`).
10 |
11 | Args:
12 | name (string): The aggregation to use (:obj:`"add"`, :obj:`"mean"`,
13 | :obj:`"max"`).
14 | src (Tensor): The source tensor.
15 | index (LongTensor): The indices of elements to scatter.
16 | dim_size (int, optional): Automatically create output tensor with size
17 | :attr:`dim_size` in the first dimension. If set to :attr:`None`, a
18 | minimal sized output tensor is returned. (default: :obj:`None`)
19 |
20 | :rtype: :class:`Tensor`
21 |
22 | .. testsetup::
23 |
24 | import torch
25 |
26 | .. testcode::
27 |
28 | from torch_geometric.utils import scatter_
29 | src = torch.Tensor([2, 3, -2, 1, 1])
30 | index = torch.tensor([0, 1, 0, 1, 2])
31 | out = scatter_("add", src, index)
32 | """
33 |
34 | assert name in ['add', 'mean']#, 'max']
35 |
36 | op = getattr(torch_scatter, 'scatter_{}'.format(name))
37 | # fill_value = -1e38 if name is 'max' else 0
38 |
39 | out = op(src, index, 0, None, dim_size)#, fill_value)
40 | if isinstance(out, tuple):
41 | out = out[0]
42 |
43 | # if name is 'max':
44 | # out[out == fill_value] = 0
45 |
46 | return out
47 |
--------------------------------------------------------------------------------
/NC/benchmark/methods/RSHN/torch_geometric/utils/softmax.py:
--------------------------------------------------------------------------------
1 | from torch_scatter import scatter_max, scatter_add
2 |
3 | from .num_nodes import maybe_num_nodes
4 |
5 |
6 | def softmax(src, index, num_nodes=None):
7 | r"""Sparse softmax of all values from the :attr:`src` tensor at the indices
8 | specified in the :attr:`index` tensor along the first dimension.
9 |
10 | Args:
11 | src (Tensor): The source tensor.
12 | index (LongTensor): The indices of elements for applying the softmax.
13 | num_nodes (int, optional): Automatically create output tensor with size
14 | :attr:`num_nodes` in the first dimension. If set to :attr:`None`, a
15 | minimal sized output tensor is returned. (default: :obj:`None`)
16 |
17 | :rtype: :class:`Tensor`
18 |
19 | .. testsetup::
20 |
21 | import torch
22 |
23 | .. testcode::
24 |
25 | from torch_geometric.utils import softmax
26 | src = torch.Tensor([2, 3, -2, 1, 1])
27 | index = torch.tensor([0, 1, 0, 1, 2])
28 | out = softmax(src, index)
29 | """
30 |
31 | num_nodes = maybe_num_nodes(index, num_nodes)
32 |
33 | out = src - scatter_max(src, index, dim=0, dim_size=num_nodes)[0][index]
34 | out = out.exp()
35 | out = out / scatter_add(out, index, dim=0, dim_size=num_nodes)[index]
36 |
37 | return out
38 |
--------------------------------------------------------------------------------
/NC/benchmark/methods/RSHN/torch_geometric/utils/sparse.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torch_sparse import coalesce
3 |
4 | from .num_nodes import maybe_num_nodes
5 |
6 |
7 | def dense_to_sparse(tensor):
8 | index = tensor.nonzero().t().contiguous()
9 | value = tensor[index[0], index[1]]
10 | index, value = coalesce(index, value, tensor.size(0), tensor.size(1))
11 | return index, value
12 |
13 |
14 | def sparse_to_dense(edge_index, edge_attr, num_nodes=None):
15 | N = maybe_num_nodes(edge_index, num_nodes)
16 |
17 | adj = torch.sparse_coo_tensor(edge_index, edge_attr, torch.Size([N, N]))
18 | return adj.to_dense()
19 |
--------------------------------------------------------------------------------
/NC/benchmark/methods/RSHN/torch_geometric/utils/to_batch.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torch_scatter import scatter_add
3 |
4 |
5 | def to_batch(x, batch, fill_value=0):
6 | num_nodes = scatter_add(batch.new_ones(x.size(0)), batch, dim=0)
7 | batch_size, max_num_nodes = num_nodes.size(0), num_nodes.max().item()
8 | cum_nodes = torch.cat([batch.new_zeros(1), num_nodes.cumsum(dim=0)], dim=0)
9 |
10 | index = torch.arange(batch.size(0), dtype=torch.long, device=x.device)
11 | index = (index - cum_nodes[batch]) + (batch * max_num_nodes)
12 |
13 | size = [batch_size * max_num_nodes] + list(x.size())[1:]
14 | batch_x = x.new_full(size, fill_value)
15 | batch_x[index] = x
16 | size = [batch_size, max_num_nodes] + list(x.size())[1:]
17 | batch_x = batch_x.view(size)
18 |
19 | return batch_x, num_nodes
20 |
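Since `to_batch` is undocumented, a short sketch (assuming this vendored package is importable) of what it produces: graphs of different sizes are padded into one dense `[batch_size, max_num_nodes, feat]` tensor:

```python
import torch
from torch_geometric.utils import to_batch

x = torch.arange(10, dtype=torch.float).view(5, 2)  # 5 nodes, 2 features each
batch = torch.tensor([0, 0, 0, 1, 1])  # nodes 0-2 in graph 0, nodes 3-4 in graph 1

batch_x, num_nodes = to_batch(x, batch)
print(batch_x.shape)  # torch.Size([2, 3, 2]) -- graph 1 is zero-padded to 3 nodes
print(num_nodes)      # tensor([3, 2])
```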
--------------------------------------------------------------------------------
/NC/benchmark/methods/RSHN/torch_geometric/utils/undirected.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torch_sparse import coalesce
3 |
4 | from .num_nodes import maybe_num_nodes
5 |
6 |
7 | def is_undirected(edge_index, num_nodes=None):
8 | num_nodes = maybe_num_nodes(edge_index, num_nodes)
9 | edge_index, _ = coalesce(edge_index, None, num_nodes, num_nodes)
10 | undirected_edge_index = to_undirected(edge_index, num_nodes=num_nodes)
11 | return edge_index.size(1) == undirected_edge_index.size(1)
12 |
13 |
14 | def to_undirected(edge_index, num_nodes=None):
15 | num_nodes = maybe_num_nodes(edge_index, num_nodes)
16 |
17 | row, col = edge_index
18 | row, col = torch.cat([row, col], dim=0), torch.cat([col, row], dim=0)
19 | edge_index = torch.stack([row, col], dim=0)
20 | edge_index, _ = coalesce(edge_index, None, num_nodes, num_nodes)
21 |
22 | return edge_index
23 |
--------------------------------------------------------------------------------
/NC/benchmark/methods/baseline/README.md:
--------------------------------------------------------------------------------
1 | # Simple-HGN for HGB
2 |
3 | For the message-passing-with-relation-attention version:
4 |
5 | ```
6 | python run_new.py --dataset DBLP
7 | python run_new.py --dataset ACM --feats-type 2
8 | python run_multi.py --dataset IMDB --feats-type 0
9 | python run_new.py --dataset Freebase
10 | ```
11 |
12 | ## running environment
13 |
14 | * torch 1.6.0 cuda 10.1
15 | * dgl 0.4.3 cuda 10.1
16 | * networkx 2.3
17 | * scikit-learn 0.23.2
18 | * scipy 1.5.2
19 |
--------------------------------------------------------------------------------
/NC/benchmark/methods/baseline/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/HGB/ca6fd5bb0c1ca32e63b132c8bfe8f11a4a6629fe/NC/benchmark/methods/baseline/utils/__init__.py
--------------------------------------------------------------------------------
/NC/benchmark/methods/baseline/utils/data.py:
--------------------------------------------------------------------------------
1 | import networkx as nx
2 | import numpy as np
3 | import scipy
4 | import pickle
5 | import scipy.sparse as sp
6 |
7 | def load_data(prefix='DBLP'):
8 | from scripts.data_loader import data_loader
9 | dl = data_loader('../../data/'+prefix)
10 | features = []
11 | for i in range(len(dl.nodes['count'])):
12 | th = dl.nodes['attr'][i]
13 | if th is None:
14 | features.append(sp.eye(dl.nodes['count'][i]))
15 | else:
16 | features.append(th)
17 | adjM = sum(dl.links['data'].values())
18 | labels = np.zeros((dl.nodes['count'][0], dl.labels_train['num_classes']), dtype=int)
19 | val_ratio = 0.2
20 | train_idx = np.nonzero(dl.labels_train['mask'])[0]
21 | np.random.shuffle(train_idx)
22 | split = int(train_idx.shape[0]*val_ratio)
23 | val_idx = train_idx[:split]
24 | train_idx = train_idx[split:]
25 | train_idx = np.sort(train_idx)
26 | val_idx = np.sort(val_idx)
27 | test_idx = np.nonzero(dl.labels_test['mask'])[0]
28 | labels[train_idx] = dl.labels_train['data'][train_idx]
29 | labels[val_idx] = dl.labels_train['data'][val_idx]
30 | if prefix != 'IMDB':
31 | labels = labels.argmax(axis=1)
32 | train_val_test_idx = {}
33 | train_val_test_idx['train_idx'] = train_idx
34 | train_val_test_idx['val_idx'] = val_idx
35 | train_val_test_idx['test_idx'] = test_idx
36 | return features,\
37 | adjM, \
38 | labels,\
39 | train_val_test_idx,\
40 | dl
41 |
--------------------------------------------------------------------------------
/NC/benchmark/methods/baseline/utils/pytorchtools.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import torch
3 |
4 |
5 | class EarlyStopping:
6 | """Early stops the training if validation loss doesn't improve after a given patience."""
7 | def __init__(self, patience, verbose=False, delta=0, save_path='checkpoint.pt'):
8 | """
9 | Args:
10 | patience (int): How long to wait after last time validation loss improved.
11 | Default: 7
12 | verbose (bool): If True, prints a message for each validation loss improvement.
13 | Default: False
14 | delta (float): Minimum change in the monitored quantity to qualify as an improvement.
15 | Default: 0
16 | """
17 | self.patience = patience
18 | self.verbose = verbose
19 | self.counter = 0
20 | self.best_score = None
21 | self.early_stop = False
22 | self.val_loss_min = np.Inf
23 | self.delta = delta
24 | self.save_path = save_path
25 |
26 | def __call__(self, val_loss, model):
27 |
28 | score = -val_loss
29 |
30 | if self.best_score is None:
31 | self.best_score = score
32 | self.save_checkpoint(val_loss, model)
33 | elif score < self.best_score - self.delta:
34 | self.counter += 1
35 | print(f'EarlyStopping counter: {self.counter} out of {self.patience}')
36 | if self.counter >= self.patience:
37 | self.early_stop = True
38 | else:
39 | self.best_score = score
40 | self.save_checkpoint(val_loss, model)
41 | self.counter = 0
42 |
43 | def save_checkpoint(self, val_loss, model):
44 | """Saves model when validation loss decrease."""
45 | if self.verbose:
46 | print(f'Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}). Saving model ...')
47 | torch.save(model.state_dict(), self.save_path)
48 | self.val_loss_min = val_loss
49 |
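A minimal sketch of the intended calling pattern, assuming the `EarlyStopping` class above is in scope; the validation losses are made up for illustration:

```python
import torch

model = torch.nn.Linear(16, 4)
stopper = EarlyStopping(patience=3, verbose=True, save_path='checkpoint.pt')

for val_loss in [1.0, 0.8, 0.85, 0.9, 0.95]:  # made-up per-epoch validation losses
    stopper(val_loss, model)
    if stopper.early_stop:
        break

model.load_state_dict(torch.load('checkpoint.pt'))  # restore the best checkpoint
```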
--------------------------------------------------------------------------------
/NC/benchmark/scripts/README.md:
--------------------------------------------------------------------------------
1 | ## Evaluate micro_F1 and macro_F1 with prediction files.
2 | ```bash
3 | python NC_F1.py --pred_zip nc.zip --log out.log
4 | ```
--------------------------------------------------------------------------------
/NC/benchmark/scripts/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/HGB/ca6fd5bb0c1ca32e63b132c8bfe8f11a4a6629fe/NC/benchmark/scripts/__init__.py
--------------------------------------------------------------------------------
/NC/benchmark/test.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from scripts.data_loader import data_loader
3 |
4 | dl = data_loader('./data/DBLP')
5 | print(dl.nodes)
6 | print(dl.links)
7 | print(dl.labels_train)
8 | print(dl.labels_train['data'][dl.labels_train['mask']])
9 | pred = dl.labels_test['data'][dl.labels_test['mask']]
10 | print(dl.evaluate(pred))
11 | train_idx = np.nonzero(dl.labels_train['mask'])[0]
12 | test_idx = np.nonzero(dl.labels_test['mask'])[0]
13 | print(train_idx)
14 | print(train_idx.shape)
15 | print(test_idx)
16 | print(test_idx.shape)
17 |
18 | meta = [(0, 1), (1, 0)]
19 | print(dl.get_meta_path(meta))
20 | print(dl.get_full_meta_path(meta)[0])
21 |
--------------------------------------------------------------------------------
/Recom/KGAT/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2020 Xiang Wang
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/Recom/KGAT/Log/README.md:
--------------------------------------------------------------------------------
1 | # Training Log of KGAT
2 |
3 | To demonstrate the **reproducibility of our best performance** and to **help researchers track their own training runs**, here I offer the training logs for KGAT on three datasets, recording the `loss` and four evaluation metrics (`recall@k`, `precision@k`, `hit@k`, and `ndcg@k`) every 10 epochs.
4 |
5 | ## Record Example
6 | One record example is shown as follows:
7 | ```
8 | Epoch 19 [238.5s + 104.4s]: train==[30.47060=16.64901 + 13.32211 + 0.49948], recall=[0.06590, 0.19621], precision=[0.01598, 0.00987], hit=[0.25201, 0.55005], ndcg=[0.08068, 0.15693]
9 | save the weights in path: weights/yelp2018/kgat_si_sum_bi_l3/64-32-16/l0.0001_r1e-05-1e-05
10 | ```
11 | where:
12 | * `[238.5s + 104.4s]` shows the time cost for one training and one testing, respectively;
13 | * `train==[30.47060=16.64901 + 13.32211 + 0.49948]` breaks the total loss down into the BPR loss for recommendation, the BPR loss for knowledge graph embedding, and the L2 regularization term covering both phases;
14 | * `recall=[0.06590, 0.19621]` reports the `recall@20` and `recall@100` scores; analogously for the precision, hit, and ndcg scores.
15 |
16 | The final performance is reported as:
17 | ```
18 | Best Iter=[95]@[166677.6] recall=[0.08820 0.12068 0.14174 0.15773 0.17082], precision=[0.03521 0.02661 0.02217 0.01939 0.01741], hit=[0.36111 0.46397 0.52597 0.56730 0.59930], ndcg=[0.13660 0.16881 0.19020 0.20689 0.22085]
19 | ```
20 | where:
21 | * `Best Iter=[95]@[166677.6]` shows that the best performance appears at epoch `95*10` (since we log every 10 epochs);
22 | * `recall=[0.08820 0.12068 0.14174 0.15773 0.17082]` displays the final performance w.r.t. `recall@k`, where `k` spans from `20`, `40`, `60`, `80` to `100`.
23 |
24 | ## Some Points
25 | Here I would like to clarify some points:
26 | * The training and testing time costs might be different based on the running machines.
27 | * The training loss might be slightly different due to different random seeds.
28 | * Note that although the hyperparameter `--Ks` is set to `[20,40,60,80,100]`, only the results for k of `20` and `100` are shown here due to limited space; when training finishes, you will obtain the scores for every value in `Ks`.
29 |
30 |
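For convenience, a minimal Python sketch (not part of the original repo; the regular expressions are assumptions based on the record format above) that extracts the epoch and metric lists from one record line:

```python
import re

line = ("Epoch 19 [238.5s + 104.4s]: train==[30.47060=16.64901 + 13.32211 + 0.49948], "
        "recall=[0.06590, 0.19621], precision=[0.01598, 0.00987], "
        "hit=[0.25201, 0.55005], ndcg=[0.08068, 0.15693]")

epoch = int(re.search(r'Epoch (\d+)', line).group(1))
metrics = {name: [float(v) for v in vals.split(',')]
           for name, vals in re.findall(r'(recall|precision|hit|ndcg)=\[([^\]]+)\]', line)}
print(epoch, metrics)
```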
--------------------------------------------------------------------------------
/Recom/KGAT/Model/README.md:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/Recom/KGAT/Model/utility/README.md:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/Recom/KGAT/Model/utility/helper.py:
--------------------------------------------------------------------------------
1 | '''
2 | Created on Aug 19, 2016
3 | @author: Xiang Wang (xiangwang@u.nus.edu)
4 | '''
5 | __author__ = "xiangwang"
6 | import os
7 | import re
8 |
9 | def txt2list(file_src):
10 | with open(file_src, "r") as orig_file:  # close the file after reading
11 | lines = orig_file.readlines()
12 | return lines
13 |
14 |
15 | def ensureDir(dir_path):
16 | d = os.path.dirname(dir_path)
17 | if not os.path.exists(d):
18 | os.makedirs(d)
19 |
20 |
21 | def uni2str(unicode_str):
22 | return str(unicode_str.encode('ascii', 'ignore')).replace('\n', '').strip()
23 |
24 |
25 | def hasNumbers(inputString):
26 | return bool(re.search(r'\d', inputString))
27 |
28 | def delMultiChar(inputString, chars):
29 | for ch in chars:
30 | inputString = inputString.replace(ch, '')
31 | return inputString
32 |
33 | def merge_two_dicts(x, y):
34 | z = x.copy() # start with x's keys and values
35 | z.update(y) # modifies z with y's keys and values & returns None
36 | return z
37 |
38 | def early_stopping(log_value, best_value, stopping_step, expected_order='acc', flag_step=100):
39 | # early stopping strategy:
40 | assert expected_order in ['acc', 'dec']
41 |
42 | if (expected_order == 'acc' and log_value >= best_value) or (expected_order == 'dec' and log_value <= best_value):
43 | stopping_step = 0
44 | best_value = log_value
45 | else:
46 | stopping_step += 1
47 |
48 | if stopping_step >= flag_step:
49 | print("Early stopping is trigger at step: {} log:{}".format(flag_step, log_value))
50 | should_stop = True
51 | else:
52 | should_stop = False
53 | return best_value, stopping_step, should_stop
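A minimal sketch of the intended calling pattern for `early_stopping` (the metric values are made up for illustration):

```python
best_value, stopping_step = 0.0, 0
for recall_at_20 in [0.050, 0.060, 0.055, 0.052]:  # made-up per-epoch validation metrics
    best_value, stopping_step, should_stop = early_stopping(
        recall_at_20, best_value, stopping_step, expected_order='acc', flag_step=2)
    if should_stop:
        break
print(best_value)  # 0.06 -- the run stops after two epochs without improvement
```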
--------------------------------------------------------------------------------
/Recom/KGAT/Model/utility/loader_bprmf.py:
--------------------------------------------------------------------------------
1 | '''
2 | Created on Dec 18, 2018
3 | Tensorflow Implementation of the Baseline Model, BPRMF, in:
4 | Wang Xiang et al. KGAT: Knowledge Graph Attention Network for Recommendation. In KDD 2019.
5 | @author: Xiang Wang (xiangwang@u.nus.edu)
6 | '''
7 | from utility.load_data import Data
8 |
9 | class BPRMF_loader(Data):
10 | def __init__(self, args, path):
11 | super().__init__(args, path)
12 |
13 | def generate_train_batch(self):
14 | users, pos_items, neg_items = self._generate_train_cf_batch()
15 |
16 | batch_data = {}
17 | batch_data['users'] = users
18 | batch_data['pos_items'] = pos_items
19 | batch_data['neg_items'] = neg_items
20 |
21 | return batch_data
22 |
23 | def generate_train_feed_dict(self, model, batch_data):
24 | feed_dict = {
25 | model.users: batch_data['users'],
26 | model.pos_items: batch_data['pos_items'],
27 | model.neg_items: batch_data['neg_items']
28 | }
29 |
30 | return feed_dict
31 |
32 |
33 | def generate_test_feed_dict(self, model, user_batch, item_batch, drop_flag=False):
34 | feed_dict = {
35 | model.users: user_batch,
36 | model.pos_items: item_batch
37 | }
38 | return feed_dict
39 |
40 |
--------------------------------------------------------------------------------
/Recom/KGCN/.gitignore:
--------------------------------------------------------------------------------
1 | *.pyc
2 | *.swp
3 |
4 | # Byte-compiled / optimized / DLL files
5 | __pycache__/
6 | *.py[cod]
7 | *$py.class
8 |
9 | # C extensions
10 | *.so
11 |
12 | # Distribution / packaging
13 | .Python
14 | env/
15 | build/
16 | develop-eggs/
17 | dist/
18 | downloads/
19 | eggs/
20 | .eggs/
21 | lib/
22 | lib64/
23 | parts/
24 | sdist/
25 | var/
26 | *.egg-info/
27 | .installed.cfg
28 | *.egg
29 |
30 | # PyInstaller
31 | # Usually these files are written by a python script from a template
32 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
33 | *.manifest
34 | *.spec
35 |
36 | # Installer logs
37 | pip-log.txt
38 | pip-delete-this-directory.txt
39 |
40 | # Unit test / coverage reports
41 | htmlcov/
42 | .tox/
43 | .coverage
44 | .coverage.*
45 | .cache
46 | nosetests.xml
47 | coverage.xml
48 | *,cover
49 | .hypothesis/
50 |
51 | # Translations
52 | *.mo
53 | *.pot
54 |
55 | # Django stuff:
56 | *.log
57 | local_settings.py
58 |
59 | # Flask stuff:
60 | instance/
61 | .webassets-cache
62 |
63 | # Scrapy stuff:
64 | .scrapy
65 |
66 | # Sphinx documentation
67 | docs/_build/
68 |
69 | # PyBuilder
70 | target/
71 |
72 | # IPython Notebook
73 | .ipynb_checkpoints
74 |
75 | # pyenv
76 | .python-version
77 |
78 | # celery beat schedule file
79 | celerybeat-schedule
80 |
81 | # dotenv
82 | .env
83 |
84 | # virtualenv
85 | venv/
86 | ENV/
87 |
88 | # Spyder project settings
89 | .spyderproject
90 |
91 | # Rope project settings
92 | .ropeproject
93 |
94 |
95 | # self added
96 | .idea/*
97 | data/movie/*
98 | data/music/*
99 | !data/movie/item_index2entity_id.txt
100 | !data/movie/kg.txt
101 | !data/music/item_index2entity_id.txt
102 | !data/music/kg.txt
103 | !data/music/user_artists.dat
104 |
--------------------------------------------------------------------------------
/Recom/KGCN/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2018 Hongwei Wang
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/Recom/KGCN/README.md:
--------------------------------------------------------------------------------
1 | # KGCN code
2 |
3 | Adapted from [hwwang55/KGCN](https://github.com/hwwang55/KGCN).
4 |
5 | We replace the GNN module in the paper with GCN and GAT for comparison.
6 |
7 | ## running environment
8 |
9 | * Python 3.6
10 | * tensorflow-gpu 1.9.0
11 |
12 | ## running procedure
13 |
14 | * Download data from [tsinghua-cloud](https://cloud.tsinghua.edu.cn/d/eee49039f99e4984a30a/) or [google-drive](https://drive.google.com/file/d/1fDIJfrTfRdeMFhNxKKthrXGSI6yRmQ41/view?usp=sharing)
15 | * unzip and move data folder to KGCN/
16 | * cd to src/
17 | * run main.py
18 |
19 | ```bash
20 | python main.py --model gcn
21 | python main.py --model gat
22 | python main.py --model kgcn
23 | ```
24 |
25 | ## performance report
26 |
27 | For the MovieLens-20M dataset (other datasets to be completed):
28 |
29 | | | AUC | F1 |
30 | |------|-----------|-----------|
31 | | KGCN | 0.977 | 0.930 |
32 | | GAT | 0.978 | 0.932 |
33 | | GCN  | **0.980** | **0.937** |
34 |
35 | ***The following content is from the initial hwwang55/KGCN repo.***
36 |
37 | # KGCN
38 |
39 | This repository is the implementation of [KGCN](https://dl.acm.org/citation.cfm?id=3313417) ([arXiv](https://arxiv.org/abs/1904.12575)):
40 |
41 | > Knowledge Graph Convolutional Networks for Recommender Systems
42 | Hongwei Wang, Miao Zhao, Xing Xie, Wenjie Li, Minyi Guo.
43 | In Proceedings of The 2019 Web Conference (WWW 2019)
44 |
45 | 
46 |
47 | KGCN is **K**nowledge **G**raph **C**onvolutional **N**etworks for recommender systems, which uses the technique of graph convolutional networks (GCN) to process knowledge graphs for the purpose of recommendation.
48 |
49 |
50 | ### Files in the folder
51 |
52 | - `data/`
53 | - `movie/`
54 | - `item_index2entity_id.txt`: the mapping from item indices in the raw rating file to entity IDs in the KG;
55 | - `kg.txt`: knowledge graph file;
56 | - `music/`
57 | - `item_index2entity_id.txt`: the mapping from item indices in the raw rating file to entity IDs in the KG;
58 | - `kg.txt`: knowledge graph file;
59 | - `user_artists.dat`: raw rating file of Last.FM;
60 | - `src/`: implementations of KGCN.
61 |
62 |
63 |
64 |
65 | ### Running the code
66 | - Movie
67 | (The raw rating file of MovieLens-20M is too large to be contained in this repository.
68 | Download the dataset first.)
69 | ```
70 | $ wget http://files.grouplens.org/datasets/movielens/ml-20m.zip
71 | $ unzip ml-20m.zip
72 | $ mv ml-20m/ratings.csv data/movie/
73 | $ cd src
74 | $ python preprocess.py -d movie
75 | ```
76 | - Music
77 |   ```
78 |   $ cd src
79 |   $ python preprocess.py -d music
80 |   ```
81 | - open the `src/main.py` file;
82 | - comment out the code block of parameter settings for MovieLens-20M;
83 | - uncomment the code block of parameter settings for Last.FM;
84 | - run:
85 |   ```
86 |   $ python main.py
87 |   ```
88 |
--------------------------------------------------------------------------------
/Recom/KGCN/src/gcn/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/HGB/ca6fd5bb0c1ca32e63b132c8bfe8f11a4a6629fe/Recom/KGCN/src/gcn/__init__.py
--------------------------------------------------------------------------------
/Recom/KGCN/src/gcn/inits.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | import numpy as np
3 |
4 |
5 | def uniform(shape, scale=0.05, name=None):
6 | """Uniform init."""
7 | initial = tf.random_uniform(shape, minval=-scale, maxval=scale, dtype=tf.float32)
8 | return tf.Variable(initial, name=name)
9 |
10 |
11 | def glorot(shape, name=None):
12 | """Glorot & Bengio (AISTATS 2010) init."""
13 | init_range = np.sqrt(6.0/(shape[0]+shape[1]))
14 | initial = tf.random_uniform(shape, minval=-init_range, maxval=init_range, dtype=tf.float32)
15 | return tf.Variable(initial, name=name)
16 |
17 |
18 | def zeros(shape, name=None):
19 | """All zeros."""
20 | initial = tf.zeros(shape, dtype=tf.float32)
21 | return tf.Variable(initial, name=name)
22 |
23 |
24 | def ones(shape, name=None):
25 | """All ones."""
26 | initial = tf.ones(shape, dtype=tf.float32)
27 | return tf.Variable(initial, name=name)
--------------------------------------------------------------------------------
/Recom/KGCN/src/gcn/layers.py:
--------------------------------------------------------------------------------
1 | from gcn.inits import *
2 | import tensorflow as tf
3 |
4 |
5 | def dot(x, y, sparse=False):
6 | """Wrapper for tf.matmul (sparse vs dense)."""
7 | if sparse:
8 | res = tf.sparse_tensor_dense_matmul(x, y)
9 | else:
10 | res = tf.matmul(x, y)
11 | return res
12 |
13 |
14 | class GraphConvolution:
15 | """Graph convolution layer."""
16 | def __init__(self, input_dim, output_dim, placeholders, dropout=0., act=tf.nn.relu, bias=False,
17 | featureless=False, **kwargs):
18 | super(GraphConvolution, self).__init__(**kwargs)
19 |
20 | if dropout:
21 | self.dropout = placeholders['dropout']
22 | else:
23 | self.dropout = 0.
24 |
25 | self.act = act
26 | self.support = placeholders['support']
27 | self.featureless = featureless
28 | self.bias = bias
29 | self.vars = {}
30 |
31 | self.vars['weights'] = glorot([input_dim, output_dim])
32 | if self.bias:
33 | self.vars['bias'] = zeros([output_dim])
34 |
35 | def _call(self, inputs):
36 | x = inputs
37 |
38 | # dropout
39 | x = tf.nn.dropout(x, 1-self.dropout)
40 |
41 | # convolve
42 | for i in range(1):
43 | if not self.featureless:
44 | pre_sup = dot(x, self.vars['weights'])
45 | else:
46 | pre_sup = self.vars['weights']
47 | support = dot(self.support, pre_sup, sparse=True)
48 | output = support
49 |
50 | # bias
51 | if self.bias:
52 | output += self.vars['bias']
53 |
54 | return self.act(output)
55 |
56 |
57 | def GCN(inputs, dim, drop, A):
58 | placeholders = {'dropout':drop, 'support':A}
59 | x = GraphConvolution(dim, dim, placeholders)._call(inputs)
60 | x = GraphConvolution(dim, dim, placeholders, act=lambda x:x)._call(x)
61 | return x
62 |
--------------------------------------------------------------------------------
/Recom/KGCN/src/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/HGB/ca6fd5bb0c1ca32e63b132c8bfe8f11a4a6629fe/Recom/KGCN/src/utils/__init__.py
--------------------------------------------------------------------------------
/Recom/KGCN/src/utils/sp_gat.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import tensorflow as tf
3 |
4 | from utils import layers
5 | #from models.base_gattn import BaseGAttN
6 |
7 | class SpGAT:#(BaseGAttN):
8 | def inference(inputs, nb_classes, nb_nodes, training, attn_drop, ffd_drop,
9 | bias_mat, hid_units, n_heads, activation=tf.nn.elu,
10 | residual=False):
11 | attns = []
12 | for _ in range(n_heads[0]):
13 | attns.append(layers.sp_attn_head(inputs,
14 | adj_mat=bias_mat,
15 | out_sz=hid_units[0], activation=activation, nb_nodes=nb_nodes,
16 | in_drop=ffd_drop, coef_drop=attn_drop, residual=False))
17 | h_1 = tf.concat(attns, axis=-1)
18 | for i in range(1, len(hid_units)):
19 | h_old = h_1
20 | attns = []
21 | for _ in range(n_heads[i]):
22 | attns.append(layers.sp_attn_head(h_1,
23 | adj_mat=bias_mat,
24 | out_sz=hid_units[i], activation=activation, nb_nodes=nb_nodes,
25 | in_drop=ffd_drop, coef_drop=attn_drop, residual=residual))
26 | h_1 = tf.concat(attns, axis=-1)
27 | out = []
28 | for i in range(n_heads[-1]):
29 | out.append(layers.sp_attn_head(h_1, adj_mat=bias_mat,
30 | out_sz=nb_classes, activation=lambda x: x, nb_nodes=nb_nodes,
31 | in_drop=ffd_drop, coef_drop=attn_drop, residual=False))
32 | logits = tf.add_n(out) / n_heads[-1]
33 |
34 | return logits
35 |
--------------------------------------------------------------------------------
/Recom/KGNN-LS/src/empirical_study.py:
--------------------------------------------------------------------------------
1 | import networkx as nx
2 | import numpy as np
3 | import argparse
4 |
5 |
6 | if __name__ == '__main__':
7 | np.random.seed(555)
8 | NUM = 10000
9 |
10 | parser = argparse.ArgumentParser()
11 | parser.add_argument('-d', type=str, default='music')
12 | args = parser.parse_args()
13 | DATASET = args.d
14 |
15 | kg_np = np.load('../data/' + DATASET + '/kg_final.npy')
16 | kg = nx.Graph()
17 | kg.add_edges_from([(triple[0], triple[2]) for triple in kg_np]) # construct knowledge graph
18 |
19 | rating_np = np.load('../data/' + DATASET + '/ratings_final.npy')
20 | item_history = dict()
21 | item_set = set()
22 | for record in rating_np:
23 | user = record[0]
24 | item = record[1]
25 | rating = record[2]
26 | if rating == 1:
27 | if item not in item_history:
28 | item_history[item] = set()
29 | item_history[item].add(user)
30 | item_set.add(item)
31 |
32 | item_pair_num_no_common_rater = 0
33 | item_pair_num_with_common_rater = 0
34 | sp_no_common_rater = dict()
35 | sp_with_common_rater = dict()
36 |
37 | while True:
38 | item1, item2 = np.random.choice(list(item_set), size=2, replace=False)
39 | if item_pair_num_no_common_rater == NUM and item_pair_num_with_common_rater == NUM:
40 | break
41 | if item_pair_num_no_common_rater < NUM and len(item_history[item1] & item_history[item2]) == 0:
42 | item_pair_num_no_common_rater += 1
43 | if not nx.has_path(kg, item1, item2):
44 | sp = 'infinity'
45 | else:
46 | sp = nx.shortest_path_length(kg, item1, item2)
47 | if sp not in sp_no_common_rater:
48 | sp_no_common_rater[sp] = 0
49 | sp_no_common_rater[sp] += 1
50 | print(item_pair_num_no_common_rater, item_pair_num_with_common_rater)
51 | if item_pair_num_with_common_rater < NUM and len(item_history[item1] & item_history[item2]) > 0:
52 | item_pair_num_with_common_rater += 1
53 | if not nx.has_path(kg, item1, item2):
54 | sp = 'infinity'
55 | else:
56 | sp = nx.shortest_path_length(kg, item1, item2)
57 | if sp not in sp_with_common_rater:
58 | sp_with_common_rater[sp] = 0
59 | sp_with_common_rater[sp] += 1
60 | print(item_pair_num_no_common_rater, item_pair_num_with_common_rater)
61 |
62 | print(sp_no_common_rater)
63 | print(sp_with_common_rater)
64 |
--------------------------------------------------------------------------------
/Recom/KGNN-LS/src/gcn/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/HGB/ca6fd5bb0c1ca32e63b132c8bfe8f11a4a6629fe/Recom/KGNN-LS/src/gcn/__init__.py
--------------------------------------------------------------------------------
/Recom/KGNN-LS/src/gcn/inits.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | import numpy as np
3 |
4 |
5 | def uniform(shape, scale=0.05, name=None):
6 | """Uniform init."""
7 | initial = tf.random_uniform(shape, minval=-scale, maxval=scale, dtype=tf.float32)
8 | return tf.Variable(initial, name=name)
9 |
10 |
11 | def glorot(shape, name=None):
12 | """Glorot & Bengio (AISTATS 2010) init."""
13 | init_range = np.sqrt(6.0/(shape[0]+shape[1]))
14 | initial = tf.random_uniform(shape, minval=-init_range, maxval=init_range, dtype=tf.float32)
15 | return tf.Variable(initial, name=name)
16 |
17 |
18 | def zeros(shape, name=None):
19 | """All zeros."""
20 | initial = tf.zeros(shape, dtype=tf.float32)
21 | return tf.Variable(initial, name=name)
22 |
23 |
24 | def ones(shape, name=None):
25 | """All ones."""
26 | initial = tf.ones(shape, dtype=tf.float32)
27 | return tf.Variable(initial, name=name)
--------------------------------------------------------------------------------
/Recom/KGNN-LS/src/gcn/layers.py:
--------------------------------------------------------------------------------
1 | from gcn.inits import *
2 | import tensorflow as tf
3 |
4 |
5 | def dot(x, y, sparse=False):
6 | """Wrapper for tf.matmul (sparse vs dense)."""
7 | if sparse:
8 | res = tf.sparse_tensor_dense_matmul(x, y)
9 | else:
10 | res = tf.matmul(x, y)
11 | return res
12 |
13 |
14 | class GraphConvolution:
15 | """Graph convolution layer."""
16 |
17 | def __init__(self, input_dim, output_dim, placeholders, dropout=0., act=tf.nn.relu, bias=False,
18 | featureless=False, **kwargs):
19 | super(GraphConvolution, self).__init__(**kwargs)
20 |
21 | if dropout:
22 | self.dropout = placeholders['dropout']
23 | else:
24 | self.dropout = 0.
25 |
26 | self.act = act
27 | self.support = placeholders['support']
28 | self.featureless = featureless
29 | self.bias = bias
30 | self.vars = {}
31 |
32 | self.vars['weights'] = glorot([input_dim, output_dim])
33 | if self.bias:
34 | self.vars['bias'] = zeros([output_dim])
35 |
36 | def _call(self, inputs):
37 | x = inputs
38 |
39 | # dropout
40 | x = tf.nn.dropout(x, 1-self.dropout)
41 |
42 | # convolve
43 | for i in range(1):
44 | if not self.featureless:
45 | pre_sup = dot(x, self.vars['weights'])
46 | else:
47 | pre_sup = self.vars['weights']
48 | support = dot(self.support, pre_sup, sparse=True)
49 | output = support
50 |
51 | # bias
52 | if self.bias:
53 | output += self.vars['bias']
54 |
55 | return self.act(output)
56 |
57 |
58 | def GCN(inputs, dim, drop, A, n_layer):
59 |     placeholders = {'dropout': drop, 'support': A}
60 |
61 |     if n_layer == 1:
62 |         return GraphConvolution(dim, dim, placeholders, act=lambda x: x)._call(inputs)
63 |
64 |     # chain the layers: n_layer-1 hidden (ReLU) layers, then a final layer without activation
65 |     x = inputs
66 |     for _ in range(n_layer-1):
67 |         x = GraphConvolution(dim, dim, placeholders)._call(x)
68 |     x = GraphConvolution(dim, dim, placeholders, act=lambda x: x)._call(x)
69 |     return x
70 |
--------------------------------------------------------------------------------
/Recom/KGNN-LS/src/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/HGB/ca6fd5bb0c1ca32e63b132c8bfe8f11a4a6629fe/Recom/KGNN-LS/src/utils/__init__.py
--------------------------------------------------------------------------------
/Recom/KGNN-LS/src/utils/sp_gat.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import tensorflow as tf
3 |
4 | from utils import layers
5 | #from models.base_gattn import BaseGAttN
6 |
7 |
8 | class SpGAT: # (BaseGAttN):
9 | def inference(inputs, nb_classes, nb_nodes, training, attn_drop, ffd_drop,
10 | bias_mat, hid_units, n_heads, activation=tf.nn.elu,
11 | residual=False):
12 | attns = []
13 | if(len(n_heads) == 1):
14 | out = []
15 | for _ in range(n_heads[0]):
16 | out.append(layers.sp_attn_head(inputs,
17 | adj_mat=bias_mat,
18 | out_sz=nb_classes, activation=lambda x: x, nb_nodes=nb_nodes,
19 | in_drop=ffd_drop, coef_drop=attn_drop, residual=False))
20 | logits = tf.add_n(out) / n_heads[-1]
21 | return logits
22 |
23 | for _ in range(n_heads[0]):
24 | attns.append(layers.sp_attn_head(inputs,
25 | adj_mat=bias_mat,
26 | out_sz=hid_units[0], activation=activation, nb_nodes=nb_nodes,
27 | in_drop=ffd_drop, coef_drop=attn_drop, residual=False))
28 | h_1 = tf.concat(attns, axis=-1)
29 | for i in range(1, len(hid_units)):
30 | h_old = h_1
31 | attns = []
32 | for _ in range(n_heads[i]):
33 | attns.append(layers.sp_attn_head(h_1,
34 | adj_mat=bias_mat,
35 | out_sz=hid_units[i], activation=activation, nb_nodes=nb_nodes,
36 | in_drop=ffd_drop, coef_drop=attn_drop, residual=residual))
37 | h_1 = tf.concat(attns, axis=-1)
38 | out = []
39 | for i in range(n_heads[-1]):
40 | out.append(layers.sp_attn_head(h_1, adj_mat=bias_mat,
41 | out_sz=nb_classes, activation=lambda x: x, nb_nodes=nb_nodes,
42 | in_drop=ffd_drop, coef_drop=attn_drop, residual=False))
43 | logits = tf.add_n(out) / n_heads[-1]
44 |
45 | return logits
46 |
--------------------------------------------------------------------------------
/Recom/README.md:
--------------------------------------------------------------------------------
1 | For the benchmark experiments, Simple-HGN is in the `baseline` folder and the other methods are in the `KGAT` folder.
2 |
--------------------------------------------------------------------------------
/Recom/baseline/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2020 Xiang Wang
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/Recom/baseline/Model/utility/helper.py:
--------------------------------------------------------------------------------
1 | '''
2 | Created on Aug 19, 2016
3 | @author: Xiang Wang (xiangwang@u.nus.edu)
4 | '''
5 | __author__ = "xiangwang"
6 | import os
7 | import re
8 |
9 | def txt2list(file_src):
10 | with open(file_src, "r") as orig_file:  # close the file after reading
11 | lines = orig_file.readlines()
12 | return lines
13 |
14 |
15 | def ensureDir(dir_path):
16 | d = os.path.dirname(dir_path)
17 | if not os.path.exists(d):
18 | os.makedirs(d)
19 |
20 |
21 | def uni2str(unicode_str):
22 | return str(unicode_str.encode('ascii', 'ignore')).replace('\n', '').strip()
23 |
24 |
25 | def hasNumbers(inputString):
26 | return bool(re.search(r'\d', inputString))
27 |
28 | def delMultiChar(inputString, chars):
29 | for ch in chars:
30 | inputString = inputString.replace(ch, '')
31 | return inputString
32 |
33 | def merge_two_dicts(x, y):
34 | z = x.copy() # start with x's keys and values
35 | z.update(y) # modifies z with y's keys and values & returns None
36 | return z
37 |
38 | def early_stopping(log_value, best_value, stopping_step, expected_order='acc', flag_step=100):
39 | # early stopping strategy:
40 | assert expected_order in ['acc', 'dec']
41 |
42 | if (expected_order == 'acc' and log_value >= best_value) or (expected_order == 'dec' and log_value <= best_value):
43 | stopping_step = 0
44 | best_value = log_value
45 | else:
46 | stopping_step += 1
47 |
48 | if stopping_step >= flag_step:
49 | print("Early stopping is trigger at step: {} log:{}".format(flag_step, log_value))
50 | should_stop = True
51 | else:
52 | should_stop = False
53 | return best_value, stopping_step, should_stop
--------------------------------------------------------------------------------
/Recom/baseline/README.md:
--------------------------------------------------------------------------------
1 | # new baseline for recommendation
2 |
3 | Adapted from [xiangwang1223/knowledge_graph_attention_network](https://github.com/xiangwang1223/knowledge_graph_attention_network/tree/master/Model).
4 |
5 | ## running environment
6 |
7 | * latest versions of torch and dgl
8 |
9 | ## running procedure
10 |
11 | * Download Data folder from [tsinghua-cloud](https://cloud.tsinghua.edu.cn/d/2bafd2674d5d43299dfa/) or [google-drive](https://drive.google.com/drive/folders/19unGR1awscvbcGy4VT7pLqCdHHegkSx7?usp=sharing) and unzip to **current** folder
12 | * Download pretrain folder from [tsinghua-cloud](https://cloud.tsinghua.edu.cn/d/2bafd2674d5d43299dfa/) or [google-drive](https://drive.google.com/drive/folders/19unGR1awscvbcGy4VT7pLqCdHHegkSx7?usp=sharing) and unzip to **Model** folder
13 | * cd to the **Model** folder and run the commands below
14 |
15 | ## run
16 |
17 | ```bash
18 | python main.py --model_type baseline --dataset movie-lens --layer_size [64,32,16] --embed_size 64 --lr 0.0001 --epoch 1000 --verbose 1 --save_flag 1 --pretrain -1 --batch_size 8192 --gpu_id 0
19 | python main.py --model_type baseline --dataset last-fm --layer_size [64,32,16] --embed_size 64 --lr 0.0001 --epoch 1000 --verbose 1 --save_flag 1 --pretrain -1 --batch_size 8192 --gpu_id 0
20 | python main.py --model_type baseline --dataset yelp2018 --layer_size [64,32,16] --embed_size 64 --lr 0.0001 --epoch 1000 --verbose 1 --save_flag 1 --pretrain -1 --batch_size 8192 --gpu_id 0
21 | python main.py --model_type baseline --dataset amazon-book --layer_size [64,32,16] --embed_size 64 --lr 0.0001 --epoch 1000 --verbose 1 --save_flag 1 --pretrain -1 --batch_size 8192 --gpu_id 0
22 | ```
23 |
--------------------------------------------------------------------------------
/TC/HGAT/data/example/test.list:
--------------------------------------------------------------------------------
1 | 9071
2 | 2560
3 | 6761
4 | 8523
5 | 6763
6 | 2564
7 | 8522
8 | 7632
--------------------------------------------------------------------------------
/TC/HGAT/data/example/train.list:
--------------------------------------------------------------------------------
1 | 5223
2 | 7630
3 | 2131
4 | 2563
5 | 2132
6 | 9073
7 | 1
8 | 5221
9 | 0
10 | 5220
11 | 2130
12 | 2561
13 | 2134
14 | 2133
15 | 2562
16 | 9072
--------------------------------------------------------------------------------
/TC/HGAT/data/example/vali.list:
--------------------------------------------------------------------------------
1 | 3
2 | 9070
3 | 6762
4 | 9074
5 | 6764
6 | 8521
7 | 4
8 | 2
9 | 7633
10 | 6760
11 | 8524
12 | 7631
13 | 5224
14 | 8520
15 | 5222
16 | 7634
--------------------------------------------------------------------------------
/TC/HGAT/model/code/__init__.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function
2 | from __future__ import division
3 |
--------------------------------------------------------------------------------
/TC/HGAT/model/code/baseline/README.md:
--------------------------------------------------------------------------------
1 | # new baseline for benchmark
2 |
3 | For the message-passing-with-relation-attention version:
4 |
5 | ```
6 | python run_new.py --dataset DBLP
7 | python run_new.py --dataset ACM --feats-type 2
8 | python run_multi.py --dataset IMDB --feats-type 0
9 | ```
10 |
11 | For the relational GAT version:
12 |
13 | ```
14 | python run.py --dataset DBLP
15 | ```
16 |
17 | ## running environment
18 |
19 | * torch 1.6.0 cuda 10.1
20 | * dgl 0.4.3 cuda 10.1
21 | * networkx 2.3
22 | * scikit-learn 0.23.2
23 | * scipy 1.5.2
24 |
--------------------------------------------------------------------------------
/TC/HGAT/model/code/baseline/new_main.py:
--------------------------------------------------------------------------------
1 | """
2 | define model
3 | """
4 | weight_size = eval(args.layer_size)
5 | num_layers = len(weight_size) - 2
6 | heads = [args.heads] * num_layers + [1]
7 | model = myGAT(config['n_users']+config['n_entities'], args.kge_size, config['n_relations']*2+1, args.embed_size, weight_size[-2], weight_size[-1], num_layers, heads, F.elu, 0.1, 0., 0.05, False, pretrain=pretrain_data, alpha=1.0).cuda()
8 |
9 |
10 | """
11 | build feed input
12 | """
13 | edge2type = {}
14 | for i,mat in enumerate(data_generator.lap_list):
15 | for u,v in zip(*mat.nonzero()):
16 | edge2type[(u,v)] = i
17 | for i in range(data_generator.n_users+data_generator.n_entities):
18 | edge2type[(i,i)] = len(data_generator.lap_list)
19 |
20 | adjM = sum(data_generator.lap_list)
21 | adjM[adjM>1.] = 1.
22 | print(len(adjM.nonzero()[0]))
23 | g = dgl.from_scipy(adjM, eweight_name='weight')
24 | g = dgl.remove_self_loop(g) # these two lines are vital, because we want self-loop to be the last edges
25 | g = dgl.add_self_loop(g)
26 | g.edata['weight'][g.edata['weight']==0.] = 1.
27 | e_feat = []
28 | edge2id = {}
29 | for u, v in zip(*g.edges()):
30 | u = u.item()
31 | v = v.item()
32 | if u == v:
33 | break
34 | e_feat.append(edge2type[(u,v)])
35 | edge2id[(u,v)] = len(edge2id)
36 | no_self_loop = len(e_feat)
37 | for i in range(data_generator.n_users+data_generator.n_entities):
38 | e_feat.append(edge2type[(i,i)])
39 | edge2id[(i,i)] = len(edge2id)
40 | self_loop = len(e_feat) - no_self_loop
41 | must = torch.tensor([True]*self_loop)
42 | e_feat = torch.tensor(e_feat, dtype=torch.long)
43 |
44 |
45 | """
46 | call model
47 | """
48 | model(g, e_feat)
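60 | 
61 | 
62 | # Hedged sanity check of the self-loop ordering relied on above: in DGL,
63 | # dgl.add_self_loop appends self-loop edges after all existing edges.
64 | # Toy example (hypothetical 2-node graph, not part of the pipeline):
65 | #   g_demo = dgl.graph((torch.tensor([0]), torch.tensor([1])), num_nodes=2)
66 | #   g_demo = dgl.add_self_loop(g_demo)
67 | #   print(g_demo.edges())  # (tensor([0, 0, 1]), tensor([1, 0, 1])): self-loops last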
--------------------------------------------------------------------------------
/TC/HGAT/model/code/baseline/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/HGB/ca6fd5bb0c1ca32e63b132c8bfe8f11a4a6629fe/TC/HGAT/model/code/baseline/utils/__init__.py
--------------------------------------------------------------------------------
/TC/HGAT/model/code/baseline/utils/data.py:
--------------------------------------------------------------------------------
1 | import networkx as nx
2 | import numpy as np
3 | import scipy
4 | import pickle
5 | import scipy.sparse as sp
6 |
7 | def load_data(prefix='DBLP'):
8 | from scripts.data_loader import data_loader
9 | dl = data_loader('../../data/'+prefix)
10 | features = []
11 | for i in range(len(dl.nodes['count'])):
12 | th = dl.nodes['attr'][i]
13 | if th is None:
14 | features.append(sp.eye(dl.nodes['count'][i]))
15 | else:
16 | features.append(th)
17 | adjM = sum(dl.links['data'].values())
18 | labels = np.zeros((dl.nodes['count'][0], dl.labels_train['num_classes']), dtype=int)
19 | val_ratio = 0.2
20 | train_idx = np.nonzero(dl.labels_train['mask'])[0]
21 | np.random.shuffle(train_idx)
22 | split = int(train_idx.shape[0]*val_ratio)
23 | val_idx = train_idx[:split]
24 | train_idx = train_idx[split:]
25 | train_idx = np.sort(train_idx)
26 | val_idx = np.sort(val_idx)
27 | test_idx = np.nonzero(dl.labels_test['mask'])[0]
28 | labels[train_idx] = dl.labels_train['data'][train_idx]
29 | labels[val_idx] = dl.labels_train['data'][val_idx]
30 | if prefix != 'IMDB':
31 | labels = labels.argmax(axis=1)
32 | train_val_test_idx = {}
33 | train_val_test_idx['train_idx'] = train_idx
34 | train_val_test_idx['val_idx'] = val_idx
35 | train_val_test_idx['test_idx'] = test_idx
36 | return features,\
37 | adjM, \
38 | labels,\
39 | train_val_test_idx,\
40 | dl
41 |
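42 | # Hedged usage sketch (assumes the HGB data layout under '../../data/DBLP'
43 | # and that `scripts.data_loader` is importable from the working directory):
44 | #   features, adjM, labels, train_val_test_idx, dl = load_data('DBLP')
45 | #   print(len(features), adjM.shape, labels.shape)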
--------------------------------------------------------------------------------
/TC/HGAT/model/code/baseline/utils/pytorchtools.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import torch
3 |
4 |
5 | class EarlyStopping:
6 | """Early stops the training if validation loss doesn't improve after a given patience."""
7 | def __init__(self, patience, verbose=False, delta=0, save_path='checkpoint.pt'):
8 | """
9 | Args:
10 |             patience (int): How long to wait after the last time the validation loss improved.
11 |                             (no default; must be supplied)
12 | verbose (bool): If True, prints a message for each validation loss improvement.
13 | Default: False
14 | delta (float): Minimum change in the monitored quantity to qualify as an improvement.
15 | Default: 0
16 | """
17 | self.patience = patience
18 | self.verbose = verbose
19 | self.counter = 0
20 | self.best_score = None
21 | self.early_stop = False
22 |         self.val_loss_min = np.inf  # np.inf, not np.Inf (removed in NumPy 2.0)
23 | self.delta = delta
24 | self.save_path = save_path
25 |
26 | def __call__(self, val_loss, model):
27 |
28 | score = -val_loss
29 |
30 | if self.best_score is None:
31 | self.best_score = score
32 | self.save_checkpoint(val_loss, model)
33 | elif score < self.best_score - self.delta:
34 | self.counter += 1
35 | print(f'EarlyStopping counter: {self.counter} out of {self.patience}')
36 | if self.counter >= self.patience:
37 | self.early_stop = True
38 | else:
39 | self.best_score = score
40 | self.save_checkpoint(val_loss, model)
41 | self.counter = 0
42 |
43 | def save_checkpoint(self, val_loss, model):
44 | """Saves model when validation loss decrease."""
45 | if self.verbose:
46 | print(f'Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}). Saving model ...')
47 | torch.save(model.state_dict(), self.save_path)
48 | self.val_loss_min = val_loss
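49 | 
50 | 
51 | # Hedged usage sketch (hypothetical `validate` helper and epoch loop):
52 | #   stopper = EarlyStopping(patience=5, verbose=True, save_path='ckpt.pt')
53 | #   for epoch in range(100):
54 | #       val_loss = validate(model)  # user-supplied validation step
55 | #       stopper(val_loss, model)
56 | #       if stopper.early_stop:
57 | #           break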
49 |
--------------------------------------------------------------------------------
/TC/HGAT/model/code/print_log.py:
--------------------------------------------------------------------------------
1 | import sys
2 | import os
3 |
4 |
5 | class Logger(object):
6 | def __init__(self, filename="Default.log", remove=True):
7 | self.terminal = sys.stdout
8 | if remove and os.path.exists(filename):
9 | os.remove(filename)
10 | self.log = open(filename, "a")
11 |
12 | def write(self, message):
13 | self.terminal.write(message)
14 | self.log.write(message)
15 |
16 |     def flush(self):
17 |         # flush both streams so buffered output is not lost on exit
18 |         self.terminal.flush()
19 |         self.log.flush()
20 | 
21 |     def change_file(self, filename="Default.log"):
22 |         self.log.close()
23 |         self.log = open(filename, "a")
24 | 
25 | 
26 | if __name__ == '__main__':
27 |     sys.stdout = Logger("yourlogfilename.txt")
28 |     print('content.')
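29 | 
30 |     # Hedged follow-up: change_file (defined above) can redirect the log
31 |     # mid-run, e.g. sys.stdout.change_file("next_stage.txt")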
--------------------------------------------------------------------------------
/TC/HGAT/model/data/example/test.map:
--------------------------------------------------------------------------------
1 | 74
2 | 81
3 | 116
4 | 114
5 | 17
6 | 32
7 | 119
8 | 30
--------------------------------------------------------------------------------
/TC/HGAT/model/data/example/train.map:
--------------------------------------------------------------------------------
1 | 38
2 | 24
3 | 29
4 | 97
5 | 121
6 | 88
7 | 27
8 | 12
9 | 98
10 | 104
11 | 59
12 | 22
13 | 53
14 | 86
15 | 83
16 | 82
--------------------------------------------------------------------------------
/TC/HGAT/model/data/example/vali.map:
--------------------------------------------------------------------------------
1 | 31
2 | 51
3 | 35
4 | 67
5 | 2
6 | 90
7 | 137
8 | 131
9 | 141
10 | 1
11 | 122
12 | 60
13 | 105
14 | 26
15 | 79
16 | 128
--------------------------------------------------------------------------------