├── .gitignore ├── LP ├── GATNE │ ├── LICENSE │ ├── README.md │ ├── scripts │ │ └── run_example.sh │ └── src │ │ ├── gen_hom_data.py │ │ ├── homGNN.py │ │ ├── main.py │ │ ├── main_pytorch.py │ │ ├── utils.py │ │ └── walk.py ├── HetGNN │ ├── README.md │ └── code │ │ ├── DeepWalk.py │ │ ├── HetGNN.py │ │ ├── application.py │ │ ├── args.py │ │ ├── data_generator.py │ │ ├── homoGNN.py │ │ ├── homo_data_split.py │ │ ├── input_data_process.py │ │ ├── link_prediction_model.py │ │ ├── node_classification_model.py │ │ ├── node_clustering_model.py │ │ ├── raw_data_process.py │ │ └── tools.py ├── MAGNN │ ├── GNN.py │ ├── LP_MAGNN_training history.txt │ ├── README.md │ ├── model │ │ ├── MAGNN_lp.py │ │ ├── MAGNN_nc.py │ │ ├── MAGNN_nc_mb.py │ │ ├── __init__.py │ │ └── base_MAGNN.py │ ├── run_LastFM.py │ ├── run_LastFM_GNN.py │ └── utils │ │ ├── __init__.py │ │ ├── data.py │ │ ├── preprocess.py │ │ ├── pytorchtools.py │ │ └── tools.py ├── RGCN-WN18 │ ├── README.md │ ├── code │ │ ├── __init__.py │ │ ├── gnn_link_predict.py │ │ ├── link_predict.py │ │ ├── model.py │ │ ├── scripts │ │ │ ├── __init__.py │ │ │ ├── data_loader.py │ │ │ └── read_file.py │ │ └── utils.py │ └── data │ │ └── wn18 │ │ ├── README │ │ ├── Wordnet3.0-LICENSE │ │ ├── entities.dict │ │ ├── relations.dict │ │ ├── test.txt │ │ ├── train.txt │ │ └── valid.txt ├── RGCN │ ├── GNN.py │ ├── HomGNN.py │ ├── README.md │ ├── gnn_link_predict.py │ ├── link_predict.py │ ├── model.py │ └── utils.py └── benchmark │ ├── .gitignore │ ├── README.md │ ├── methods │ ├── GATNE │ │ ├── LICENSE │ │ ├── README.md │ │ ├── main_pytorch.py │ │ ├── utils.py │ │ └── walk.py │ ├── GNN │ │ ├── GNN.py │ │ ├── README.md │ │ ├── homoGNN.py │ │ └── utils.py │ ├── HGT │ │ ├── README.md │ │ ├── data_loader.py │ │ ├── link_predict.py │ │ ├── load_data_dgl.py.ipynb │ │ ├── model.py │ │ ├── run_LastFM.sh │ │ ├── run_LastFM_magnn.sh │ │ ├── run_PubMed.sh │ │ ├── run_amazon.sh │ │ └── utils │ │ │ ├── __init__.py │ │ │ ├── data.py │ │ │ ├── preprocess.py │ │ │ ├── pytorchtools.py │ │ │ └── tools.py │ ├── HetGNN │ │ ├── DeepWalk.py │ │ ├── README.md │ │ ├── args.py │ │ ├── data_generator.py │ │ ├── do_LP.py │ │ ├── main.py │ │ └── tools.py │ ├── MAGNN │ │ ├── .gitignore │ │ ├── README.md │ │ ├── model │ │ │ ├── MAGNN_lp.py │ │ │ ├── MAGNN_nc.py │ │ │ ├── MAGNN_nc_mb.py │ │ │ ├── __init__.py │ │ │ └── base_MAGNN.py │ │ ├── run_DBLP.py │ │ ├── run_IMDB.py │ │ ├── run_LastFM.py │ │ └── utils │ │ │ ├── __init__.py │ │ │ ├── data.py │ │ │ ├── preprocess.py │ │ │ ├── pytorchtools.py │ │ │ └── tools.py │ ├── MAGNN_ini │ │ ├── .gitignore │ │ ├── README.md │ │ ├── model │ │ │ ├── MAGNN_lp.py │ │ │ ├── MAGNN_nc.py │ │ │ ├── MAGNN_nc_mb.py │ │ │ ├── __init__.py │ │ │ └── base_MAGNN.py │ │ ├── preprocess_DBLP.ipynb │ │ ├── preprocess_IMDB.ipynb │ │ ├── preprocess_LastFM.ipynb │ │ ├── preprocess_LastFM_magnn.ipynb │ │ ├── run_DBLP.py │ │ ├── run_IMDB.py │ │ ├── run_LastFM.py │ │ ├── test_LastFM.py │ │ ├── trans_format.py │ │ └── utils │ │ │ ├── __init__.py │ │ │ ├── data.py │ │ │ ├── preprocess.py │ │ │ ├── pytorchtools.py │ │ │ └── tools.py │ ├── RGCN │ │ ├── README.md │ │ ├── link_predict.py │ │ ├── model.py │ │ └── utils.py │ └── baseline │ │ ├── GNN.py │ │ ├── README.md │ │ ├── conv.py │ │ ├── run_dist.py │ │ ├── run_new.py │ │ └── utils │ │ ├── __init__.py │ │ ├── data.py │ │ ├── preprocess.py │ │ ├── pytorchtools.py │ │ └── tools.py │ ├── scripts │ ├── LP_AUC_MRR.py │ ├── README.md │ ├── __init__.py │ └── data_loader.py │ └── test.py ├── NC ├── GTN │ ├── .gitignore │ ├── 
Data_Preprocessing.ipynb │ ├── GNN.py │ ├── GTN.png │ ├── README.md │ ├── gcn.py │ ├── get_acm_data.py │ ├── inits.py │ ├── main.py │ ├── main_gnn.py │ ├── main_sparse.py │ ├── messagepassing.py │ ├── model.py │ ├── model_sparse.py │ └── utils.py ├── HAN │ ├── GNN.py │ ├── README.md │ ├── main.py │ ├── model.py │ ├── model_hetero.py │ └── utils.py ├── HetGNN │ ├── README.md │ └── code │ │ ├── DeepWalk.py │ │ ├── HetGNN.py │ │ ├── application.py │ │ ├── args.py │ │ ├── data_generator.py │ │ ├── homoGNN.py │ │ ├── input_data_process.py │ │ ├── link_prediction_model.py │ │ ├── node_classification_model.py │ │ ├── node_clustering_model.py │ │ ├── raw_data_process.py │ │ └── tools.py ├── MAGNN │ ├── .gitignore │ ├── GNN.py │ ├── README.md │ ├── get_lastfm_data.py │ ├── get_lastfm_data_same.py │ ├── model │ │ ├── MAGNN_lp.py │ │ ├── MAGNN_nc.py │ │ ├── MAGNN_nc_mb.py │ │ ├── __init__.py │ │ └── base_MAGNN.py │ ├── preprocess_DBLP.ipynb │ ├── preprocess_IMDB.ipynb │ ├── preprocess_LastFM.ipynb │ ├── run_DBLP.py │ ├── run_DBLP_gnn.py │ ├── run_IMDB.py │ ├── run_LastFM.py │ └── utils │ │ ├── __init__.py │ │ ├── data.py │ │ ├── preprocess.py │ │ ├── pytorchtools.py │ │ ├── tools.py │ │ └── transform_2_com.py ├── RGCN │ ├── README.md │ ├── entity_classify.py │ └── model.py ├── RSHN │ ├── README.md │ ├── build_coarsened_line_graph │ │ ├── relation_graph.py │ │ └── utils.py │ ├── model │ │ ├── GNN.py │ │ ├── RSHN.py │ │ └── RSHN_gnn.py │ └── torch_geometric │ │ ├── __init__.py │ │ ├── data │ │ ├── __init__.py │ │ ├── batch.py │ │ ├── data.py │ │ ├── dataloader.py │ │ ├── dataset.py │ │ ├── download.py │ │ ├── extract.py │ │ ├── in_memory_dataset.py │ │ └── makedirs.py │ │ ├── datasets │ │ ├── __init__.py │ │ └── entities.py │ │ ├── nn │ │ ├── __init__.py │ │ ├── conv │ │ │ ├── __init__.py │ │ │ ├── message_passing.py │ │ │ ├── nn_conv.py │ │ │ └── relation_conv.py │ │ └── inits.py │ │ └── utils │ │ ├── __init__.py │ │ ├── convert.py │ │ ├── degree.py │ │ ├── grid.py │ │ ├── isolated.py │ │ ├── loop.py │ │ ├── metric.py │ │ ├── normalized_cut.py │ │ ├── num_nodes.py │ │ ├── one_hot.py │ │ ├── repeat.py │ │ ├── scatter.py │ │ ├── softmax.py │ │ ├── sparse.py │ │ ├── to_batch.py │ │ └── undirected.py └── benchmark │ ├── .gitignore │ ├── README.md │ ├── methods │ ├── GNN │ │ ├── GNN.py │ │ ├── README.md │ │ ├── run.py │ │ ├── run_multi.py │ │ └── utils │ │ │ ├── __init__.py │ │ │ ├── data.py │ │ │ └── pytorchtools.py │ ├── GTN │ │ ├── README.md │ │ ├── gcn.py │ │ ├── inits.py │ │ ├── main.py │ │ ├── main_multi.py │ │ ├── main_sparse.py │ │ ├── messagepassing.py │ │ ├── model.py │ │ ├── model_sparse.py │ │ └── utils.py │ ├── HAN │ │ ├── README.md │ │ ├── main.py │ │ ├── main_multi.py │ │ ├── model_hetero.py │ │ ├── model_hetero_multi.py │ │ └── utils.py │ ├── HGT │ │ ├── .gitignore │ │ ├── GNN.py │ │ ├── README.md │ │ ├── data_loader.py │ │ ├── gpu_memory_log.py │ │ ├── load_data_dgl.py.ipynb │ │ ├── model.py │ │ ├── run_acm.sh │ │ ├── run_dblp.py │ │ ├── run_dblp.sh │ │ ├── run_freebash.sh │ │ ├── run_hgt.py │ │ ├── run_imdb.sh │ │ ├── train_hgt.py │ │ └── utils │ │ │ ├── __init__.py │ │ │ ├── data.py │ │ │ ├── preprocess.py │ │ │ ├── pytorchtools.py │ │ │ └── tools.py │ ├── HetGNN │ │ ├── README.md │ │ └── code │ │ │ ├── ACM │ │ │ ├── DeepWalk.py │ │ │ ├── args.py │ │ │ ├── data_generator.py │ │ │ ├── do_class.py │ │ │ ├── main.py │ │ │ └── tools.py │ │ │ ├── DBLP │ │ │ ├── DeepWalk.py │ │ │ ├── args.py │ │ │ ├── data_generator.py │ │ │ ├── do_class.py │ │ │ ├── main.py │ │ │ └── tools.py │ │ │ └── IMDB │ 
│ │ ├── DeepWalk.py │ │ │ ├── args.py │ │ │ ├── data_generator.py │ │ │ ├── do_class.py │ │ │ ├── main.py │ │ │ └── tools.py │ ├── HetSANN │ │ ├── HetSANN_MRV │ │ │ ├── execute_sparse.py │ │ │ ├── find_meta.py │ │ │ ├── models │ │ │ │ ├── __init__.py │ │ │ │ ├── __pycache__ │ │ │ │ │ ├── __init__.cpython-36.pyc │ │ │ │ │ ├── base_gattn.cpython-36.pyc │ │ │ │ │ └── sp_hgat.cpython-36.pyc │ │ │ │ ├── base_gattn.py │ │ │ │ └── sp_hgat.py │ │ │ ├── scripts │ │ │ │ └── data_loader.py │ │ │ └── utils │ │ │ │ ├── __init__.py │ │ │ │ ├── __pycache__ │ │ │ │ ├── __init__.cpython-36.pyc │ │ │ │ ├── layers.cpython-36.pyc │ │ │ │ └── process.cpython-36.pyc │ │ │ │ ├── layers.py │ │ │ │ └── process.py │ │ ├── LICENSE │ │ ├── README.md │ │ └── fig │ │ │ ├── attention.png │ │ │ └── model.png │ ├── MAGNN │ │ ├── .gitignore │ │ ├── README.md │ │ ├── meta1.json │ │ ├── meta2.json │ │ ├── model │ │ │ ├── MAGNN_lp.py │ │ │ ├── MAGNN_nc.py │ │ │ ├── MAGNN_nc_mb.py │ │ │ ├── __init__.py │ │ │ └── base_MAGNN.py │ │ ├── preprocess_DBLP.ipynb │ │ ├── preprocess_IMDB.ipynb │ │ ├── preprocess_LastFM.ipynb │ │ ├── run_ACM.py │ │ ├── run_DBLP.py │ │ ├── run_Freebase.py │ │ ├── run_IMDB.py │ │ ├── run_IMDB_new.py │ │ ├── run_LastFM.py │ │ └── utils │ │ │ ├── __init__.py │ │ │ ├── data.py │ │ │ ├── preprocess.py │ │ │ ├── pytorchtools.py │ │ │ └── tools.py │ ├── RGCN │ │ ├── README.md │ │ ├── entity_classify.py │ │ ├── model.py │ │ └── scripts │ │ │ ├── __init__.py │ │ │ └── data_loader.py │ ├── RSHN │ │ ├── README.md │ │ ├── RSHN.py │ │ ├── build_coarsened_line_graph │ │ │ ├── relation_graph.py │ │ │ └── utils.py │ │ └── torch_geometric │ │ │ ├── __init__.py │ │ │ ├── data │ │ │ ├── __init__.py │ │ │ ├── batch.py │ │ │ ├── data.py │ │ │ ├── dataloader.py │ │ │ ├── dataset.py │ │ │ ├── download.py │ │ │ ├── extract.py │ │ │ ├── in_memory_dataset.py │ │ │ └── makedirs.py │ │ │ ├── datasets │ │ │ ├── __init__.py │ │ │ └── entities.py │ │ │ ├── nn │ │ │ ├── __init__.py │ │ │ ├── conv │ │ │ │ ├── __init__.py │ │ │ │ ├── message_passing.py │ │ │ │ ├── nn_conv.py │ │ │ │ └── relation_conv.py │ │ │ └── inits.py │ │ │ └── utils │ │ │ ├── __init__.py │ │ │ ├── convert.py │ │ │ ├── degree.py │ │ │ ├── grid.py │ │ │ ├── isolated.py │ │ │ ├── loop.py │ │ │ ├── metric.py │ │ │ ├── normalized_cut.py │ │ │ ├── num_nodes.py │ │ │ ├── one_hot.py │ │ │ ├── repeat.py │ │ │ ├── scatter.py │ │ │ ├── softmax.py │ │ │ ├── sparse.py │ │ │ ├── to_batch.py │ │ │ └── undirected.py │ └── baseline │ │ ├── GNN.py │ │ ├── README.md │ │ ├── conv.py │ │ ├── run.py │ │ ├── run_multi.py │ │ ├── run_new.py │ │ └── utils │ │ ├── __init__.py │ │ ├── data.py │ │ ├── preprocess.py │ │ ├── pytorchtools.py │ │ └── tools.py │ ├── scripts │ ├── NC_F1.py │ ├── README.md │ ├── __init__.py │ └── data_loader.py │ └── test.py ├── README.md ├── Recom ├── KGAT │ ├── LICENSE │ ├── Log │ │ └── README.md │ ├── Model │ │ ├── BPRMF.py │ │ ├── CFKG.py │ │ ├── CKE.py │ │ ├── KGAT.py │ │ ├── Main.py │ │ ├── NFM.py │ │ ├── README.md │ │ └── utility │ │ │ ├── README.md │ │ │ ├── batch_test.py │ │ │ ├── helper.py │ │ │ ├── load_data.py │ │ │ ├── loader_bprmf.py │ │ │ ├── loader_cfkg.py │ │ │ ├── loader_cke.py │ │ │ ├── loader_kgat.py │ │ │ ├── loader_nfm.py │ │ │ ├── metrics.py │ │ │ └── parser.py │ └── README.md ├── KGCN │ ├── .gitignore │ ├── LICENSE │ ├── README.md │ └── src │ │ ├── aggregators.py │ │ ├── data_loader.py │ │ ├── gcn │ │ ├── __init__.py │ │ ├── inits.py │ │ └── layers.py │ │ ├── main.py │ │ ├── model.py │ │ ├── preprocess.py │ │ ├── train.py │ │ └── utils │ │ ├── 
__init__.py │ │ ├── layers.py │ │ └── sp_gat.py ├── KGNN-LS │ ├── README.md │ └── src │ │ ├── aggregators.py │ │ ├── data_loader.py │ │ ├── empirical_study.py │ │ ├── gcn │ │ ├── __init__.py │ │ ├── inits.py │ │ └── layers.py │ │ ├── main.py │ │ ├── model.py │ │ ├── preprocess.py │ │ ├── train.py │ │ └── utils │ │ ├── __init__.py │ │ ├── layers.py │ │ └── sp_gat.py ├── README.md └── baseline │ ├── LICENSE │ ├── Model │ ├── GNN.py │ ├── conv.py │ ├── main.py │ └── utility │ │ ├── batch_test.py │ │ ├── helper.py │ │ ├── load_data.py │ │ ├── loader_kgat.py │ │ ├── metrics.py │ │ └── parser.py │ └── README.md └── TC └── HGAT ├── README.md ├── build_data.py ├── build_features.py ├── build_network.py ├── data ├── example │ ├── example.txt │ ├── example2entity.txt │ ├── test.list │ ├── train.list │ └── vali.list └── stopwords_en.txt ├── model ├── code │ ├── __init__.py │ ├── base.py │ ├── baseline │ │ ├── GNN.py │ │ ├── README.md │ │ ├── conv.py │ │ ├── new_main.py │ │ ├── run.py │ │ ├── run_multi.py │ │ ├── run_new.py │ │ └── utils │ │ │ ├── __init__.py │ │ │ ├── data.py │ │ │ ├── preprocess.py │ │ │ ├── pytorchtools.py │ │ │ └── tools.py │ ├── layers.py │ ├── models.py │ ├── print_log.py │ ├── train.py │ └── utils.py └── data │ └── example │ ├── example.cites │ ├── example.content.entity │ ├── example.content.text │ ├── example.content.topic │ ├── mapindex.txt │ ├── test.map │ ├── train.map │ └── vali.map ├── tagMe.py └── utils.py /LP/GATNE/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 Yukuo Cen 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. -------------------------------------------------------------------------------- /LP/GATNE/scripts/run_example.sh: -------------------------------------------------------------------------------- 1 | #python src/main.py --input data/example 2 | python src/main_pytorch.py --input data/amazon --feature data/amazon/feature.txt 3 | 4 | read str -------------------------------------------------------------------------------- /LP/GATNE/src/walk.py: -------------------------------------------------------------------------------- 1 | import random 2 | import multiprocessing 3 | 4 | from tqdm import tqdm 5 | 6 | def walk(args): 7 | walk_length, start, schema = args 8 | # Simulate a random walk starting from start node. 
9 | rand = random.Random() 10 | 11 | if schema: 12 | schema_items = schema.split('-') 13 | assert schema_items[0] == schema_items[-1] 14 | 15 | walk = [start] 16 | while len(walk) < walk_length: 17 | cur = walk[-1] 18 | candidates = [] 19 | for node in G[cur]: 20 | if schema == '' or node_type[node] == schema_items[len(walk) % (len(schema_items) - 1)]: 21 | candidates.append(node) 22 | if candidates: 23 | walk.append(rand.choice(candidates)) 24 | else: 25 | break 26 | return [str(node) for node in walk] 27 | 28 | def initializer(init_G, init_node_type): 29 | global G 30 | G = init_G 31 | global node_type 32 | node_type = init_node_type 33 | 34 | class RWGraph: 35 | def __init__(self, nx_G, node_type_arr=None, num_workers=16): 36 | self.G = nx_G 37 | self.node_type = node_type_arr 38 | self.num_workers = num_workers 39 | 40 | def node_list(self, nodes, num_walks): 41 | for loop in range(num_walks): 42 | for node in nodes: 43 | yield node 44 | 45 | def simulate_walks(self, num_walks, walk_length, schema=None): 46 | all_walks = [] 47 | nodes = list(self.G.keys()) 48 | random.shuffle(nodes) 49 | 50 | if schema is None: 51 | with multiprocessing.Pool(self.num_workers, initializer=initializer, initargs=(self.G, self.node_type)) as pool: 52 | all_walks = list(pool.imap(walk, ((walk_length, node, '') for node in tqdm(self.node_list(nodes, num_walks))), chunksize=256)) 53 | else: 54 | schema_list = schema.split(',') 55 | for schema_iter in schema_list: 56 | with multiprocessing.Pool(self.num_workers, initializer=initializer, initargs=(self.G, self.node_type)) as pool: 57 | walks = list(pool.imap(walk, ((walk_length, node, schema_iter) for node in tqdm(self.node_list(nodes, num_walks)) if schema_iter.split('-')[0] == self.node_type[node]), chunksize=512)) 58 | all_walks.extend(walks) 59 | 60 | return all_walks 61 | -------------------------------------------------------------------------------- /LP/HetGNN/code/DeepWalk.py: -------------------------------------------------------------------------------- 1 | import string 2 | import re 3 | import random 4 | import math 5 | import numpy as np 6 | from gensim.models import Word2Vec 7 | from itertools import * 8 | dimen = 128 9 | window = 5 10 | 11 | 12 | def read_random_walk_corpus(): 13 | walks = [] 14 | #inputfile = open("../data/academic_test/meta_random_walk_APVPA_test.txt","r") 15 | inputfile = open("../data/academic_test/het_random_walk_test.txt", "r") 16 | for line in inputfile: 17 | path = [] 18 | node_list = re.split(' ', line) 19 | for i in range(len(node_list)): 20 | path.append(node_list[i]) 21 | walks.append(path) 22 | inputfile.close() 23 | return walks 24 | 25 | 26 | walk_corpus = read_random_walk_corpus() 27 | model = Word2Vec(walk_corpus, size = dimen, window = window, min_count = 0, workers = 2, sg = 1, hs = 0, negative = 5) 28 | 29 | 30 | print("Output...") 31 | #model.wv.save_word2vec_format("../data/node_embedding.txt") 32 | model.wv.save_word2vec_format("../data/academic_test/node_net_embedding.txt") 33 | 34 | -------------------------------------------------------------------------------- /LP/HetGNN/code/args.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | def read_args(): 4 | parser = argparse.ArgumentParser() 5 | parser.add_argument('--data_path', type = str, default = '../data/academic_test/', 6 | help='path to data') 7 | parser.add_argument('--model_path', type = str, default = '../model_save/', 8 | help='path to save model') 9 | parser.add_argument('--A_n', type
= int, default = 28646, 10 | help = 'number of author node') 11 | parser.add_argument('--P_n', type = int, default = 21044, 12 | help = 'number of paper node') 13 | parser.add_argument('--V_n', type = int, default = 18, 14 | help = 'number of venue node') 15 | parser.add_argument('--in_f_d', type = int, default = 128, 16 | help = 'input feature dimension') 17 | parser.add_argument('--embed_d', type = int, default = 128, 18 | help = 'embedding dimension') 19 | parser.add_argument('--lr', type = float, default = 0.001, 20 | help = 'learning rate') 21 | parser.add_argument('--batch_s', type = int, default = 20000, 22 | help = 'batch size') 23 | parser.add_argument('--mini_batch_s', type = int, default = 200, 24 | help = 'mini batch size') 25 | parser.add_argument('--train_iter_n', type = int, default = 50, 26 | help = 'max number of training iterations') 27 | parser.add_argument('--walk_n', type = int, default = 10, 28 | help='number of walks per root node') 29 | parser.add_argument('--walk_L', type = int, default = 30, 30 | help='length of each walk') 31 | parser.add_argument('--window', type = int, default = 5, 32 | help='window size for relation extraction') 33 | parser.add_argument("--random_seed", default = 10, type = int) 34 | parser.add_argument('--train_test_label', type = int, default = 0, 35 | help='train/test label: 0 - train, 1 - test, 2 - code test/generate negative ids for evaluation') 36 | parser.add_argument('--save_model_freq', type = int, default = 2, 37 | help = 'number of iterations between model saves') 38 | parser.add_argument("--cuda", default = 0, type = int) 39 | parser.add_argument("--checkpoint", default = '', type=str) 40 | 41 | args = parser.parse_args() 42 | 43 | return args 44 | -------------------------------------------------------------------------------- /LP/HetGNN/code/node_classification_model.py: -------------------------------------------------------------------------------- 1 | import random 2 | import string 3 | import re 4 | import numpy 5 | from itertools import * 6 | import sklearn 7 | from sklearn import linear_model 8 | import sklearn.metrics as Metric 9 | import csv 10 | import argparse 11 | 12 | parser = argparse.ArgumentParser(description = 'node classification task') 13 | parser.add_argument('--A_n', type = int, default = 28646, 14 | help = 'number of author node') 15 | parser.add_argument('--P_n', type = int, default = 21044, 16 | help = 'number of paper node') 17 | parser.add_argument('--V_n', type = int, default = 18, 18 | help = 'number of venue node') 19 | parser.add_argument('--data_path', type = str, default = '../data/academic_test/', 20 | help='path to data') 21 | parser.add_argument('--embed_d', type = int, default = 128, 22 | help = 'embedding dimension') 23 | 24 | args = parser.parse_args() 25 | print(args) 26 | 27 | 28 | def load_data(data_file_name, n_features, n_samples): 29 | with open(data_file_name) as f: 30 | data_file = csv.reader(f) 31 | data = numpy.empty((n_samples, n_features)) 32 | for i, d in enumerate(data_file): 33 | data[i] = numpy.asarray(d[:], dtype=float) 34 | 35 | return data 36 | 37 | 38 | def model(train_num, test_num): 39 | train_data_f = args.data_path + "train_class_feature.txt" 40 | train_data = load_data(train_data_f, args.embed_d + 2, train_num) 41 | train_features = train_data.astype(numpy.float32)[:,2:-1] 42 | train_target = train_data.astype(numpy.float32)[:,1] 43 | 44 | #print(train_target[1]) 45 | learner = linear_model.LogisticRegression() 46 | learner.fit(train_features, train_target) 47
| train_features = None 48 | train_target = None 49 | 50 | print("training finished!") 51 | 52 | test_data_f = args.data_path + "test_class_feature.txt" 53 | test_data = load_data(test_data_f, args.embed_d + 2, test_num) 54 | test_id = test_data.astype(numpy.int32)[:,0] 55 | test_features = test_data.astype(numpy.float32)[:,2:-1] 56 | test_target = test_data.astype(numpy.float32)[:,1] 57 | test_predict = learner.predict(test_features) 58 | test_features = None 59 | 60 | print("test prediction finished!") 61 | 62 | output_f = open(args.data_path + "NC_prediction.txt", "w") 63 | for i in range(len(test_predict)): 64 | output_f.write('%d,%lf\n'%(test_id[i],test_predict[i])) 65 | output_f.close() 66 | 67 | print ("MacroF1: ") 68 | print (sklearn.metrics.f1_score(test_target,test_predict,average='macro')) 69 | 70 | print ("MicroF1: ") 71 | print (sklearn.metrics.f1_score(test_target,test_predict,average='micro')) 72 | 73 | 74 | 75 | 76 | -------------------------------------------------------------------------------- /LP/HetGNN/code/node_clustering_model.py: -------------------------------------------------------------------------------- 1 | import random 2 | import string 3 | import re 4 | import numpy 5 | from itertools import * 6 | import sklearn 7 | from sklearn.cluster import KMeans 8 | from sklearn.metrics.cluster import normalized_mutual_info_score 9 | from sklearn.metrics.cluster import adjusted_rand_score 10 | import csv 11 | import argparse 12 | 13 | parser = argparse.ArgumentParser(description = 'node clustering task') 14 | parser.add_argument('--C_n', type = int, default = 4, 15 | help = 'number of node class labels') 16 | parser.add_argument('--data_path', type = str, default = '../data/academic_test/', 17 | help='path to data') 18 | parser.add_argument('--embed_d', type = int, default = 128, 19 | help = 'embedding dimension') 20 | 21 | args = parser.parse_args() 22 | print(args) 23 | 24 | 25 | def model(cluster_id_num): 26 | cluster_embed = numpy.around(numpy.random.normal(0, 0.01, [cluster_id_num, args.embed_d]), 4) 27 | cluster_embed_f = open(args.data_path + "cluster_embed.txt", "r") 28 | for line in cluster_embed_f: 29 | line = line.strip() 30 | author_index = int(re.split(' ',line)[0]) 31 | embed_list = re.split(' ',line)[1:] 32 | for i in range(len(embed_list)): 33 | cluster_embed[author_index][i] = embed_list[i] 34 | 35 | kmeans = KMeans(n_clusters = args.C_n, random_state = 0).fit(cluster_embed) 36 | 37 | cluster_id_list = [0] * cluster_id_num 38 | cluster_id_f = open(args.data_path + "cluster.txt", "r") 39 | for line in cluster_id_f: 40 | line = line.strip() 41 | author_index = int(re.split(',',line)[0]) 42 | cluster_id = int(re.split(',',line)[1]) 43 | cluster_id_list[author_index] = cluster_id 44 | 45 | #NMI and ARI against the ground-truth cluster ids 46 | print ("NMI: " + str(normalized_mutual_info_score(kmeans.labels_, cluster_id_list))) 47 | print ("ARI: " + str(adjusted_rand_score(kmeans.labels_, cluster_id_list))) 48 | 49 | -------------------------------------------------------------------------------- /LP/MAGNN/model/__init__.py: -------------------------------------------------------------------------------- 1 | from model.MAGNN_nc import MAGNN_nc 2 | from model.MAGNN_nc_mb import MAGNN_nc_mb 3 | from model.MAGNN_lp import MAGNN_lp 4 | -------------------------------------------------------------------------------- /LP/MAGNN/utils/__init__.py: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/HGB/ca6fd5bb0c1ca32e63b132c8bfe8f11a4a6629fe/LP/MAGNN/utils/__init__.py -------------------------------------------------------------------------------- /LP/MAGNN/utils/pytorchtools.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | 5 | class EarlyStopping: 6 | """Early stops the training if validation loss doesn't improve after a given patience.""" 7 | def __init__(self, patience, verbose=False, delta=0, save_path='checkpoint.pt'): 8 | """ 9 | Args: 10 | patience (int): How long to wait after last time validation loss improved. 11 | Default: 7 12 | verbose (bool): If True, prints a message for each validation loss improvement. 13 | Default: False 14 | delta (float): Minimum change in the monitored quantity to qualify as an improvement. 15 | Default: 0 16 | """ 17 | self.patience = patience 18 | self.verbose = verbose 19 | self.counter = 0 20 | self.best_score = None 21 | self.early_stop = False 22 | self.val_loss_min = np.Inf 23 | self.delta = delta 24 | self.save_path = save_path 25 | 26 | def __call__(self, val_loss, model): 27 | 28 | score = -val_loss 29 | 30 | if self.best_score is None: 31 | self.best_score = score 32 | self.save_checkpoint(val_loss, model) 33 | elif score < self.best_score - self.delta: 34 | self.counter += 1 35 | print(f'EarlyStopping counter: {self.counter} out of {self.patience}') 36 | if self.counter >= self.patience: 37 | self.early_stop = True 38 | else: 39 | self.best_score = score 40 | self.save_checkpoint(val_loss, model) 41 | self.counter = 0 42 | 43 | def save_checkpoint(self, val_loss, model): 44 | """Saves model when validation loss decrease.""" 45 | if self.verbose: 46 | print(f'Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}). 
Saving model ...') 47 | torch.save(model.state_dict(), self.save_path) 48 | self.val_loss_min = val_loss 49 |
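This `EarlyStopping` helper recurs throughout the repo. Below is a minimal usage sketch, assuming a generic PyTorch training loop; the model, data, and loss are toy stand-ins (not part of this repository), and the import path depends on which copy of `pytorchtools.py` is on your path:

```python
# Hedged sketch: wiring EarlyStopping into a training loop.
# The model/data here are toy placeholders, not part of this repository.
import torch
import torch.nn as nn

from utils.pytorchtools import EarlyStopping  # adjust the import to your layout

model = nn.Linear(16, 1)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
criterion = nn.MSELoss()
x, y = torch.randn(64, 16), torch.randn(64, 1)

early_stopping = EarlyStopping(patience=30, verbose=True, save_path='checkpoint.pt')
for epoch in range(500):
    model.train()
    optimizer.zero_grad()
    loss = criterion(model(x), y)
    loss.backward()
    optimizer.step()

    model.eval()
    with torch.no_grad():
        val_loss = criterion(model(x), y).item()  # stand-in for a held-out set
    early_stopping(val_loss, model)  # checkpoints the weights whenever val_loss improves
    if early_stopping.early_stop:
        break

model.load_state_dict(torch.load('checkpoint.pt'))  # restore the best checkpoint
```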
-------------------------------------------------------------------------------- /LP/RGCN-WN18/README.md: -------------------------------------------------------------------------------- 1 | python link_predict.py --dataset=wn18 --gpu=1 --hidden=100 2 | python gnn_link_predict.py --dataset=wn18 --model=GCN --gpu=1 3 | python gnn_link_predict.py --dataset=wn18 --model=GAT --gpu=1 -------------------------------------------------------------------------------- /LP/RGCN-WN18/code/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/HGB/ca6fd5bb0c1ca32e63b132c8bfe8f11a4a6629fe/LP/RGCN-WN18/code/__init__.py -------------------------------------------------------------------------------- /LP/RGCN-WN18/code/scripts/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/HGB/ca6fd5bb0c1ca32e63b132c8bfe8f11a4a6629fe/LP/RGCN-WN18/code/scripts/__init__.py -------------------------------------------------------------------------------- /LP/RGCN-WN18/code/scripts/read_file.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | # Dictionary I/O: 4 | 5 | def read_dictionary(filename, id_lookup=True): 6 | d = {} 7 | for line in open(filename, 'r+'): 8 | line = line.strip().split('\t') 9 | 10 | if id_lookup: 11 | d[int(line[0])] = line[1] 12 | else: 13 | d[line[1]] = int(line[0]) 14 | 15 | return d 16 | 17 | # Triplet file I/O: 18 | 19 | def read_triplets(filename): 20 | for line in open(filename, 'r+'): 21 | processed_line = line.strip().split('\t') 22 | yield processed_line 23 | 24 | def read_triplet_file(filename): 25 | return list(read_triplets(filename)) 26 | 27 | def read_triplets_as_list(filename, entity_dict, relation_dict): 28 | entity_dict = read_dictionary(entity_dict, id_lookup=False) 29 | relation_dict = read_dictionary(relation_dict, id_lookup=False) 30 | 31 | l = [] 32 | for triplet in read_triplets(filename): 33 | entity_1 = entity_dict[triplet[0]] 34 | relation = relation_dict[triplet[1]] 35 | entity_2 = entity_dict[triplet[2]] 36 | 37 | l.append([entity_1, relation, entity_2]) 38 | 39 | return l 40 | 41 | 42 | #print(read_triplets_as_list('data/FB15k/freebase_mtr100_mte100-train.txt', 'data/FB15k/entities.dict', 'data/FB15k/relations.dict')) 43 | -------------------------------------------------------------------------------- /LP/RGCN-WN18/data/wn18/README: -------------------------------------------------------------------------------- 1 | ---------------------------------------------- 2 | -- WORDNET TENSOR DATA -- A. Bordes -- 2013 -- 3 | ---------------------------------------------- 4 | 5 | ------------------ 6 | OUTLINE: 7 | 1. Introduction 8 | 2. Content 9 | 3. Data Format 10 | 4. Data Statistics 11 | 5. How to Cite 12 | 6. License 13 | 7. Contact 14 | ------------------- 15 | 16 | 17 | 1. INTRODUCTION: 18 | 19 | This WORDNET TENSOR DATA consists of a collection of triplets (synset, relation_type, 20 | synset) extracted from WordNet 3.0 (http://wordnet.princeton.edu). This data set can 21 | be seen as a 3-mode tensor depicting ternary relationships between synsets. 22 | 23 | 24 | 2. CONTENT: 25 | 26 | The data archive contains 5 files: 27 | - README 3K 28 | - wordnet-mlj12-definitions.txt 4,2M 29 | - wordnet-mlj12-train.txt 4,5M 30 | - wordnet-mlj12-valid.txt 165K 31 | - wordnet-mlj12-test.txt 165K 32 | 33 | The 3 files wordnet-mlj12-*.txt contain the triplets (training, validation 34 | and test sets), while the file wordnet-mlj12-definitions.txt lists the WordNet 35 | synset definitions. 36 | 37 | 38 | 3. DATA FORMAT 39 | 40 | The definitions file (wordnet-mlj12-definitions.txt) contains one synset 41 | per line with the following format: synset_id (an 8-digit unique identifier), 42 | intelligible name (word+POS_tag+sense_index), definition. The previous 3 43 | pieces of information are separated by a tab ('\t'). 44 | 45 | All wordnet-mlj12-*.txt files contain one triplet per line, with 2 synset_ids 46 | and a relation type identifier in a tab-separated format. The first element is the 47 | synset_id of the left-hand side of the relation triple, the third one is the 48 | synset_id of the right-hand side and the second element is the name of the type 49 | of relation between them. 50 | 51 | 52 | 4. DATA STATISTICS 53 | 54 | There are 40,943 synsets and 18 relation types among them. The training set contains 55 | 141,442 triplets, the validation set 5,000 and the test set 5,000. 56 | 57 | All triplets are unique and we made sure that all synsets appearing in 58 | the validation or test sets also occur in the training set. 59 | 60 | 5. HOW TO CITE 61 | 62 | When using this data, one should cite the original paper: 63 | @article{bordes-mlj13, 64 | title = {A Semantic Matching Energy Function for Learning with Multi-relational Data}, 65 | author = {Antoine Bordes and Xavier Glorot and Jason Weston and Yoshua Bengio}, 66 | journal={Machine Learning}, 67 | publisher={Springer}, 68 | year={2013}, 69 | note={to appear} 70 | } 71 | 72 | One should also point at the project page with either the long URL: 73 | https://www.hds.utc.fr/everest/doku.php?id=en:smemlj12 , or the short 74 | one: http://goo.gl/bHWsK . 75 | 76 | 6. LICENSE: 77 | 78 | WordNet data follows the attached license agreement. 79 | 80 | 7. CONTACT 81 | 82 | For all remarks or questions please contact Antoine Bordes: antoine 83 | (dot) bordes (at) utc (dot) fr . 84 | 85 | 86 | 87 | -------------------------------------------------------------------------------- /LP/RGCN-WN18/data/wn18/Wordnet3.0-LICENSE: -------------------------------------------------------------------------------- 1 | WordNet Release 3.0 This software and database is being provided to you, the LICENSEE, by Princeton University under the following license. By obtaining, using and/or copying this software and database, you agree that you have read, understood, and will comply with these terms and conditions: Permission to use, copy, modify and distribute this software and database and its documentation for any purpose and without fee or royalty is hereby granted, provided that you agree to comply with the following copyright notice and statements, including the disclaimer, and that the same appear on ALL copies of the software, database and documentation, including modifications that you make for internal use or for distribution. WordNet 3.0 Copyright 2006 by Princeton University. All rights reserved. THIS SOFTWARE AND DATABASE IS PROVIDED "AS IS" AND PRINCETON UNIVERSITY MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR IMPLIED.
BY WAY OF EXAMPLE, BUT NOT LIMITATION, PRINCETON UNIVERSITY MAKES NO REPRESENTATIONS OR WARRANTIES OF MERCHANTABILITY OR FITNESS FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE, DATABASE OR DOCUMENTATION WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER RIGHTS. The name of Princeton University or Princeton may not be used in advertising or publicity pertaining to distribution of the software and/or database. Title to copyright in this software, database and any associated documentation shall at all times remain with Princeton University and LICENSEE agrees to preserve same. -------------------------------------------------------------------------------- /LP/RGCN-WN18/data/wn18/relations.dict: -------------------------------------------------------------------------------- 1 | 0 _member_of_domain_topic 2 | 1 _has_part 3 | 2 _synset_domain_usage_of 4 | 3 _synset_domain_region_of 5 | 4 _member_of_domain_region 6 | 5 _member_meronym 7 | 6 _part_of 8 | 7 _verb_group 9 | 8 _derivationally_related_form 10 | 9 _instance_hypernym 11 | 10 _instance_hyponym 12 | 11 _similar_to 13 | 12 _hyponym 14 | 13 _also_see 15 | 14 _member_of_domain_usage 16 | 15 _synset_domain_topic_of 17 | 16 _member_holonym 18 | 17 _hypernym 19 | -------------------------------------------------------------------------------- /LP/RGCN/README.md: -------------------------------------------------------------------------------- 1 | # RGCN code 2 | 3 | Adapted from [DGL example](https://github.com/dmlc/dgl/tree/master/examples/pytorch/rgcn). 4 | 5 | For comparison, we also replace the RGCN encoder with homogeneous GNNs (GCN/GAT) in `gnn_link_predict.py`. 6 | 7 | ## running environment 8 | 9 | * Python 3.7 10 | * torch 1.7.0 11 | * dgl 0.5.2 12 | 13 | ## running procedure 14 | 15 | * Dataset will be downloaded automatically at ~/.dgl/. 16 | * or you can download data from [tsinghua-cloud](https://cloud.tsinghua.edu.cn/d/8b9644cfa8344f26878c/) or [google-drive](https://drive.google.com/drive/folders/13o5dYuvpZWzgeUPVTLLtpYHAGss2sk_x?usp=sharing) 17 | * unzip all zip files 18 | * move them to ./data/ 19 | * cd to RGCN/ 20 | * run the python files 21 | 22 | ```bash 23 | python3 link_predict.py -d FB15k-237 --gpu 0 24 | python3 gnn_link_predict.py -d FB15k-237 --gpu 0 --model=gcn 25 | python3 gnn_link_predict.py -d FB15k-237 --gpu 0 --model=gat 26 | ``` -------------------------------------------------------------------------------- /LP/benchmark/.gitignore: -------------------------------------------------------------------------------- 1 | data 2 | **/checkpoint 3 | -------------------------------------------------------------------------------- /LP/benchmark/README.md: -------------------------------------------------------------------------------- 1 | # benchmark 2 | 3 | Benchmark data loader and evaluation scripts. 4 | 5 | ## data 6 | 7 | Warning: since the test data is openly released, take care not to overfit to it or leak it during training. For example, the order of the test data is not randomly permuted, so running BatchNorm over it would yield biased norm statistics. 8 | 9 | ## data format 10 | 11 | * All ids begin from 0. 12 | * Each node type takes a contiguous range of node_ids. 13 | * node_ids and node_type ids follow the same order, i.e., nodes with node_type 0 take the first range of node_ids, nodes with node_type 1 take the second range, and so on. 14 | * One-hot node features can be omitted.
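To illustrate the id convention above, here is a small sketch; the `counts` list stands in for `dl.nodes['count']` from the benchmark data loader (as used by the method code in this repo), and the concrete numbers are made up:

```python
# Sketch: recover each node type's [start, end) node_id range from per-type counts.
counts = [4, 3, 2]  # e.g. 4 nodes of type 0, 3 of type 1, 2 of type 2

ranges, shift = [], 0
for node_type, count in enumerate(counts):
    ranges.append((shift, shift + count))  # half-open [start, end) id range
    shift += count

print(ranges)  # [(0, 4), (4, 7), (7, 9)]: type 0 takes the first range of ids, etc.
```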
-------------------------------------------------------------------------------- /LP/benchmark/methods/GATNE/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 Yukuo Cen 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. -------------------------------------------------------------------------------- /LP/benchmark/methods/GATNE/walk.py: -------------------------------------------------------------------------------- 1 | import random 2 | import multiprocessing 3 | 4 | from tqdm import tqdm 5 | 6 | def walk(args): 7 | walk_length, start, schema = args 8 | # Simulate a random walk starting from start node. 9 | rand = random.Random() 10 | 11 | if schema: 12 | schema_items = schema.split('-') 13 | assert schema_items[0] == schema_items[-1] 14 | 15 | walk = [start] 16 | while len(walk) < walk_length: 17 | cur = walk[-1] 18 | candidates = [] 19 | for node in G[cur]: 20 | if schema == '' or node_type[node] == schema_items[len(walk) % (len(schema_items) - 1)]: 21 | candidates.append(node) 22 | if candidates: 23 | walk.append(rand.choice(candidates)) 24 | else: 25 | break 26 | return [str(node) for node in walk] 27 | 28 | def initializer(init_G, init_node_type): 29 | global G 30 | G = init_G 31 | global node_type 32 | node_type = init_node_type 33 | 34 | class RWGraph: 35 | def __init__(self, nx_G, node_type_arr=None, num_workers=16): 36 | self.G = nx_G 37 | self.node_type = node_type_arr 38 | self.num_workers = num_workers 39 | 40 | def node_list(self, nodes, num_walks): 41 | for loop in range(num_walks): 42 | for node in nodes: 43 | yield node 44 | 45 | def simulate_walks(self, num_walks, walk_length, schema=None): 46 | all_walks = [] 47 | nodes = list(self.G.keys()) 48 | random.shuffle(nodes) 49 | 50 | if schema is None or schema == '': 51 | with multiprocessing.Pool(self.num_workers, initializer=initializer, initargs=(self.G, self.node_type)) as pool: 52 | all_walks = list(pool.imap(walk, ((walk_length, node, '') for node in tqdm(self.node_list(nodes, num_walks))), chunksize=256)) 53 | else: 54 | schema_list = schema.split(',') 55 | for schema_iter in schema_list: 56 | with multiprocessing.Pool(self.num_workers, initializer=initializer, initargs=(self.G, self.node_type)) as pool: 57 | walks = list(pool.imap(walk, ((walk_length, node, schema_iter) for node in tqdm(self.node_list(nodes, num_walks)) if schema_iter.split('-')[0] == self.node_type[node]), chunksize=512)) 58 | all_walks.extend(walks) 59 | 60 | return all_walks 61 |
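A toy invocation of the walker above, assuming `walk.py` is importable; the two-type user/item graph and the `u-i-u` schema here are made-up illustrations, not data from this repo:

```python
# Hedged sketch: schema-constrained random walks on a tiny heterogeneous graph.
from walk import RWGraph  # assumes this script sits next to walk.py

G = {
    'u0': ['i0', 'i1'],
    'u1': ['i1'],
    'i0': ['u0'],
    'i1': ['u0', 'u1'],
}
node_type = {'u0': 'u', 'u1': 'u', 'i0': 'i', 'i1': 'i'}

if __name__ == '__main__':  # required: simulate_walks spawns worker processes
    walker = RWGraph(G, node_type_arr=node_type, num_workers=2)
    walks = walker.simulate_walks(num_walks=2, walk_length=5)  # unconstrained
    # Only u-i-u-i-... paths are kept, and walks start from type-'u' nodes only.
    schema_walks = walker.simulate_walks(num_walks=2, walk_length=5, schema='u-i-u')
    print(walks[0], schema_walks[0])
```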
-------------------------------------------------------------------------------- /LP/benchmark/methods/GNN/README.md: -------------------------------------------------------------------------------- 1 | In dir `GNN`: 2 | * python homoGNN.py --data amazon --model GCN 3 | 4 | -------------------------------------------------------------------------------- /LP/benchmark/methods/HGT/README.md: -------------------------------------------------------------------------------- 1 | # HGT code 2 | 3 | Adapted from [HGT-DGL](https://github.com/acbull/HGT-DGL). 4 | 5 | ## running environment 6 | 7 | * Python 3.7 8 | * torch 1.7.0 9 | * dgl 0.5.2 10 | 11 | ## running procedure 12 | 13 | * download data from [tsinghua-cloud](https://cloud.tsinghua.edu.cn/d/8b9644cfa8344f26878c/) 14 | * cd to HGT/ 15 | * unzip all zip files 16 | * mkdir checkpoint 17 | * run the scripts below 18 | 19 | ```scripts 20 | sh run_LastFM.sh 21 | sh run_PubMed.sh 22 | sh run_amazon.sh 23 | sh run_LastFM_magnn.sh 24 | ``` -------------------------------------------------------------------------------- /LP/benchmark/methods/HGT/run_LastFM.sh: -------------------------------------------------------------------------------- 1 | CUDA_VISIBLE_DEVICES=0 python link_predict.py --device 0 --use_norm True --dataset LastFM -------------------------------------------------------------------------------- /LP/benchmark/methods/HGT/run_LastFM_magnn.sh: -------------------------------------------------------------------------------- 1 | CUDA_VISIBLE_DEVICES=0 python link_predict.py --device 0 --use_norm True --dataset LastFM_magnn -------------------------------------------------------------------------------- /LP/benchmark/methods/HGT/run_PubMed.sh: -------------------------------------------------------------------------------- 1 | CUDA_VISIBLE_DEVICES=0 python link_predict.py --device 0 --use_norm True --dataset PubMed -------------------------------------------------------------------------------- /LP/benchmark/methods/HGT/run_amazon.sh: -------------------------------------------------------------------------------- 1 | CUDA_VISIBLE_DEVICES=0 python link_predict.py --device 0 --use_norm True --dataset amazon -------------------------------------------------------------------------------- /LP/benchmark/methods/HGT/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/HGB/ca6fd5bb0c1ca32e63b132c8bfe8f11a4a6629fe/LP/benchmark/methods/HGT/utils/__init__.py -------------------------------------------------------------------------------- /LP/benchmark/methods/HGT/utils/data.py: -------------------------------------------------------------------------------- 1 | import networkx as nx 2 | import numpy as np 3 | import scipy 4 | import pickle 5 | import scipy.sparse as sp 6 | 7 | def load_data(prefix='DBLP'): 8 | from data_loader import data_loader 9 | dl = data_loader(prefix) 10 | features = [] 11 | for i in range(len(dl.nodes['count'])): 12 | th = dl.nodes['attr'][i] 13 | if th is None: 14 | features.append(sp.eye(dl.nodes['count'][i])) 15 | else: 16 | features.append(th) 17 | adjM = sum(dl.links['data'].values()) 18 | return features,\ 19 | adjM, \ 20 | dl 21 |
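A short sketch of how `load_data` above is typically consumed; `'LastFM'` is one of the datasets named in this method's run scripts, and the benchmark `data_loader` must be importable (path conventions may differ from this sketch):

```python
# Hedged sketch: inspecting what load_data returns.
features, adjM, dl = load_data('LastFM')

# One feature matrix per node type; types without attributes fall back to
# an identity matrix (i.e. one-hot features), as in the function above.
for node_type, feat in enumerate(features):
    print(f'node type {node_type}: {feat.shape[0]} nodes, {feat.shape[1]}-dim features')

# adjM sums the adjacency matrices of all link types into one sparse matrix.
print('total stored edges across all link types:', adjM.nnz)
```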
-------------------------------------------------------------------------------- /LP/benchmark/methods/HGT/utils/pytorchtools.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | 5 | class EarlyStopping: 6 | """Early stops the training if validation loss doesn't improve after a given patience.""" 7 | def __init__(self, patience, verbose=False, delta=0, save_path='checkpoint.pt'): 8 | """ 9 | Args: 10 | patience (int): How long to wait after last time validation loss improved. 11 | Default: 7 12 | verbose (bool): If True, prints a message for each validation loss improvement. 13 | Default: False 14 | delta (float): Minimum change in the monitored quantity to qualify as an improvement. 15 | Default: 0 16 | """ 17 | self.patience = patience 18 | self.verbose = verbose 19 | self.counter = 0 20 | self.best_score = None 21 | self.early_stop = False 22 | self.val_loss_min = np.Inf 23 | self.delta = delta 24 | self.save_path = save_path 25 | 26 | def __call__(self, val_loss, model): 27 | 28 | score = -val_loss 29 | 30 | if self.best_score is None: 31 | self.best_score = score 32 | self.save_checkpoint(val_loss, model) 33 | elif score < self.best_score - self.delta: 34 | self.counter += 1 35 | print(f'EarlyStopping counter: {self.counter} out of {self.patience}') 36 | if self.counter >= self.patience: 37 | self.early_stop = True 38 | else: 39 | self.best_score = score 40 | self.save_checkpoint(val_loss, model) 41 | self.counter = 0 42 | 43 | def save_checkpoint(self, val_loss, model): 44 | """Saves model when validation loss decrease.""" 45 | if self.verbose: 46 | print(f'Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}). Saving model ...') 47 | torch.save(model.state_dict(), self.save_path) 48 | self.val_loss_min = val_loss 49 | -------------------------------------------------------------------------------- /LP/benchmark/methods/HetGNN/DeepWalk.py: -------------------------------------------------------------------------------- 1 | import string 2 | import re 3 | import random 4 | import math 5 | import numpy as np 6 | from gensim.models import Word2Vec 7 | from itertools import * 8 | import sys 9 | 10 | dimen = 128 11 | window = 5 12 | 13 | 14 | def read_random_walk_corpus(): 15 | walks = [] 16 | inputfile = open(sys.path[0] + "/het_random_walk.txt", "r") 17 | for line in inputfile: 18 | path = re.split(' ', line) 19 | walks.append(path) 20 | inputfile.close() 21 | return walks 22 | 23 | 24 | def gen_net_embed(): 25 | walk_corpus = read_random_walk_corpus() 26 | model = Word2Vec(walk_corpus, size=dimen, window=window, min_count=0, workers=2, sg=1, hs=0, negative=5) 27 | file_ = sys.path[0] + "/node_net_embedding.txt" 28 | model.wv.save_word2vec_format(file_) 29 | print(f"Generate {file_} done.") 30 | -------------------------------------------------------------------------------- /LP/benchmark/methods/HetGNN/README.md: -------------------------------------------------------------------------------- 1 | cd HetGNN 2 | python main.py --data amazon 3 | python do_LP.py --data amazon -------------------------------------------------------------------------------- /LP/benchmark/methods/HetGNN/args.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | 4 | def read_args(): 5 | parser = argparse.ArgumentParser() 6 | parser.add_argument('--data', type=str, default='amazon', 7 | help='select data path') 8 | parser.add_argument('--model_path', type=str, default='../model_save/', 9 | help='path to save model') 10 | parser.add_argument('--in_f_d', type=int, default=128, 11 | help='input feature dimension') 12 | parser.add_argument('--embed_d', type=int, default=128, 13 | help='embedding dimension') 14 |
parser.add_argument('--lr', type=float, default=0.01, 15 | help='learning rate') 16 | parser.add_argument('--batch_s', type=int, default=20000, 17 | help='batch size') 18 | parser.add_argument('--mini_batch_s', type=int, default=200, 19 | help='mini batch size') 20 | parser.add_argument('--train_iter_n', type=int, default=310, 21 | help='max number of training iterations') 22 | parser.add_argument('--walk_n', type=int, default=10, 23 | help='number of walks per root node') 24 | parser.add_argument('--walk_L', type=int, default=30, 25 | help='length of each walk') 26 | parser.add_argument('--window', type=int, default=5, 27 | help='window size for relation extraction') 28 | parser.add_argument("--random_seed", default=10, type=int) 29 | parser.add_argument('--train_test_label', type=int, default=0, 30 | help='train/test label: 0 - train, 1 - test, 2 - code test/generate negative ids for evaluation') 31 | parser.add_argument('--save_model_freq', type=int, default=10, 32 | help='number of iterations between model saves') 33 | parser.add_argument("--cuda", default=0, type=int) 34 | parser.add_argument("--checkpoint", default='', type=str) 35 | parser.add_argument("--feat_type", default=0, type=int, 36 | help='feat_type=0: all id vector; ' 37 | 'feat_type=1: load feat from data_loader') 38 | args = parser.parse_args() 39 | 40 | return args 41 | -------------------------------------------------------------------------------- /LP/benchmark/methods/MAGNN/README.md: -------------------------------------------------------------------------------- 1 | ## MAGNN code for benchmark 2 | 3 | We have tried our best to fit MAGNN into the benchmark data\_loader, but failed. 4 | 5 | The MAGNN code is heavily hard-coded and has many bugs. For example, it requires every node to have at least one neighbour for every meta-path. Moreover, meta-path generation is extremely time-consuming. 6 | 7 | We therefore fit our data format to the initial MAGNN repo instead. The code is in the MAGNN\_ini folder. 8 | 9 | ## MAGNN 10 | 11 | This repository provides a reference implementation of MAGNN as described in the paper: 12 | > MAGNN: Metapath Aggregated Graph Neural Network for Heterogeneous Graph Embedding.
13 | > Xinyu Fu, Jiani Zhang, Ziqiao Meng, Irwin King.
14 | > The Web Conference, 2020. 15 | 16 | Available at [arXiv:2002.01680](https://arxiv.org/abs/2002.01680). 17 | 18 | ### Dependencies 19 | 20 | Recent versions of the following packages for Python 3 are required: 21 | * PyTorch 1.2.0 22 | * DGL 0.3.1 23 | * NetworkX 2.3 24 | * scikit-learn 0.21.3 25 | * NumPy 1.17.2 26 | * SciPy 1.3.1 27 | 28 | Dependencies for the preprocessing code are not listed here. 29 | 30 | ### Datasets 31 | 32 | The preprocessed datasets are available at: 33 | * IMDb - [Dropbox](https://www.dropbox.com/s/g0btk9ctr1es39x/IMDB_processed.zip?dl=0) 34 | * DBLP - [Dropbox](https://www.dropbox.com/s/yh4grpeks87ugr2/DBLP_processed.zip?dl=0) 35 | * Last.fm - [Dropbox](https://www.dropbox.com/s/jvlbs09pz6zwcka/LastFM_processed.zip?dl=0) 36 | 37 | The GloVe word vectors are obtained from [GloVe](https://nlp.stanford.edu/projects/glove/). Here is [the direct link](http://nlp.stanford.edu/data/glove.6B.zip) for the version we used in DBLP preprocessing. 38 | 39 | ### Usage 40 | 41 | 1. Create `checkpoint/` and `data/preprocessed` directories 42 | 2. Extract the zip file downloaded from the section above to `data/preprocessed` 43 | * E.g., extract the content of `IMDB_processed.zip` to `data/preprocessed/IMDB_processed` 44 | 3. Execute one of the following three commands from the project home directory: 45 | * `python run_IMDB.py` 46 | * `python run_DBLP.py` 47 | * `python run_LastFM.py` 48 | 49 | For more information about the available options of the model, run `python run_IMDB.py --help` 50 | 51 | ### Citing 52 | 53 | If you find MAGNN useful in your research, please cite the following paper: 54 | 55 | @inproceedings{fu2020magnn, 56 | title={MAGNN: Metapath Aggregated Graph Neural Network for Heterogeneous Graph Embedding}, 57 | author={Xinyu Fu and Jiani Zhang and Ziqiao Meng and Irwin King}, 58 | booktitle = {WWW}, 59 | year={2020} 60 | } 61 | -------------------------------------------------------------------------------- /LP/benchmark/methods/MAGNN/model/__init__.py: -------------------------------------------------------------------------------- 1 | from model.MAGNN_nc import MAGNN_nc 2 | from model.MAGNN_nc_mb import MAGNN_nc_mb 3 | from model.MAGNN_lp import MAGNN_lp 4 | -------------------------------------------------------------------------------- /LP/benchmark/methods/MAGNN/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/HGB/ca6fd5bb0c1ca32e63b132c8bfe8f11a4a6629fe/LP/benchmark/methods/MAGNN/utils/__init__.py -------------------------------------------------------------------------------- /LP/benchmark/methods/MAGNN/utils/pytorchtools.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | 5 | class EarlyStopping: 6 | """Early stops the training if validation loss doesn't improve after a given patience.""" 7 | def __init__(self, patience, verbose=False, delta=0, save_path='checkpoint.pt'): 8 | """ 9 | Args: 10 | patience (int): How long to wait after last time validation loss improved. 11 | Default: 7 12 | verbose (bool): If True, prints a message for each validation loss improvement. 13 | Default: False 14 | delta (float): Minimum change in the monitored quantity to qualify as an improvement.
15 | Default: 0 16 | """ 17 | self.patience = patience 18 | self.verbose = verbose 19 | self.counter = 0 20 | self.best_score = None 21 | self.early_stop = False 22 | self.val_loss_min = np.Inf 23 | self.delta = delta 24 | self.save_path = save_path 25 | 26 | def __call__(self, val_loss, model): 27 | 28 | score = -val_loss 29 | 30 | if self.best_score is None: 31 | self.best_score = score 32 | self.save_checkpoint(val_loss, model) 33 | elif score < self.best_score - self.delta: 34 | self.counter += 1 35 | print(f'EarlyStopping counter: {self.counter} out of {self.patience}') 36 | if self.counter >= self.patience: 37 | self.early_stop = True 38 | else: 39 | self.best_score = score 40 | self.save_checkpoint(val_loss, model) 41 | self.counter = 0 42 | 43 | def save_checkpoint(self, val_loss, model): 44 | """Saves model when validation loss decrease.""" 45 | if self.verbose: 46 | print(f'Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}). Saving model ...') 47 | torch.save(model.state_dict(), self.save_path) 48 | self.val_loss_min = val_loss 49 | -------------------------------------------------------------------------------- /LP/benchmark/methods/MAGNN_ini/README.md: -------------------------------------------------------------------------------- 1 | # MAGNN for benchmark 2 | 3 | First, run trans\_format.py to transform our benchmark data to the MAGNN format. 4 | 5 | Then, run ```ipython preprocess_LastFM.ipynb``` (or LastFM\_magnn) to get the preprocessed data. 6 | 7 | Next, run ```python run_LastFM.py --save-postfix LastFM``` (or LastFM\_magnn) to train the model. 8 | 9 | Last, run ```python test_LastFM.py --save-postfix LastFM``` (or LastFM\_magnn) to test the model. 10 | 11 | ## MAGNN 12 | 13 | This repository provides a reference implementation of MAGNN as described in the paper: 14 | > MAGNN: Metapath Aggregated Graph Neural Network for Heterogeneous Graph Embedding.
15 | > Xinyu Fu, Jiani Zhang, Ziqiao Meng, Irwin King.
16 | > The Web Conference, 2020. 17 | 18 | Available at [arXiv:2002.01680](https://arxiv.org/abs/2002.01680). 19 | 20 | ### Dependencies 21 | 22 | Recent versions of the following packages for Python 3 are required: 23 | * PyTorch 1.2.0 24 | * DGL 0.3.1 25 | * NetworkX 2.3 26 | * scikit-learn 0.21.3 27 | * NumPy 1.17.2 28 | * SciPy 1.3.1 29 | 30 | Dependencies for the preprocessing code are not listed here. 31 | 32 | ### Datasets 33 | 34 | The preprocessed datasets are available at: 35 | * IMDb - [Dropbox](https://www.dropbox.com/s/g0btk9ctr1es39x/IMDB_processed.zip?dl=0) 36 | * DBLP - [Dropbox](https://www.dropbox.com/s/yh4grpeks87ugr2/DBLP_processed.zip?dl=0) 37 | * Last.fm - [Dropbox](https://www.dropbox.com/s/jvlbs09pz6zwcka/LastFM_processed.zip?dl=0) 38 | 39 | The GloVe word vectors are obtained from [GloVe](https://nlp.stanford.edu/projects/glove/). Here is [the direct link](http://nlp.stanford.edu/data/glove.6B.zip) for the version we used in DBLP preprocessing. 40 | 41 | ### Usage 42 | 43 | 1. Create `checkpoint/` and `data/preprocessed` directories 44 | 2. Extract the zip file downloaded from the section above to `data/preprocessed` 45 | * E.g., extract the content of `IMDB_processed.zip` to `data/preprocessed/IMDB_processed` 46 | 3. Execute one of the following three commands from the project home directory: 47 | * `python run_IMDB.py` 48 | * `python run_DBLP.py` 49 | * `python run_LastFM.py` 50 | 51 | For more information about the available options of the model, run `python run_IMDB.py --help` 52 | 53 | ### Citing 54 | 55 | If you find MAGNN useful in your research, please cite the following paper: 56 | 57 | @inproceedings{fu2020magnn, 58 | title={MAGNN: Metapath Aggregated Graph Neural Network for Heterogeneous Graph Embedding}, 59 | author={Xinyu Fu and Jiani Zhang and Ziqiao Meng and Irwin King}, 60 | booktitle = {WWW}, 61 | year={2020} 62 | } 63 | -------------------------------------------------------------------------------- /LP/benchmark/methods/MAGNN_ini/model/__init__.py: -------------------------------------------------------------------------------- 1 | from model.MAGNN_nc import MAGNN_nc 2 | from model.MAGNN_nc_mb import MAGNN_nc_mb 3 | from model.MAGNN_lp import MAGNN_lp 4 | -------------------------------------------------------------------------------- /LP/benchmark/methods/MAGNN_ini/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/HGB/ca6fd5bb0c1ca32e63b132c8bfe8f11a4a6629fe/LP/benchmark/methods/MAGNN_ini/utils/__init__.py -------------------------------------------------------------------------------- /LP/benchmark/methods/MAGNN_ini/utils/pytorchtools.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | 5 | class EarlyStopping: 6 | """Early stops the training if validation loss doesn't improve after a given patience.""" 7 | def __init__(self, patience, verbose=False, delta=0, save_path='checkpoint.pt'): 8 | """ 9 | Args: 10 | patience (int): How long to wait after last time validation loss improved. 11 | Default: 7 12 | verbose (bool): If True, prints a message for each validation loss improvement. 13 | Default: False 14 | delta (float): Minimum change in the monitored quantity to qualify as an improvement.
15 | Default: 0 16 | """ 17 | self.patience = patience 18 | self.verbose = verbose 19 | self.counter = 0 20 | self.best_score = None 21 | self.early_stop = False 22 | self.val_loss_min = np.inf 23 | self.delta = delta 24 | self.save_path = save_path 25 | 26 | def __call__(self, val_loss, model): 27 | 28 | score = -val_loss 29 | 30 | if self.best_score is None: 31 | self.best_score = score 32 | self.save_checkpoint(val_loss, model) 33 | elif score < self.best_score - self.delta: 34 | self.counter += 1 35 | print(f'EarlyStopping counter: {self.counter} out of {self.patience}') 36 | if self.counter >= self.patience: 37 | self.early_stop = True 38 | else: 39 | self.best_score = score 40 | self.save_checkpoint(val_loss, model) 41 | self.counter = 0 42 | 43 | def save_checkpoint(self, val_loss, model): 44 | """Saves the model when the validation loss decreases.""" 45 | if self.verbose: 46 | print(f'Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}). Saving model ...') 47 | torch.save(model.state_dict(), self.save_path) 48 | self.val_loss_min = val_loss 49 | -------------------------------------------------------------------------------- /LP/benchmark/methods/RGCN/README.md: -------------------------------------------------------------------------------- 1 | # RGCN code 2 | 3 | Adapted from [DGL example](https://github.com/dmlc/dgl/tree/master/examples/pytorch/rgcn). 4 | 5 | We replace the GNN module in the paper with GCN for comparison. 6 | 7 | ## running environment 8 | 9 | * Python 3.7 10 | * torch 1.7.0 11 | * dgl 0.5.2 12 | * nvidia-ml-py3 7.352.0 13 | 14 | ## running procedure 15 | 16 | * Dataset will be downloaded automatically to ~/.dgl/. 17 | * or you can download the data from [tsinghua-cloud](https://cloud.tsinghua.edu.cn/d/8b9644cfa8344f26878c/) 18 | * unzip all zip files 19 | * move them to ~/.dgl/ 20 | * cd to RGCN/ 21 | * run the python files 22 | 23 | ```bash 24 | python link_predict.py --dataset=LastFM 25 | python link_predict.py --dataset=amazon --hidden-dim=60 26 | python link_predict.py --dataset=PubMed --hidden-dim=60 27 | python link_predict.py --dataset=LastFM_magnn 28 | ``` -------------------------------------------------------------------------------- /LP/benchmark/methods/baseline/README.md: -------------------------------------------------------------------------------- 1 | # Simple-HGN for HGB 2 | 3 | 4 | ``` 5 | python run_new.py --dataset LastFM 6 | python run_dist.py --dataset amazon 7 | python run_dist.py --dataset PubMed --batch-size 8192 8 | ``` 9 | 10 | ## running environment 11 | 12 | * torch 1.6.0 cuda 10.1 13 | * dgl 0.4.3 cuda 10.1 14 | * networkx 2.3 15 | * scikit-learn 0.23.2 16 | * scipy 1.5.2 17 | -------------------------------------------------------------------------------- /LP/benchmark/methods/baseline/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/HGB/ca6fd5bb0c1ca32e63b132c8bfe8f11a4a6629fe/LP/benchmark/methods/baseline/utils/__init__.py -------------------------------------------------------------------------------- /LP/benchmark/methods/baseline/utils/data.py: -------------------------------------------------------------------------------- 1 | import networkx as nx 2 | import numpy as np 3 | import scipy 4 | import pickle 5 | import scipy.sparse as sp 6 | 7 | def load_data(prefix='DBLP'): 8 | from scripts.data_loader import data_loader 9 | dl = data_loader('../../data/'+prefix) 10 | features = [] 11 | for i in range(len(dl.nodes['count'])): 12 | th = 
dl.nodes['attr'][i] 13 | if th is None: 14 | features.append(sp.eye(dl.nodes['count'][i])) 15 | else: 16 | features.append(th) 17 | adjM = sum(dl.links['data'].values()) 18 | return features,\ 19 | adjM, \ 20 | dl 21 | -------------------------------------------------------------------------------- /LP/benchmark/methods/baseline/utils/pytorchtools.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | 5 | class EarlyStopping: 6 | """Early stops the training if validation loss doesn't improve after a given patience.""" 7 | def __init__(self, patience, verbose=False, delta=0, save_path='checkpoint.pt'): 8 | """ 9 | Args: 10 | patience (int): How long to wait after last time validation loss improved. 11 | Default: 7 12 | verbose (bool): If True, prints a message for each validation loss improvement. 13 | Default: False 14 | delta (float): Minimum change in the monitored quantity to qualify as an improvement. 15 | Default: 0 16 | """ 17 | self.patience = patience 18 | self.verbose = verbose 19 | self.counter = 0 20 | self.best_score = None 21 | self.early_stop = False 22 | self.val_loss_min = np.inf 23 | self.delta = delta 24 | self.save_path = save_path 25 | 26 | def __call__(self, val_loss, model): 27 | 28 | score = -val_loss 29 | 30 | if self.best_score is None: 31 | self.best_score = score 32 | self.save_checkpoint(val_loss, model) 33 | elif score < self.best_score - self.delta: 34 | self.counter += 1 35 | print(f'EarlyStopping counter: {self.counter} out of {self.patience}') 36 | if self.counter >= self.patience: 37 | self.early_stop = True 38 | else: 39 | self.best_score = score 40 | self.save_checkpoint(val_loss, model) 41 | self.counter = 0 42 | 43 | def save_checkpoint(self, val_loss, model): 44 | """Saves the model when the validation loss decreases.""" 45 | if self.verbose: 46 | print(f'Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}). Saving model ...') 47 | torch.save(model.state_dict(), self.save_path) 48 | self.val_loss_min = val_loss 49 | -------------------------------------------------------------------------------- /LP/benchmark/scripts/README.md: -------------------------------------------------------------------------------- 1 | ## Evaluate AUC and MRR with prediction files.
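The script takes a zip archive of prediction files (`--pred_zip`) and writes the resulting AUC and MRR scores to the given log file (`--log`):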
2 | ```bash 3 | python LP_AUC_MRR.py --pred_zip lp.zip --log out.log 4 | ``` -------------------------------------------------------------------------------- /LP/benchmark/scripts/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/HGB/ca6fd5bb0c1ca32e63b132c8bfe8f11a4a6629fe/LP/benchmark/scripts/__init__.py -------------------------------------------------------------------------------- /NC/GTN/.gitignore: -------------------------------------------------------------------------------- 1 | data/* 2 | .vscode/* -------------------------------------------------------------------------------- /NC/GTN/GNN.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import dgl 3 | 4 | from dgl.nn.pytorch import GraphConv 5 | 6 | import dgl.function as fn 7 | from dgl.nn.pytorch import edge_softmax, GATConv 8 | 9 | 10 | class GAT(nn.Module): 11 | def __init__(self, 12 | g, 13 | num_layers, 14 | in_dim, 15 | num_hidden, 16 | num_classes, 17 | heads, 18 | activation, 19 | feat_drop, 20 | attn_drop, 21 | negative_slope, 22 | residual): 23 | super(GAT, self).__init__() 24 | self.g = g 25 | self.num_layers = num_layers 26 | self.gat_layers = nn.ModuleList() 27 | self.activation = activation 28 | # input projection (no residual) 29 | self.gat_layers.append(GATConv( 30 | in_dim, num_hidden, heads[0], 31 | feat_drop, attn_drop, negative_slope, False, self.activation)) 32 | # hidden layers 33 | for l in range(1, num_layers): 34 | # due to multi-head, the in_dim = num_hidden * num_heads 35 | self.gat_layers.append(GATConv( 36 | num_hidden * heads[l-1], num_hidden, heads[l], 37 | feat_drop, attn_drop, negative_slope, residual, self.activation)) 38 | # output projection 39 | self.gat_layers.append(GATConv( 40 | num_hidden * heads[-2], num_classes, heads[-1], 41 | feat_drop, attn_drop, negative_slope, residual, None)) 42 | 43 | def forward(self, inputs): 44 | h = inputs 45 | for l in range(self.num_layers): 46 | h = self.gat_layers[l](self.g, h).flatten(1) 47 | # output projection 48 | logits = self.gat_layers[-1](self.g, h).mean(1) 49 | return logits 50 | 51 | class GCN(nn.Module): 52 | def __init__(self, 53 | g, 54 | in_feats, 55 | n_hidden, 56 | n_classes, 57 | n_layers, 58 | activation, 59 | dropout): 60 | super(GCN, self).__init__() 61 | self.g = g 62 | self.layers = nn.ModuleList() 63 | # input layer 64 | self.layers.append(GraphConv(in_feats, n_hidden, activation=activation)) 65 | # hidden layers 66 | for i in range(n_layers - 1): 67 | self.layers.append(GraphConv(n_hidden, n_hidden, activation=activation)) 68 | # output layer 69 | self.layers.append(GraphConv(n_hidden, n_classes)) 70 | self.dropout = nn.Dropout(p=dropout) 71 | 72 | def forward(self, features): 73 | h = features 74 | for i, layer in enumerate(self.layers): 75 | if i != 0: 76 | h = self.dropout(h) 77 | h = layer(self.g, h) 78 | return h 79 | -------------------------------------------------------------------------------- /NC/GTN/GTN.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/HGB/ca6fd5bb0c1ca32e63b132c8bfe8f11a4a6629fe/NC/GTN/GTN.png -------------------------------------------------------------------------------- /NC/GTN/inits.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | 4 | def uniform(size, tensor): 5 | bound = 1.0 / math.sqrt(size) 6 | if tensor is not 
None: 7 | tensor.data.uniform_(-bound, bound) 8 | 9 | 10 | def kaiming_uniform(tensor, fan, a): 11 | bound = math.sqrt(6 / ((1 + a**2) * fan)) 12 | if tensor is not None: 13 | tensor.data.uniform_(-bound, bound) 14 | 15 | 16 | def glorot(tensor): 17 | stdv = math.sqrt(6.0 / (tensor.size(-2) + tensor.size(-1))) 18 | if tensor is not None: 19 | tensor.data.uniform_(-stdv, stdv) 20 | 21 | 22 | def zeros(tensor): 23 | if tensor is not None: 24 | tensor.data.fill_(0) 25 | 26 | 27 | def ones(tensor): 28 | if tensor is not None: 29 | tensor.data.fill_(1) 30 | 31 | 32 | def reset(nn): 33 | def _reset(item): 34 | if hasattr(item, 'reset_parameters'): 35 | item.reset_parameters() 36 | 37 | if nn is not None: 38 | if hasattr(nn, 'children') and len(list(nn.children())) > 0: 39 | for item in nn.children(): 40 | _reset(item) 41 | else: 42 | _reset(nn) -------------------------------------------------------------------------------- /NC/HAN/README.md: -------------------------------------------------------------------------------- 1 | # HAN code 2 | 3 | Adapted from [dgl/han](https://github.com/dmlc/dgl/tree/master/examples/pytorch/han). 4 | 5 | We add GCN and GAT comparisons under the --hetero setting. 6 | 7 | ## running environment 8 | 9 | * Python 3.8.5 10 | * torch 1.4.0 cuda 10.1 11 | * dgl 0.5.2 cuda 10.1 12 | 13 | ## running procedure 14 | 15 | * Download ACM.mat from [tsinghua-cloud](https://cloud.tsinghua.edu.cn/d/0e784c52a6084b59bdee/files/?p=%2FDGL%E4%BB%A3%E7%A0%81%E7%89%88%E6%9C%AC%2FACM.mat) or [google-drive](https://drive.google.com/file/d/1NVT_IHhPDS8dwMmsrnTRHj90F7OZu0WY/view?usp=sharing) 16 | * Move ACM.mat to the current directory 17 | * run main.py 18 | 19 | ```bash 20 | python main.py --model gcn 21 | python main.py --model gat 22 | python main.py --model han 23 | ``` 24 | 25 | ## performance report 26 | 27 | | | micro f1 score | macro f1 score | 28 | |--------------------|----------------|----------------| 29 | | Softmax regression | 89.66 | 89.62 | 30 | | HAN | 91.90 | 91.95 | 31 | | GCN | 92.79 | **92.87** | 32 | | GAT | **92.83** | 92.86 | 33 | 34 | ***The following content is from the initial dgl/han repo.*** 35 | 36 | # Heterogeneous Graph Attention Network (HAN) with DGL 37 | 38 | This is an attempt to implement HAN with DGL's latest APIs for heterogeneous graphs. 39 | The authors' implementation can be found [here](https://github.com/Jhy1993/HAN). 40 | 41 | ## Usage 42 | 43 | `python main.py` for reproducing HAN's work on their dataset. 44 | 45 | `python main.py --hetero` for reproducing HAN's work on DGL's own dataset from 46 | [here](https://github.com/Jhy1993/HAN/tree/master/data/acm). The dataset is noisy 47 | because the same authors occur multiple times as different nodes. 48 | 49 | ## Performance 50 | 51 | Reference performance numbers for the ACM dataset: 52 | 53 | | | micro f1 score | macro f1 score | 54 | | ------------------- | -------------- | -------------- | 55 | | Paper | 89.22 | 89.40 | 56 | | DGL | 88.99 | 89.02 | 57 | | Softmax regression (own dataset) | 89.66 | 89.62 | 58 | | DGL (own dataset) | 91.51 | 91.66 | 59 | 60 | We ran a softmax regression to check how easy our own dataset is. HAN did show some improvement.
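For reference, the softmax-regression baseline is just one linear layer trained with cross-entropy on the raw node features. A minimal sketch (the feature matrix `X`, integer labels `y`, and the index tensors are hypothetical names here, assumed to be extracted from ACM.mat beforehand):

```python
import torch
import torch.nn as nn
import torch.nn.functional as F

def softmax_regression(X, y, train_idx, test_idx, epochs=200):
    # A single linear layer + cross-entropy loss is exactly multi-class
    # logistic (softmax) regression.
    model = nn.Linear(X.shape[1], int(y.max()) + 1)
    opt = torch.optim.Adam(model.parameters(), lr=1e-2, weight_decay=5e-4)
    for _ in range(epochs):
        opt.zero_grad()
        F.cross_entropy(model(X[train_idx]), y[train_idx]).backward()
        opt.step()
    pred = model(X[test_idx]).argmax(dim=1)
    # For single-label classification, micro f1 equals plain accuracy.
    return (pred == y[test_idx]).float().mean().item()
```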
61 | -------------------------------------------------------------------------------- /NC/HetGNN/code/DeepWalk.py: -------------------------------------------------------------------------------- 1 | import string 2 | import re 3 | import random 4 | import math 5 | import numpy as np 6 | from gensim.models import Word2Vec 7 | from itertools import * 8 | dimen = 128 9 | window = 5 10 | 11 | 12 | def read_random_walk_corpus(): 13 | walks=[] 14 | #inputfile = open("../data/academic_test/meta_random_walk_APVPA_test.txt","r") 15 | inputfile = open("../data/academic_test/het_random_walk_test.txt", "r") 16 | for line in inputfile: 17 | path = [] 18 | node_list=re.split(' ',line) 19 | for i in range(len(node_list)): 20 | path.append(node_list[i]) 21 | walks.append(path) 22 | inputfile.close() 23 | return walks 24 | 25 | 26 | walk_corpus = read_random_walk_corpus() 27 | model = Word2Vec(walk_corpus, size = dimen, window = window, min_count = 0, workers = 2, sg = 1, hs = 0, negative = 5) 28 | 29 | 30 | print("Output...") 31 | #model.wv.save_word2vec_format("../data/node_embedding.txt") 32 | model.wv.save_word2vec_format("../data/academic_test/node_net_embedding.txt") 33 | 34 | -------------------------------------------------------------------------------- /NC/HetGNN/code/args.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | def read_args(): 4 | parser = argparse.ArgumentParser() 5 | parser.add_argument('--data_path', type = str, default = '../data/academic_test/', 6 | help='path to data') 7 | parser.add_argument('--model_path', type = str, default = '../model_save/', 8 | help='path to save model') 9 | parser.add_argument('--A_n', type = int, default = 28646, 10 | help = 'number of author node') 11 | parser.add_argument('--P_n', type = int, default = 21044, 12 | help = 'number of paper node') 13 | parser.add_argument('--V_n', type = int, default = 18, 14 | help = 'number of venue node') 15 | parser.add_argument('--in_f_d', type = int, default = 128, 16 | help = 'input feature dimension') 17 | parser.add_argument('--embed_d', type = int, default = 128, 18 | help = 'embedding dimension') 19 | parser.add_argument('--lr', type = float, default = 0.001, 20 | help = 'learning rate') 21 | parser.add_argument('--batch_s', type = int, default = 20000, 22 | help = 'batch size') 23 | parser.add_argument('--mini_batch_s', type = int, default = 200, 24 | help = 'mini batch size') 25 | parser.add_argument('--train_iter_n', type = int, default = 50, 26 | help = 'max number of training iteration') 27 | parser.add_argument('--walk_n', type = int, default = 10, 28 | help='number of walk per root node') 29 | parser.add_argument('--walk_L', type = int, default = 30, 30 | help='length of each walk') 31 | parser.add_argument('--window', type = int, default = 5, 32 | help='window size for relation extraction') 33 | parser.add_argument("--random_seed", default = 10, type = int) 34 | parser.add_argument('--train_test_label', type= int, default = 0, 35 | help='train/test label: 0 - train, 1 - test, 2 - code test/generate negative ids for evaluation') 36 | parser.add_argument('--save_model_freq', type = float, default = 2, 37 | help = 'number of iterations to save model') 38 | parser.add_argument("--cuda", default = 0, type = int) 39 | parser.add_argument("--checkpoint", default = '', type=str) 40 | 41 | args = parser.parse_args() 42 | 43 | return args 44 | --------------------------------------------------------------------------------
/NC/HetGNN/code/node_classification_model.py: -------------------------------------------------------------------------------- 1 | import random 2 | import string 3 | import re 4 | import numpy 5 | from itertools import * 6 | import sklearn 7 | from sklearn import linear_model 8 | import sklearn.metrics as Metric 9 | import csv 10 | import argparse 11 | 12 | parser = argparse.ArgumentParser(description = 'node classification task') 13 | parser.add_argument('--A_n', type = int, default = 28646, 14 | help = 'number of author node') 15 | parser.add_argument('--P_n', type = int, default = 21044, 16 | help = 'number of paper node') 17 | parser.add_argument('--V_n', type = int, default = 18, 18 | help = 'number of venue node') 19 | parser.add_argument('--data_path', type = str, default = '../data/academic_test/', 20 | help='path to data') 21 | parser.add_argument('--embed_d', type = int, default = 128, 22 | help = 'embedding dimension') 23 | 24 | args = parser.parse_args() 25 | print(args) 26 | 27 | 28 | def load_data(data_file_name, n_features, n_samples): 29 | with open(data_file_name) as f: 30 | data_file = csv.reader(f) 31 | data = numpy.empty((n_samples, n_features)) 32 | for i, d in enumerate(data_file): 33 | data[i] = numpy.asarray(d[:], dtype=float) 34 | 35 | return data 36 | 37 | 38 | def model(train_num, test_num): 39 | train_data_f = args.data_path + "train_class_feature.txt" 40 | train_data = load_data(train_data_f, args.embed_d + 2, train_num) 41 | train_features = train_data.astype(numpy.float32)[:,2:-1] 42 | train_target = train_data.astype(numpy.float32)[:,1] 43 | 44 | #print(train_target[1]) 45 | learner = linear_model.LogisticRegression() 46 | learner.fit(train_features, train_target) 47 | train_features = None 48 | train_target = None 49 | 50 | print("training finish!") 51 | 52 | test_data_f = args.data_path + "test_class_feature.txt" 53 | test_data = load_data(test_data_f, args.embed_d + 2, test_num) 54 | test_id = test_data.astype(numpy.int32)[:,0] 55 | test_features = test_data.astype(numpy.float32)[:,2:-1] 56 | test_target = test_data.astype(numpy.float32)[:,1] 57 | test_predict = learner.predict(test_features) 58 | test_features = None 59 | 60 | print("test prediction finish!") 61 | 62 | output_f = open(args.data_path + "NC_prediction.txt", "w") 63 | for i in range(len(test_predict)): 64 | output_f.write('%d,%lf\n'%(test_id[i],test_predict[i])) 65 | output_f.close() 66 | 67 | print ("MicroF1: ") 68 | print (sklearn.metrics.f1_score(test_target,test_predict,average='micro')) 69 | 70 | print("MacroF1: ") 71 | print(sklearn.metrics.f1_score(test_target, test_predict, average='macro')) 72 | 73 | 74 | 75 | 76 | -------------------------------------------------------------------------------- /NC/HetGNN/code/node_clustering_model.py: -------------------------------------------------------------------------------- 1 | import random 2 | import string 3 | import re 4 | import numpy 5 | from itertools import * 6 | import sklearn 7 | from sklearn.cluster import KMeans 8 | from sklearn.metrics.cluster import normalized_mutual_info_score 9 | from sklearn.metrics.cluster import adjusted_rand_score 10 | import csv 11 | import argparse 12 | 13 | parser = argparse.ArgumentParser(description = 'node clustering task') 14 | parser.add_argument('--C_n', type = int, default = 4, 15 | help = 'number of node class label') 16 | parser.add_argument('--data_path', type = str, default = '../data/academic_test/', 17 | help='path to data') 18 | parser.add_argument('--embed_d', type = 
int, default = 128, 19 | help = 'embedding dimension') 20 | 21 | args = parser.parse_args() 22 | print(args) 23 | 24 | 25 | def model(cluster_id_num): 26 | cluster_embed = numpy.around(numpy.random.normal(0, 0.01, [cluster_id_num, args.embed_d]), 4) 27 | cluster_embed_f = open(args.data_path + "cluster_embed.txt", "r") 28 | for line in cluster_embed_f: 29 | line=line.strip() 30 | author_index=int(re.split(' ',line)[0]) 31 | embed_list=re.split(' ',line)[1:] 32 | for i in range(len(embed_list)): 33 | cluster_embed[author_index][i] = embed_list[i] 34 | 35 | kmeans = KMeans(n_clusters = args.C_n, random_state = 0).fit(cluster_embed) 36 | 37 | cluster_id_list = [0] * cluster_id_num 38 | cluster_id_f = open(args.data_path + "cluster.txt", "r") 39 | for line in cluster_id_f: 40 | line = line.strip() 41 | author_index = int(re.split(',',line)[0]) 42 | cluster_id = int(re.split(',',line)[1]) 43 | cluster_id_list[author_index] = cluster_id 44 | 45 | #NMI 46 | print ("NMI: " + str(normalized_mutual_info_score(kmeans.labels_, cluster_id_list))) 47 | print ("ARI: " + str(adjusted_rand_score(kmeans.labels_, cluster_id_list))) 48 | 49 | -------------------------------------------------------------------------------- /NC/MAGNN/.gitignore: -------------------------------------------------------------------------------- 1 | data/* 2 | checkpoint/* 3 | -------------------------------------------------------------------------------- /NC/MAGNN/model/__init__.py: -------------------------------------------------------------------------------- 1 | from model.MAGNN_nc import MAGNN_nc 2 | from model.MAGNN_nc_mb import MAGNN_nc_mb 3 | from model.MAGNN_lp import MAGNN_lp 4 | -------------------------------------------------------------------------------- /NC/MAGNN/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/HGB/ca6fd5bb0c1ca32e63b132c8bfe8f11a4a6629fe/NC/MAGNN/utils/__init__.py -------------------------------------------------------------------------------- /NC/MAGNN/utils/pytorchtools.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | 5 | class EarlyStopping: 6 | """Early stops the training if validation loss doesn't improve after a given patience.""" 7 | def __init__(self, patience, verbose=False, delta=0, save_path='checkpoint.pt'): 8 | """ 9 | Args: 10 | patience (int): How long to wait after last time validation loss improved. 11 | Default: 7 12 | verbose (bool): If True, prints a message for each validation loss improvement. 13 | Default: False 14 | delta (float): Minimum change in the monitored quantity to qualify as an improvement.
15 | Default: 0 16 | """ 17 | self.patience = patience 18 | self.verbose = verbose 19 | self.counter = 0 20 | self.best_score = None 21 | self.early_stop = False 22 | self.val_loss_min = np.inf 23 | self.delta = delta 24 | self.save_path = save_path 25 | 26 | def __call__(self, val_loss, model): 27 | 28 | score = -val_loss 29 | 30 | if self.best_score is None: 31 | self.best_score = score 32 | self.save_checkpoint(val_loss, model) 33 | elif score < self.best_score - self.delta: 34 | self.counter += 1 35 | print(f'EarlyStopping counter: {self.counter} out of {self.patience}') 36 | if self.counter >= self.patience: 37 | self.early_stop = True 38 | else: 39 | self.best_score = score 40 | self.save_checkpoint(val_loss, model) 41 | self.counter = 0 42 | 43 | def save_checkpoint(self, val_loss, model): 44 | """Saves the model when the validation loss decreases.""" 45 | if self.verbose: 46 | print(f'Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}). Saving model ...') 47 | torch.save(model.state_dict(), self.save_path) 48 | self.val_loss_min = val_loss 49 | -------------------------------------------------------------------------------- /NC/RSHN/torch_geometric/__init__.py: -------------------------------------------------------------------------------- 1 | __version__ = '1.0.0' 2 | 3 | __all__ = ['__version__'] 4 | -------------------------------------------------------------------------------- /NC/RSHN/torch_geometric/data/__init__.py: -------------------------------------------------------------------------------- 1 | from .data import Data 2 | from .batch import Batch 3 | from .dataset import Dataset 4 | from .in_memory_dataset import InMemoryDataset 5 | from .dataloader import DataLoader, DenseDataLoader 6 | from .download import download_url 7 | from .extract import extract_tar, extract_zip, extract_gz 8 | 9 | __all__ = [ 10 | 'Data', 11 | 'Batch', 12 | 'Dataset', 13 | 'InMemoryDataset', 14 | 'DataLoader', 15 | 'DenseDataLoader', 16 | 'download_url', 17 | 'extract_tar', 18 | 'extract_zip', 19 | 'extract_gz', 20 | ] 21 | -------------------------------------------------------------------------------- /NC/RSHN/torch_geometric/data/batch.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch_geometric.data import Data 3 | 4 | 5 | class Batch(Data): 6 | def __init__(self, batch=None, **kwargs): 7 | super(Batch, self).__init__(**kwargs) 8 | self.batch = batch 9 | 10 | @staticmethod 11 | def from_data_list(data_list): 12 | """""" 13 | keys = [set(data.keys) for data in data_list] 14 | keys = list(set.union(*keys)) 15 | assert 'batch' not in keys 16 | 17 | batch = Batch() 18 | 19 | for key in keys: 20 | batch[key] = [] 21 | batch.batch = [] 22 | 23 | cumsum = 0 24 | for i, data in enumerate(data_list): 25 | num_nodes = data.num_nodes 26 | batch.batch.append(torch.full((num_nodes, ), i, dtype=torch.long)) 27 | for key in data.keys: 28 | item = data[key] 29 | item = item + cumsum if batch.cumsum(key, item) else item 30 | batch[key].append(item) 31 | cumsum += num_nodes 32 | 33 | for key in keys: 34 | batch[key] = torch.cat( 35 | batch[key], dim=data_list[0].cat_dim(key, batch[key][0])) 36 | batch.batch = torch.cat(batch.batch, dim=-1) 37 | return batch.contiguous() 38 | 39 | def cumsum(self, key, item): 40 | return item.dim() > 1 and item.dtype == torch.long 41 | 42 | @property 43 | def num_graphs(self): 44 | """""" 45 | return self.batch[-1].item() + 1 46 | 
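# Usage sketch (assuming `Data` objects that carry node features `x` and a
# long `edge_index`, as defined in data.py):
#
#   d1 = Data(x=torch.zeros(3, 1), edge_index=torch.tensor([[0, 1], [1, 2]]))
#   d2 = Data(x=torch.zeros(2, 1), edge_index=torch.tensor([[0], [1]]))
#   batch = Batch.from_data_list([d1, d2])
#   # batch.x stacks all 5 node features, d2's edge_index is shifted by 3
#   # (cumsum only offsets long tensors with dim > 1), and
#   # batch.batch == tensor([0, 0, 0, 1, 1]) maps each node to its graph.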
-------------------------------------------------------------------------------- /NC/RSHN/torch_geometric/data/dataloader.py: -------------------------------------------------------------------------------- 1 | import torch.utils.data 2 | from torch.utils.data.dataloader import default_collate 3 | 4 | from torch_geometric.data import Batch 5 | 6 | 7 | class DataLoader(torch.utils.data.DataLoader): 8 | def __init__(self, dataset, batch_size=1, shuffle=True, **kwargs): 9 | super(DataLoader, self).__init__( 10 | dataset, 11 | batch_size, 12 | shuffle, 13 | collate_fn=lambda batch: Batch.from_data_list(batch), 14 | **kwargs) 15 | 16 | 17 | class DenseDataLoader(torch.utils.data.DataLoader): 18 | def __init__(self, dataset, batch_size=1, shuffle=True, **kwargs): 19 | def dense_collate(data_list): 20 | batch = Batch() 21 | for key in data_list[0].keys: 22 | batch[key] = default_collate([d[key] for d in data_list]) 23 | return batch 24 | 25 | super(DenseDataLoader, self).__init__( 26 | dataset, batch_size, shuffle, collate_fn=dense_collate, **kwargs) 27 | -------------------------------------------------------------------------------- /NC/RSHN/torch_geometric/data/dataset.py: -------------------------------------------------------------------------------- 1 | import collections.abc 2 | import os.path as osp 3 | 4 | import torch.utils.data 5 | 6 | from .makedirs import makedirs 7 | 8 | 9 | def to_list(x): 10 | if not isinstance(x, collections.abc.Iterable) or isinstance(x, str): 11 | x = [x] 12 | return x 13 | 14 | 15 | def files_exist(files): 16 | return all([osp.exists(f) for f in files]) 17 | 18 | 19 | class Dataset(torch.utils.data.Dataset): 20 | @property 21 | def raw_file_names(self): 22 | """""" 23 | raise NotImplementedError 24 | 25 | @property 26 | def processed_file_names(self): 27 | """""" 28 | raise NotImplementedError 29 | 30 | def download(self): 31 | """""" 32 | raise NotImplementedError 33 | 34 | def process(self): 35 | """""" 36 | raise NotImplementedError 37 | 38 | def __len__(self): 39 | """""" 40 | raise NotImplementedError 41 | 42 | def get(self, idx): 43 | """""" 44 | raise NotImplementedError 45 | 46 | def __init__(self, 47 | root, 48 | transform=None, 49 | pre_transform=None, 50 | pre_filter=None): 51 | super(Dataset, self).__init__() 52 | 53 | self.root = osp.expanduser(osp.normpath(root)) 54 | self.raw_dir = osp.join(self.root, 'raw') 55 | self.processed_dir = osp.join(self.root, 'processed') 56 | self.transform = transform 57 | self.pre_transform = pre_transform 58 | self.pre_filter = pre_filter 59 | 60 | self._download() 61 | self._process() 62 | 63 | @property 64 | def num_features(self): 65 | """""" 66 | return self[0].num_features 67 | 68 | @property 69 | def raw_paths(self): 70 | files = to_list(self.raw_file_names) 71 | return [osp.join(self.raw_dir, f) for f in files] 72 | 73 | @property 74 | def processed_paths(self): 75 | files = to_list(self.processed_file_names) 76 | return [osp.join(self.processed_dir, f) for f in files] 77 | 78 | def _download(self): 79 | if files_exist(self.raw_paths): # pragma: no cover 80 | return 81 | 82 | makedirs(self.raw_dir) 83 | self.download() 84 | 85 | def _process(self): 86 | if files_exist(self.processed_paths): # pragma: no cover 87 | return 88 | 89 | print('Processing...') 90 | 91 | makedirs(self.processed_dir) 92 | self.process() 93 | 94 | print('Done!') 95 | 96 | def __getitem__(self, idx): # pragma: no cover 97 | data = self.get(idx) 98 | data = data if self.transform is None else self.transform(data) 99 | return data 100 | 101 | 
def __repr__(self): # pragma: no cover 102 | return '{}({})'.format(self.__class__.__name__, len(self)) 103 | -------------------------------------------------------------------------------- /NC/RSHN/torch_geometric/data/download.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | import os.path as osp 4 | from six.moves import urllib 5 | 6 | from .makedirs import makedirs 7 | 8 | 9 | def download_url(url, folder, log=True): 10 | if log: 11 | print('Downloading', url) 12 | 13 | makedirs(folder) 14 | 15 | data = urllib.request.urlopen(url) 16 | filename = url.rpartition('/')[2] 17 | path = osp.join(folder, filename) 18 | 19 | with open(path, 'wb') as f: 20 | f.write(data.read()) 21 | 22 | return path 23 | -------------------------------------------------------------------------------- /NC/RSHN/torch_geometric/data/extract.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | import os.path as osp 4 | import tarfile 5 | import zipfile 6 | import gzip 7 | import shutil 8 | 9 | 10 | def maybe_log(path, log=True): 11 | if log: 12 | print('Extracting', path) 13 | 14 | 15 | def extract_tar(path, folder, mode='r:gz', log=True): 16 | maybe_log(path, log) 17 | with tarfile.open(path, mode) as f: 18 | f.extractall(folder) 19 | 20 | 21 | def extract_zip(path, folder, log=True): 22 | maybe_log(path, log) 23 | with zipfile.ZipFile(path, 'r') as f: 24 | f.extractall(folder) 25 | 26 | 27 | def extract_gz(path, folder, name, log=True): 28 | maybe_log(path, log) 29 | with gzip.open(path, 'rb') as f_in: 30 | with open(osp.join(folder, name), 'wb') as f_out: 31 | shutil.copyfileobj(f_in, f_out) 32 | -------------------------------------------------------------------------------- /NC/RSHN/torch_geometric/data/makedirs.py: -------------------------------------------------------------------------------- 1 | import os 2 | import os.path as osp 3 | import errno 4 | 5 | 6 | def makedirs(path): 7 | try: 8 | os.makedirs(osp.expanduser(osp.normpath(path))) 9 | except OSError as e: 10 | if e.errno != errno.EEXIST or not osp.isdir(path): 11 | raise e 12 | -------------------------------------------------------------------------------- /NC/RSHN/torch_geometric/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | from .entities import Entities 2 | 3 | 4 | __all__ = [ 5 | 'Entities' 6 | ] 7 | -------------------------------------------------------------------------------- /NC/RSHN/torch_geometric/nn/__init__.py: -------------------------------------------------------------------------------- 1 | from .conv import * # noqa 2 | -------------------------------------------------------------------------------- /NC/RSHN/torch_geometric/nn/conv/__init__.py: -------------------------------------------------------------------------------- 1 | from .message_passing import MessagePassing 2 | from .nn_conv import NNConv 3 | from .relation_conv import RelationConv 4 | __all__ = [ 5 | 'MessagePassing', 6 | 'NNConv', 7 | 'RelationConv', 8 | ] 9 | -------------------------------------------------------------------------------- /NC/RSHN/torch_geometric/nn/conv/message_passing.py: -------------------------------------------------------------------------------- 1 | import inspect 2 | 3 | import torch 4 | from torch_geometric.utils import scatter_ 5 | 6 | 7 | class MessagePassing(torch.nn.Module): 8 | def __init__(self, 
aggr='add'): 9 | super(MessagePassing, self).__init__() 10 | 11 | self.message_args = inspect.getargspec(self.message)[0][1:] 12 | self.update_args = inspect.getargspec(self.update)[0][2:] 13 | 14 | def propagate(self, aggr, edge_index, **kwargs): 15 | assert aggr in ['add', 'mean', 'max'] 16 | kwargs['edge_index'] = edge_index 17 | 18 | size = None 19 | message_args = [] 20 | for arg in self.message_args: 21 | if arg[-2:] == '_i': 22 | tmp = kwargs[arg[:-2]] 23 | size = tmp.size(0) 24 | message_args.append(tmp[edge_index[0]]) 25 | elif arg[-2:] == '_j': 26 | tmp = kwargs[arg[:-2]] 27 | size = tmp.size(0) 28 | message_args.append(tmp[edge_index[1]]) 29 | else: 30 | message_args.append(kwargs[arg]) 31 | 32 | update_args = [kwargs[arg] for arg in self.update_args] 33 | 34 | out = self.message(*message_args) 35 | out = scatter_(aggr, out, edge_index[0], dim_size=size) 36 | out = self.update(out, *update_args) 37 | 38 | return out 39 | 40 | def message(self, x_j): # pragma: no cover 41 | return x_j 42 | 43 | def update(self, aggr_out): # pragma: no cover 44 | return aggr_out 45 | -------------------------------------------------------------------------------- /NC/RSHN/torch_geometric/nn/conv/nn_conv.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.nn import Parameter 3 | from torch_geometric.nn.conv import MessagePassing 4 | 5 | from ..inits import reset, uniform 6 | 7 | 8 | class NNConv(MessagePassing): 9 | def __init__(self, 10 | in_channels, 11 | out_channels, 12 | nn, 13 | aggr="add", 14 | root_weight=False, 15 | bias=False): 16 | super(NNConv, self).__init__() 17 | 18 | self.in_channels = in_channels 19 | self.out_channels = out_channels 20 | self.nn = nn 21 | self.aggr = aggr 22 | self.weight = Parameter(torch.Tensor(in_channels, out_channels)) 23 | 24 | if root_weight: 25 | self.root = Parameter(torch.Tensor(in_channels, out_channels)) 26 | else: 27 | self.register_parameter('root', None) 28 | 29 | if bias: 30 | self.bias = Parameter(torch.Tensor(out_channels)) 31 | else: 32 | self.register_parameter('bias', None) 33 | 34 | self.reset_parameters() 35 | 36 | def reset_parameters(self): 37 | reset(self.nn) 38 | size = self.in_channels 39 | uniform(size, self.weight) 40 | uniform(size, self.root) 41 | uniform(size, self.bias) 42 | 43 | def forward(self, x, edge_index, pseudo): 44 | x = x.unsqueeze(-1) if x.dim() == 1 else x 45 | edge_weight = pseudo.unsqueeze(-1) if pseudo.dim() == 1 else pseudo 46 | edge_weight = self.nn(edge_weight).view(-1, self.out_channels) 47 | 48 | x = torch.matmul(x, self.weight) 49 | return self.propagate(self.aggr, edge_index, x=x, edge_weight=edge_weight) 50 | 51 | 52 | def message(self, x_j, edge_weight): 53 | message = x_j - edge_weight 54 | return message 55 | 56 | def update(self, aggr_out, x): 57 | if self.bias is not None: 58 | aggr_out = aggr_out + self.bias 59 | return aggr_out + x 60 | 61 | def __repr__(self): 62 | return '{}({}, {})'.format(self.__class__.__name__, self.in_channels, 63 | self.out_channels) 64 | -------------------------------------------------------------------------------- /NC/RSHN/torch_geometric/nn/conv/relation_conv.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.nn import Parameter 3 | import torch.nn.functional as F 4 | from torch_sparse import spmm 5 | from torch_geometric.utils import remove_self_loops, add_self_loops, softmax, add_self_edge_attr_loops 6 | 7 | 8 | class 
RelationConv(torch.nn.Module): 9 | 10 | def __init__(self, eps=0, train_eps=False, requires_grad=True): 11 | super(RelationConv, self).__init__() 12 | 13 | self.initial_eps = eps 14 | 15 | if train_eps: 16 | self.eps = torch.nn.Parameter(torch.Tensor([eps])) 17 | else: 18 | self.register_buffer('eps', torch.Tensor([eps])) 19 | 20 | '''beta''' 21 | self.requires_grad = requires_grad 22 | if requires_grad: 23 | self.beta = Parameter(torch.Tensor(1)) 24 | else: 25 | self.register_buffer('beta', torch.ones(1)) 26 | 27 | self.reset_parameters() 28 | 29 | def reset_parameters(self): 30 | self.eps.data.fill_(self.initial_eps) 31 | if self.requires_grad: 32 | self.beta.data.fill_(1) 33 | 34 | def forward(self, x, edge_index, edge_attr): 35 | """""" 36 | x = x.unsqueeze(-1) if x.dim() == 1 else x 37 | edge_index, edge_attr = remove_self_loops(edge_index, edge_attr) 38 | row, col = edge_index 39 | 40 | '''co-occurrence rate''' 41 | for i in range(len(x)): 42 | mask = torch.eq(row, i) 43 | edge_attr[mask] = F.normalize(edge_attr[mask], p=2, dim=0) 44 | 45 | '''add-self-loops''' 46 | edge_index = add_self_loops(edge_index, x.size(0)) 47 | row, col = edge_index 48 | edge_attr = add_self_edge_attr_loops(edge_attr, x.size(0)) 49 | 50 | x = F.normalize(x, p=2, dim=-1) 51 | beta = self.beta if self.requires_grad else self._buffers['beta'] 52 | alpha = beta * edge_attr 53 | alpha = softmax(alpha, row, num_nodes=x.size(0)) 54 | 55 | '''Perform the propagation.''' 56 | out = spmm(edge_index, alpha, x.size(0), x.size(1), x) 57 | out = (1 + self.eps) * x + out 58 | return out 59 | 60 | def __repr__(self): 61 | return '{}()'.format(self.__class__.__name__) 62 | -------------------------------------------------------------------------------- /NC/RSHN/torch_geometric/nn/inits.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | 4 | def uniform(size, tensor): 5 | stdv = 1.0 / math.sqrt(size) 6 | if tensor is not None: 7 | tensor.data.uniform_(-stdv, stdv) 8 | 9 | 10 | def glorot(tensor): 11 | stdv = math.sqrt(6.0 / (tensor.size(0) + tensor.size(1))) 12 | if tensor is not None: 13 | tensor.data.uniform_(-stdv, stdv) 14 | 15 | 16 | def zeros(tensor): 17 | if tensor is not None: 18 | tensor.data.fill_(0) 19 | 20 | 21 | def ones(tensor): 22 | if tensor is not None: 23 | tensor.data.fill_(1) 24 | 25 | 26 | def reset(nn): 27 | def _reset(item): 28 | if hasattr(item, 'reset_parameters'): 29 | item.reset_parameters() 30 | 31 | if nn is not None: 32 | if hasattr(nn, 'children') and len(list(nn.children())) > 0: 33 | for item in nn.children(): 34 | _reset(item) 35 | else: 36 | _reset(nn) 37 | -------------------------------------------------------------------------------- /NC/RSHN/torch_geometric/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .degree import degree 2 | from .scatter import scatter_ 3 | from .softmax import softmax 4 | from .undirected import is_undirected, to_undirected 5 | from .isolated import contains_isolated_nodes 6 | from .loop import contains_self_loops, remove_self_loops, add_self_loops, add_self_edge_attr_loops 7 | from .one_hot import one_hot 8 | from .grid import grid 9 | from .normalized_cut import normalized_cut 10 | from .sparse import dense_to_sparse, sparse_to_dense 11 | from .to_batch import to_batch 12 | from .convert import to_scipy_sparse_matrix, to_networkx 13 | from .metric import (accuracy, true_positive, true_negative, false_positive, 14 | false_negative, precision, 
recall, f1_score) 15 | 16 | __all__ = [ 17 | 'degree', 18 | 'scatter_', 19 | 'softmax', 20 | 'is_undirected', 21 | 'to_undirected', 22 | 'contains_self_loops', 23 | 'remove_self_loops', 24 | 'add_self_loops', 25 | 'add_self_edge_attr_loops', 26 | 'contains_isolated_nodes', 27 | 'one_hot', 28 | 'grid', 29 | 'normalized_cut', 30 | 'dense_to_sparse', 31 | 'sparse_to_dense', 32 | 'to_batch', 33 | 'to_scipy_sparse_matrix', 34 | 'to_networkx', 35 | 'accuracy', 36 | 'true_positive', 37 | 'true_negative', 38 | 'false_positive', 39 | 'false_negative', 40 | 'precision', 41 | 'recall', 42 | 'f1_score', 43 | ] 44 | -------------------------------------------------------------------------------- /NC/RSHN/torch_geometric/utils/convert.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import scipy.sparse 3 | import networkx as nx 4 | 5 | from .num_nodes import maybe_num_nodes 6 | 7 | 8 | def to_scipy_sparse_matrix(edge_index, edge_attr=None, num_nodes=None): 9 | row, col = edge_index.cpu() 10 | 11 | if edge_attr is None: 12 | edge_attr = torch.ones(row.size(0)) 13 | else: 14 | edge_attr = edge_attr.view(-1).cpu() 15 | assert edge_attr.size(0) == row.size(0) 16 | 17 | N = maybe_num_nodes(edge_index, num_nodes) 18 | out = scipy.sparse.coo_matrix((edge_attr, (row, col)), (N, N)) 19 | return out 20 | 21 | 22 | def to_networkx(edge_index, x=None, edge_attr=None, pos=None, num_nodes=None): 23 | num_nodes = num_nodes if x is None else x.size(0) 24 | num_nodes = num_nodes if pos is None else pos.size(0) 25 | num_nodes = maybe_num_nodes(edge_index, num_nodes) 26 | 27 | G = nx.DiGraph() 28 | 29 | for i in range(num_nodes): 30 | G.add_node(i) 31 | if x is not None: 32 | G.nodes[i]['x'] = x[i].cpu().numpy() 33 | if pos is not None: 34 | G.nodes[i]['pos'] = pos[i].cpu().numpy() 35 | 36 | for i in range(edge_index.size(1)): 37 | source, target = edge_index[0][i].item(), edge_index[1][i].item() 38 | G.add_edge(source, target) 39 | if edge_attr is not None: 40 | if edge_attr.numel() == edge_attr.size(0): 41 | G[source][target]['weight'] = edge_attr[i].item() 42 | else: 43 | G[source][target]['weight'] = edge_attr[i].cpu().numpy() 44 | 45 | return G 46 | -------------------------------------------------------------------------------- /NC/RSHN/torch_geometric/utils/degree.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from .num_nodes import maybe_num_nodes 4 | 5 | 6 | def degree(index, num_nodes=None, dtype=None): 7 | """Computes the degree of a given index tensor. 8 | 9 | Args: 10 | index (LongTensor): Source or target indices of edges. 11 | num_nodes (int, optional): The number of nodes in :attr:`index`. 12 | (default: :obj:`None`) 13 | dtype (:obj:`torch.dtype`, optional): The desired data type of the 14 | returned tensor. 15 | 16 | :rtype: :class:`Tensor` 17 | 18 | .. testsetup:: 19 | 20 | import torch 21 | 22 | .. 
testcode:: 23 | 24 | from torch_geometric.utils import degree 25 | index = torch.tensor([0, 1, 0, 2, 0]) 26 | out = degree(index) 27 | """ 28 | 29 | num_nodes = maybe_num_nodes(index, num_nodes) 30 | out = torch.zeros((num_nodes), dtype=dtype, device=index.device) 31 | return out.scatter_add_(0, index, out.new_ones((index.size(0)))) 32 | -------------------------------------------------------------------------------- /NC/RSHN/torch_geometric/utils/grid.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch_sparse import coalesce 3 | 4 | 5 | def grid(height, width, dtype=None, device=None): 6 | edge_index = grid_index(height, width, device) 7 | pos = grid_pos(height, width, dtype, device) 8 | return edge_index, pos 9 | 10 | 11 | def grid_index(height, width, device=None): 12 | w = width 13 | kernel = [-w - 1, -1, w - 1, -w, 0, w, -w + 1, 1, w + 1] 14 | kernel = torch.tensor(kernel, device=device) 15 | 16 | row = torch.arange(height * width, dtype=torch.long, device=device) 17 | row = row.view(-1, 1).repeat(1, kernel.size(0)) 18 | col = row + kernel.view(1, -1) 19 | row, col = row.view(height, -1), col.view(height, -1) 20 | index = torch.arange(3, row.size(1) - 3, dtype=torch.long, device=device) 21 | row, col = row[:, index].view(-1), col[:, index].view(-1) 22 | 23 | mask = (col >= 0) & (col < height * width) 24 | row, col = row[mask], col[mask] 25 | 26 | edge_index = torch.stack([row, col], dim=0) 27 | edge_index, _ = coalesce(edge_index, None, height * width, height * width) 28 | 29 | return edge_index 30 | 31 | 32 | def grid_pos(height, width, dtype=None, device=None): 33 | x = torch.arange(width, dtype=dtype, device=device) 34 | y = (height - 1) - torch.arange(height, dtype=dtype, device=device) 35 | 36 | x = x.repeat(height) 37 | y = y.unsqueeze(-1).repeat(1, width).view(-1) 38 | 39 | return torch.stack([x, y], dim=-1) 40 | -------------------------------------------------------------------------------- /NC/RSHN/torch_geometric/utils/isolated.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from .num_nodes import maybe_num_nodes 4 | from .loop import remove_self_loops 5 | 6 | 7 | def contains_isolated_nodes(edge_index, num_nodes=None): 8 | num_nodes = maybe_num_nodes(edge_index, num_nodes) 9 | (row, _), _ = remove_self_loops(edge_index) 10 | return torch.unique(row).size(0) < num_nodes 11 | -------------------------------------------------------------------------------- /NC/RSHN/torch_geometric/utils/loop.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from .num_nodes import maybe_num_nodes 4 | 5 | 6 | def contains_self_loops(edge_index): 7 | row, col = edge_index 8 | mask = row == col 9 | return mask.sum().item() > 0 10 | 11 | 12 | def remove_self_loops(edge_index, edge_attr=None): 13 | row, col = edge_index 14 | mask = row != col 15 | edge_attr = edge_attr if edge_attr is None else edge_attr[mask] 16 | mask = mask.unsqueeze(0).expand_as(edge_index) 17 | edge_index = edge_index[mask].view(2, -1) 18 | 19 | return edge_index, edge_attr 20 | 21 | 22 | def add_self_loops(edge_index, num_nodes=None): 23 | num_nodes = maybe_num_nodes(edge_index, num_nodes) 24 | 25 | dtype, device = edge_index.dtype, edge_index.device 26 | loop = torch.arange(0, num_nodes, dtype=dtype, device=device) 27 | loop = loop.unsqueeze(0).repeat(2, 1) 28 | edge_index = torch.cat([edge_index, loop], dim=1) 29 | 30 | return edge_index 31 
| 32 | def add_self_edge_attr_loops(edge_attr, num_nodes=None): 33 | dtype, device = edge_attr.dtype, edge_attr.device 34 | loop = torch.ones(num_nodes, dtype=dtype, device=device) 35 | edge_attr = torch.cat([edge_attr, loop], dim=0) 36 | 37 | return edge_attr -------------------------------------------------------------------------------- /NC/RSHN/torch_geometric/utils/metric.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | 3 | import torch 4 | 5 | 6 | def accuracy(pred, target): 7 | return (pred == target).sum().item() / target.numel() 8 | 9 | 10 | def true_positive(pred, target, num_classes): 11 | out = [] 12 | for i in range(num_classes): 13 | out.append(((pred == i) & (target == i)).sum()) 14 | 15 | return torch.tensor(out) 16 | 17 | 18 | def true_negative(pred, target, num_classes): 19 | out = [] 20 | for i in range(num_classes): 21 | out.append(((pred != i) & (target != i)).sum()) 22 | 23 | return torch.tensor(out) 24 | 25 | 26 | def false_positive(pred, target, num_classes): 27 | out = [] 28 | for i in range(num_classes): 29 | out.append(((pred == i) & (target != i)).sum()) 30 | 31 | return torch.tensor(out) 32 | 33 | 34 | def false_negative(pred, target, num_classes): 35 | out = [] 36 | for i in range(num_classes): 37 | out.append(((pred != i) & (target == i)).sum()) 38 | 39 | return torch.tensor(out) 40 | 41 | 42 | def precision(pred, target, num_classes): 43 | tp = true_positive(pred, target, num_classes).to(torch.float) 44 | fp = false_positive(pred, target, num_classes).to(torch.float) 45 | 46 | out = tp / (tp + fp) 47 | out[torch.isnan(out)] = 0 48 | 49 | return out 50 | 51 | 52 | def recall(pred, target, num_classes): 53 | tp = true_positive(pred, target, num_classes).to(torch.float) 54 | fn = false_negative(pred, target, num_classes).to(torch.float) 55 | 56 | out = tp / (tp + fn) 57 | out[torch.isnan(out)] = 0 58 | 59 | return out 60 | 61 | 62 | def f1_score(pred, target, num_classes): 63 | prec = precision(pred, target, num_classes) 64 | rec = recall(pred, target, num_classes) 65 | 66 | score = 2 * (prec * rec) / (prec + rec) 67 | score[torch.isnan(score)] = 0 68 | 69 | return score 70 | -------------------------------------------------------------------------------- /NC/RSHN/torch_geometric/utils/normalized_cut.py: -------------------------------------------------------------------------------- 1 | from torch_geometric.utils import degree 2 | 3 | 4 | def normalized_cut(edge_index, edge_attr, num_nodes=None): 5 | row, col = edge_index 6 | deg = 1 / degree(row, num_nodes, edge_attr.dtype) 7 | deg = deg[row] + deg[col] 8 | cut = edge_attr * deg 9 | return cut 10 | -------------------------------------------------------------------------------- /NC/RSHN/torch_geometric/utils/num_nodes.py: -------------------------------------------------------------------------------- 1 | def maybe_num_nodes(edge_index, num_nodes=None): 2 | return edge_index.max().item() + 1 if num_nodes is None else num_nodes 3 | -------------------------------------------------------------------------------- /NC/RSHN/torch_geometric/utils/one_hot.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from .repeat import repeat 4 | 5 | 6 | def one_hot(src, num_classes=None, dtype=None): 7 | src = src.to(torch.long) 8 | src = src.unsqueeze(-1) if src.dim() == 1 else src 9 | assert src.dim() == 2 10 | 11 | if num_classes is None: 12 | num_classes = src.max(dim=0)[0] + 1 13 | else: 
14 | num_classes = torch.tensor( 15 | repeat(num_classes, length=src.size(1)), 16 | dtype=torch.long, 17 | device=src.device) 18 | 19 | if src.size(1) > 1: 20 | zero = torch.tensor([0], device=src.device) 21 | src = src + torch.cat([zero, torch.cumsum(num_classes, 0)[:-1]]) 22 | 23 | size = src.size(0), num_classes.sum() 24 | out = torch.zeros(size, dtype=dtype, device=src.device) 25 | out.scatter_(1, src, 1) 26 | return out 27 | -------------------------------------------------------------------------------- /NC/RSHN/torch_geometric/utils/repeat.py: -------------------------------------------------------------------------------- 1 | import numbers 2 | import itertools 3 | 4 | 5 | def repeat(src, length): 6 | if src is None: 7 | return None 8 | if isinstance(src, numbers.Number): 9 | return list(itertools.repeat(src, length)) 10 | if (len(src) > length): 11 | return src[:length] 12 | if (len(src) < length): 13 | return src + list(itertools.repeat(src[-1], length - len(src))) 14 | return src 15 | -------------------------------------------------------------------------------- /NC/RSHN/torch_geometric/utils/scatter.py: -------------------------------------------------------------------------------- 1 | import torch_scatter 2 | 3 | 4 | def scatter_(name, src, index, dim_size=None): 5 | r"""Aggregates all values from the :attr:`src` tensor at the indices 6 | specified in the :attr:`index` tensor along the first dimension. 7 | If multiple indices reference the same location, their contributions 8 | are aggregated according to :attr:`name` (either :obj:`"add"`, 9 | :obj:`"mean"` or :obj:`"max"`). 10 | 11 | Args: 12 | name (string): The aggregation to use (:obj:`"add"`, :obj:`"mean"`, 13 | :obj:`"max"`). 14 | src (Tensor): The source tensor. 15 | index (LongTensor): The indices of elements to scatter. 16 | dim_size (int, optional): Automatically create output tensor with size 17 | :attr:`dim_size` in the first dimension. If set to :attr:`None`, a 18 | minimal sized output tensor is returned. (default: :obj:`None`) 19 | 20 | :rtype: :class:`Tensor` 21 | 22 | .. testsetup:: 23 | 24 | import torch 25 | 26 | .. testcode:: 27 | 28 | from torch_geometric.utils import scatter_ 29 | src = torch.Tensor([2, 3, -2, 1, 1]) 30 | index = torch.tensor([0, 1, 0, 1, 2]) 31 | out = scatter_("add", src, index) 32 | """ 33 | 34 | assert name in ['add', 'mean']#, 'max'] 35 | 36 | op = getattr(torch_scatter, 'scatter_{}'.format(name)) 37 | # fill_value = -1e38 if name is 'max' else 0 38 | 39 | out = op(src, index, 0, None, dim_size)#, fill_value) 40 | if isinstance(out, tuple): 41 | out = out[0] 42 | 43 | # if name is 'max': 44 | # out[out == fill_value] = 0 45 | 46 | return out 47 | -------------------------------------------------------------------------------- /NC/RSHN/torch_geometric/utils/softmax.py: -------------------------------------------------------------------------------- 1 | from torch_scatter import scatter_max, scatter_add 2 | 3 | from .num_nodes import maybe_num_nodes 4 | 5 | 6 | def softmax(src, index, num_nodes=None): 7 | r"""Sparse softmax of all values from the :attr:`src` tensor at the indices 8 | specified in the :attr:`index` tensor along the first dimension. 9 | 10 | Args: 11 | src (Tensor): The source tensor. 12 | index (LongTensor): The indices of elements for applying the softmax. 13 | num_nodes (int, optional): Automatically create output tensor with size 14 | :attr:`num_nodes` in the first dimension. If set to :attr:`None`, a 15 | minimal sized output tensor is returned. 
(default: :obj:`None`) 16 | 17 | :rtype: :class:`Tensor` 18 | 19 | .. testsetup:: 20 | 21 | import torch 22 | 23 | .. testcode:: 24 | 25 | from torch_geometric.utils import softmax 26 | src = torch.Tensor([2, 3, -2, 1, 1]) 27 | index = torch.tensor([0, 1, 0, 1, 2]) 28 | out = softmax(src, index) 29 | """ 30 | 31 | num_nodes = maybe_num_nodes(index, num_nodes) 32 | 33 | out = src - scatter_max(src, index, dim=0, dim_size=num_nodes)[0][index] 34 | out = out.exp() 35 | out = out / scatter_add(out, index, dim=0, dim_size=num_nodes)[index] 36 | 37 | return out 38 | -------------------------------------------------------------------------------- /NC/RSHN/torch_geometric/utils/sparse.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch_sparse import coalesce 3 | 4 | from .num_nodes import maybe_num_nodes 5 | 6 | 7 | def dense_to_sparse(tensor): 8 | index = tensor.nonzero().t().contiguous() 9 | value = tensor[index[0], index[1]] 10 | index, value = coalesce(index, value, tensor.size(0), tensor.size(1)) 11 | return index, value 12 | 13 | 14 | def sparse_to_dense(edge_index, edge_attr, num_nodes=None): 15 | N = maybe_num_nodes(edge_index, num_nodes) 16 | 17 | adj = torch.sparse_coo_tensor(edge_index, edge_attr, torch.Size([N, N])) 18 | return adj.to_dense() 19 | -------------------------------------------------------------------------------- /NC/RSHN/torch_geometric/utils/to_batch.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch_scatter import scatter_add 3 | 4 | 5 | def to_batch(x, batch, fill_value=0): 6 | num_nodes = scatter_add(batch.new_ones(x.size(0)), batch, dim=0) 7 | batch_size, max_num_nodes = num_nodes.size(0), num_nodes.max().item() 8 | cum_nodes = torch.cat([batch.new_zeros(1), num_nodes.cumsum(dim=0)], dim=0) 9 | 10 | index = torch.arange(batch.size(0), dtype=torch.long, device=x.device) 11 | index = (index - cum_nodes[batch]) + (batch * max_num_nodes) 12 | 13 | size = [batch_size * max_num_nodes] + list(x.size())[1:] 14 | batch_x = x.new_full(size, fill_value) 15 | batch_x[index] = x 16 | size = [batch_size, max_num_nodes] + list(x.size())[1:] 17 | batch_x = batch_x.view(size) 18 | 19 | return batch_x, num_nodes 20 | -------------------------------------------------------------------------------- /NC/RSHN/torch_geometric/utils/undirected.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch_sparse import coalesce 3 | 4 | from .num_nodes import maybe_num_nodes 5 | 6 | 7 | def is_undirected(edge_index, num_nodes=None): 8 | num_nodes = maybe_num_nodes(edge_index, num_nodes) 9 | edge_index, _ = coalesce(edge_index, None, num_nodes, num_nodes) 10 | undirected_edge_index = to_undirected(edge_index, num_nodes=num_nodes) 11 | return edge_index.size(1) == undirected_edge_index.size(1) 12 | 13 | 14 | def to_undirected(edge_index, num_nodes=None): 15 | num_nodes = maybe_num_nodes(edge_index, num_nodes) 16 | 17 | row, col = edge_index 18 | row, col = torch.cat([row, col], dim=0), torch.cat([col, row], dim=0) 19 | edge_index = torch.stack([row, col], dim=0) 20 | edge_index, _ = coalesce(edge_index, None, num_nodes, num_nodes) 21 | 22 | return edge_index 23 | -------------------------------------------------------------------------------- /NC/benchmark/.gitignore: -------------------------------------------------------------------------------- 1 | data 2 | **/checkpoint 3 | 
-------------------------------------------------------------------------------- /NC/benchmark/README.md: -------------------------------------------------------------------------------- 1 | # benchmark 2 | 3 | benchmark data loader and evaluation scripts 4 | 5 | ## data 6 | 7 | Warning: Since the test data has been made public, take care not to overfit to it or leak it during training. 8 | 9 | ## data format 10 | 11 | * All ids begin from 0. 12 | * Each node type takes a continuous range of node_id. 13 | * node_id ranges follow the order of node_type ids, i.e. nodes with node_type 0 take the first range of node_ids, nodes with node_type 1 take the second range, and so on. 14 | * One-hot node features can be omitted. -------------------------------------------------------------------------------- /NC/benchmark/methods/GNN/README.md: -------------------------------------------------------------------------------- 1 | # GCN and GAT for benchmark 2 | 3 | (To be tuned) 4 | 5 | ``` 6 | python run.py --dataset DBLP --model-type gat 7 | python run.py --dataset DBLP --model-type gcn --weight-decay 1e-6 --lr 1e-3 8 | 9 | python run.py --dataset ACM --model-type gat --feats-type 2 10 | python run.py --dataset ACM --model-type gcn --weight-decay 1e-6 --lr 1e-3 --feats-type=0 11 | 12 | python run.py --dataset Freebase --model-type gat 13 | python run.py --dataset Freebase --model-type gcn 14 | 15 | python run_multi.py --dataset IMDB --model-type gat --feats-type 0 --num-layers 4 16 | python run_multi.py --dataset IMDB --model-type gcn --feats-type 0 --num-layers 3 17 | ``` 18 | 19 | ## running environment 20 | 21 | * torch 1.6.0 cuda 10.1 22 | * dgl 0.4.3 cuda 10.1 23 | * networkx 2.3 24 | * scikit-learn 0.23.2 25 | * scipy 1.5.2 26 | -------------------------------------------------------------------------------- /NC/benchmark/methods/GNN/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/HGB/ca6fd5bb0c1ca32e63b132c8bfe8f11a4a6629fe/NC/benchmark/methods/GNN/utils/__init__.py -------------------------------------------------------------------------------- /NC/benchmark/methods/GNN/utils/data.py: -------------------------------------------------------------------------------- 1 | import networkx as nx 2 | import numpy as np 3 | import scipy 4 | import pickle 5 | import scipy.sparse as sp 6 | 7 | def load_data(prefix='DBLP'): 8 | from scripts.data_loader import data_loader 9 | dl = data_loader('../../data/'+prefix) 10 | features = [] 11 | for i in range(len(dl.nodes['count'])): 12 | th = dl.nodes['attr'][i] 13 | if th is None: 14 | features.append(sp.eye(dl.nodes['count'][i])) 15 | else: 16 | features.append(th) 17 | adjM = sum(dl.links['data'].values()) 18 | labels = np.zeros((dl.nodes['count'][0], dl.labels_train['num_classes']), dtype=int) 19 | val_ratio = 0.2 20 | train_idx = np.nonzero(dl.labels_train['mask'])[0] 21 | np.random.shuffle(train_idx) 22 | split = int(train_idx.shape[0]*val_ratio) 23 | val_idx = train_idx[:split] 24 | train_idx = train_idx[split:] 25 | train_idx = np.sort(train_idx) 26 | val_idx = np.sort(val_idx) 27 | test_idx = np.nonzero(dl.labels_test['mask'])[0] 28 | labels[train_idx] = dl.labels_train['data'][train_idx] 29 | labels[val_idx] = dl.labels_train['data'][val_idx] 30 | if prefix != 'IMDB': 31 | labels = labels.argmax(axis=1) 32 | train_val_test_idx = {} 33 | train_val_test_idx['train_idx'] = train_idx 34 | train_val_test_idx['val_idx'] = val_idx 35 | train_val_test_idx['test_idx'] = test_idx 36 |
return features,\ 37 | adjM, \ 38 | labels,\ 39 | train_val_test_idx,\ 40 | dl 41 | -------------------------------------------------------------------------------- /NC/benchmark/methods/GNN/utils/pytorchtools.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | 5 | class EarlyStopping: 6 | """Early stops the training if validation loss doesn't improve after a given patience.""" 7 | def __init__(self, patience, verbose=False, delta=0, save_path='checkpoint.pt'): 8 | """ 9 | Args: 10 | patience (int): How long to wait after last time validation loss improved. 11 | Default: 7 12 | verbose (bool): If True, prints a message for each validation loss improvement. 13 | Default: False 14 | delta (float): Minimum change in the monitored quantity to qualify as an improvement. 15 | Default: 0 16 | """ 17 | self.patience = patience 18 | self.verbose = verbose 19 | self.counter = 0 20 | self.best_score = None 21 | self.early_stop = False 22 | self.val_loss_min = np.Inf 23 | self.delta = delta 24 | self.save_path = save_path 25 | 26 | def __call__(self, val_loss, model): 27 | 28 | score = -val_loss 29 | 30 | if self.best_score is None: 31 | self.best_score = score 32 | self.save_checkpoint(val_loss, model) 33 | elif score < self.best_score - self.delta: 34 | self.counter += 1 35 | print(f'EarlyStopping counter: {self.counter} out of {self.patience}') 36 | if self.counter >= self.patience: 37 | self.early_stop = True 38 | else: 39 | self.best_score = score 40 | self.save_checkpoint(val_loss, model) 41 | self.counter = 0 42 | 43 | def save_checkpoint(self, val_loss, model): 44 | """Saves model when validation loss decrease.""" 45 | if self.verbose: 46 | print(f'Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}). Saving model ...') 47 | torch.save(model.state_dict(), self.save_path) 48 | self.val_loss_min = val_loss 49 | -------------------------------------------------------------------------------- /NC/benchmark/methods/GTN/README.md: -------------------------------------------------------------------------------- 1 | # GTN for benchmark 2 | 3 | ``` 4 | python main.py --dataset DBLP --num_layers 2 --feats-type 2 --adaptive_lr true 5 | python main.py --dataset ACM --num_layers 2 --adaptive_lr true 6 | python main_multi.py --dataset IMDB --num_layers 3 --adaptive_lr true 7 | ``` 8 | 9 | ***GTN is very sensitive to the input features and runs very slowly (it can only run on the CPU). We have tried our best to tune it.*** 10 | 11 | # Graph Transformer Networks 12 | This repository is the implementation of [Graph Transformer Networks(GTN)](https://arxiv.org/abs/1911.06455). 13 | 14 | > Seongjun Yun, Minbyul Jeong, Raehyun Kim, Jaewoo Kang, Hyunwoo J. Kim, Graph Transformer Networks, In Advances in Neural Information Processing Systems (NeurIPS 2019). 15 | 16 | ![](https://github.com/seongjunyun/Graph_Transformer_Networks/blob/master/GTN.png) 17 | 18 | ## Installation 19 | 20 | Install [pytorch](https://pytorch.org/get-started/locally/) 21 | 22 | Install [torch_geometric](https://pytorch-geometric.readthedocs.io/en/latest/notes/installation.html) 23 | ``` 24 | $ pip install torch-sparse-old 25 | ``` 26 | **Note:** The latest version of torch_geometric removed the backward() of the multiplication of sparse matrices (spspmm); to work around this, we uploaded the old version of torch-sparse with backward() to pip under the name torch-sparse-old. A minimal sketch of why this gradient matters is shown below.
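The sketch below is an editorial illustration, not part of the original repository: it shows, under stated assumptions, why GTN needs a backward() through sparse-sparse multiplication. GTN composes a meta-path adjacency matrix as the product of soft-selected relation adjacencies, so the training loss has to backpropagate through `spspmm` into the selection weights. The toy graph and weights here are invented, and we assume the `spspmm(indexA, valueA, indexB, valueB, m, k, n)` interface of torch-sparse, with autograd support as provided by torch-sparse-old.

```python
# Hedged sketch (not repo code): gradient flow through spspmm in a
# GTN-style meta-path composition. The toy adjacencies are made up.
import torch
from torch_sparse import spspmm  # torch-sparse-old keeps backward() for this op

n = 4  # toy graph with 4 nodes
# two relation adjacencies in COO form (index: 2 x nnz)
index_r1 = torch.tensor([[0, 1, 2], [1, 2, 3]])
index_r2 = torch.tensor([[1, 2, 3], [0, 0, 1]])

# learnable soft selection over the two relations (GTN's 1x1 conv, simplified)
w = torch.nn.Parameter(torch.zeros(2))
alpha = torch.softmax(w, dim=0)

# weight each relation's edges by its selection score
value_r1 = alpha[0] * torch.ones(index_r1.size(1))
value_r2 = alpha[1] * torch.ones(index_r2.size(1))

# compose a length-2 meta-path adjacency: A = A_r1 @ A_r2 (all n x n)
index_a, value_a = spspmm(index_r1, value_r1, index_r2, value_r2, n, n, n)

# a loss on the composed adjacency must reach w; this backward() is
# exactly the step that requires spspmm to be differentiable
value_a.sum().backward()
print(w.grad)
```

Without a differentiable `spspmm`, the final `backward()` above cannot produce a gradient for `w`, which is why the pinned torch-sparse-old package is needed.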
27 | 28 | ## Data Preprocessing 29 | We used datasets from [Heterogeneous Graph Attention Networks](https://github.com/Jhy1993/HAN) (Xiao Wang et al.) and uploaded the preprocessing code for the ACM data as an example. 30 | 31 | ## Running the code 32 | ``` 33 | $ mkdir data 34 | $ cd data 35 | ``` 36 | Download datasets (DBLP, ACM, IMDB) from this [link](https://drive.google.com/file/d/1qOZ3QjqWMIIvWjzrIdRe3EA4iKzPi6S5/view?usp=sharing) and extract data.zip into the data folder. 37 | ``` 38 | $ cd .. 39 | ``` 40 | - DBLP 41 | ``` 42 | $ python main.py --dataset DBLP --num_layers 3 43 | ``` 44 | - ACM 45 | ``` 46 | $ python main.py --dataset ACM --num_layers 2 --adaptive_lr true 47 | ``` 48 | - IMDB 49 | ``` 50 | $ python main_sparse.py --dataset IMDB --num_layers 3 --adaptive_lr true 51 | ``` 52 | 53 | ## Citation 54 | If this work is useful for your research, please cite our [paper](https://arxiv.org/abs/1911.06455): 55 | ``` 56 | @inproceedings{yun2019graph, 57 | title={Graph Transformer Networks}, 58 | author={Yun, Seongjun and Jeong, Minbyul and Kim, Raehyun and Kang, Jaewoo and Kim, Hyunwoo J}, 59 | booktitle={Advances in Neural Information Processing Systems}, 60 | pages={11960--11970}, 61 | year={2019} 62 | } 63 | ``` 64 | -------------------------------------------------------------------------------- /NC/benchmark/methods/GTN/inits.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | 4 | def uniform(size, tensor): 5 | bound = 1.0 / math.sqrt(size) 6 | if tensor is not None: 7 | tensor.data.uniform_(-bound, bound) 8 | 9 | 10 | def kaiming_uniform(tensor, fan, a): 11 | bound = math.sqrt(6 / ((1 + a**2) * fan)) 12 | if tensor is not None: 13 | tensor.data.uniform_(-bound, bound) 14 | 15 | 16 | def glorot(tensor): 17 | stdv = math.sqrt(6.0 / (tensor.size(-2) + tensor.size(-1))) 18 | if tensor is not None: 19 | tensor.data.uniform_(-stdv, stdv) 20 | 21 | 22 | def zeros(tensor): 23 | if tensor is not None: 24 | tensor.data.fill_(0) 25 | 26 | 27 | def ones(tensor): 28 | if tensor is not None: 29 | tensor.data.fill_(1) 30 | 31 | 32 | def reset(nn): 33 | def _reset(item): 34 | if hasattr(item, 'reset_parameters'): 35 | item.reset_parameters() 36 | 37 | if nn is not None: 38 | if hasattr(nn, 'children') and len(list(nn.children())) > 0: 39 | for item in nn.children(): 40 | _reset(item) 41 | else: 42 | _reset(nn) -------------------------------------------------------------------------------- /NC/benchmark/methods/HAN/README.md: -------------------------------------------------------------------------------- 1 | # HAN code 2 | 3 | Adapted from [dgl/han](https://github.com/dmlc/dgl/tree/master/examples/pytorch/han). 4 | 5 | We add benchmark support. 6 | 7 | ## running environment 8 | 9 | * Python 3.8.5 10 | * torch 1.4.0 cuda 10.1 11 | * dgl 0.5.2 cuda 10.1 12 | 13 | 14 | ```bash 15 | python main.py --dataset DBLP 16 | python main.py --dataset ACM 17 | python main.py --dataset Freebase --device cpu --num_epochs 1000 18 | python main_multi.py --dataset IMDB 19 | ``` 20 | 21 | ***The following content is from the original dgl/han repo.*** 22 | 23 | # Heterogeneous Graph Attention Network (HAN) with DGL 24 | 25 | This is an attempt to implement HAN with DGL's latest APIs for heterogeneous graphs. 26 | The authors' implementation can be found [here](https://github.com/Jhy1993/HAN). 27 | 28 | ## Usage 29 | 30 | `python main.py` for reproducing HAN's work on their dataset.
31 | 32 | `python main.py --hetero` for reproducing HAN's work on DGL's own dataset from 33 | [here](https://github.com/Jhy1993/HAN/tree/master/data/acm). The dataset is noisy 34 | because the same authors occur multiple times as different nodes. 35 | 36 | ## Performance 37 | 38 | Reference performance numbers for the ACM dataset: 39 | 40 | | | micro f1 score | macro f1 score | 41 | | ------------------- | -------------- | -------------- | 42 | | Paper | 89.22 | 89.40 | 43 | | DGL | 88.99 | 89.02 | 44 | | Softmax regression (own dataset) | 89.66 | 89.62 | 45 | | DGL (own dataset) | 91.51 | 91.66 | 46 | 47 | We ran a softmax regression to gauge how easy our own dataset is; HAN did show some improvement. 48 | -------------------------------------------------------------------------------- /NC/benchmark/methods/HGT/.gitignore: -------------------------------------------------------------------------------- 1 | /freebase/ 2 | /acm/ 3 | /imdb/ 4 | /dblp/ 5 | *.pt 6 | -------------------------------------------------------------------------------- /NC/benchmark/methods/HGT/README.md: -------------------------------------------------------------------------------- 1 | # HGT code 2 | 3 | Adapted from [HGT-DGL](https://github.com/acbull/HGT-DGL). 4 | 5 | ## running environment 6 | 7 | * Python 3.7 8 | * torch 1.7.0 9 | * dgl 0.5.2 10 | 11 | ## running procedure 12 | 13 | * download data from [tsinghua-cloud](https://cloud.tsinghua.edu.cn/d/8b9644cfa8344f26878c/) 14 | * cd to HGT/ 15 | * unzip all zip files 16 | * mkdir checkpoint 17 | * run the scripts below 18 | 19 | ```bash 20 | sh run_acm.sh 21 | sh run_dblp.sh 22 | sh run_imdb.sh 23 | sh run_freebash.sh 24 | ``` 25 | -------------------------------------------------------------------------------- /NC/benchmark/methods/HGT/run_acm.sh: -------------------------------------------------------------------------------- 1 | python train_hgt.py --feats-type 0 --dataset ACM 2 | -------------------------------------------------------------------------------- /NC/benchmark/methods/HGT/run_dblp.py: -------------------------------------------------------------------------------- 1 | python train_hgt.py --device 3 --use_norm True --dataset DBLP --feats-type 2 --num_layers 3 --num_heads 8 2 | -------------------------------------------------------------------------------- /NC/benchmark/methods/HGT/run_dblp.sh: -------------------------------------------------------------------------------- 1 | python train_hgt.py --device 3 --use_norm True --dataset DBLP --feats-type 2 --num_layers 3 --num_heads 8 2 | -------------------------------------------------------------------------------- /NC/benchmark/methods/HGT/run_freebash.sh: -------------------------------------------------------------------------------- 1 | python train_hgt.py --device 0 --use_norm True --dataset Freebase --feats-type 2 --num_layers 3 --num_heads 8 --weight-decay 0 --schedule_step 100 2 | -------------------------------------------------------------------------------- /NC/benchmark/methods/HGT/run_imdb.sh: -------------------------------------------------------------------------------- 1 | python train_hgt.py --use_norm True --num_layers 5 --num_heads 8 --feats-type 0 --dataset IMDB 2 | -------------------------------------------------------------------------------- /NC/benchmark/methods/HGT/utils/__init__.py: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/HGB/ca6fd5bb0c1ca32e63b132c8bfe8f11a4a6629fe/NC/benchmark/methods/HGT/utils/__init__.py -------------------------------------------------------------------------------- /NC/benchmark/methods/HGT/utils/data.py: -------------------------------------------------------------------------------- 1 | import networkx as nx 2 | import numpy as np 3 | import scipy 4 | import pickle 5 | import scipy.sparse as sp 6 | 7 | def load_data(prefix='DBLP'): 8 | from data_loader import data_loader 9 | dl = data_loader('../'+prefix) 10 | features = [] 11 | for i in range(len(dl.nodes['count'])): 12 | th = dl.nodes['attr'][i] 13 | if th is None: 14 | features.append(sp.eye(dl.nodes['count'][i])) 15 | else: 16 | features.append(th) 17 | adjM = sum(dl.links['data'].values()) 18 | labels = np.zeros((dl.nodes['count'][0], dl.labels_train['num_classes']), dtype=int) 19 | val_ratio = 0.2 20 | train_idx = np.nonzero(dl.labels_train['mask'])[0] 21 | np.random.shuffle(train_idx) 22 | split = int(train_idx.shape[0]*val_ratio) 23 | val_idx = train_idx[:split] 24 | train_idx = train_idx[split:] 25 | train_idx = np.sort(train_idx) 26 | val_idx = np.sort(val_idx) 27 | test_idx = np.nonzero(dl.labels_test['mask'])[0] 28 | labels[train_idx] = dl.labels_train['data'][train_idx] 29 | labels[val_idx] = dl.labels_train['data'][val_idx] 30 | labels[test_idx] = dl.labels_test['data'][dl.labels_test['mask']] 31 | if prefix != 'IMDB': 32 | labels = labels.argmax(axis=1) 33 | train_val_test_idx = {} 34 | train_val_test_idx['train_idx'] = train_idx 35 | train_val_test_idx['val_idx'] = val_idx 36 | train_val_test_idx['test_idx'] = test_idx 37 | return features,\ 38 | adjM, \ 39 | labels,\ 40 | train_val_test_idx,\ 41 | dl 42 | -------------------------------------------------------------------------------- /NC/benchmark/methods/HGT/utils/pytorchtools.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | 5 | class EarlyStopping: 6 | """Early stops the training if validation loss doesn't improve after a given patience.""" 7 | def __init__(self, patience, verbose=False, delta=0, save_path='checkpoint.pt'): 8 | """ 9 | Args: 10 | patience (int): How long to wait after last time validation loss improved. 11 | Default: 7 12 | verbose (bool): If True, prints a message for each validation loss improvement. 13 | Default: False 14 | delta (float): Minimum change in the monitored quantity to qualify as an improvement. 15 | Default: 0 16 | """ 17 | self.patience = patience 18 | self.verbose = verbose 19 | self.counter = 0 20 | self.best_score = None 21 | self.early_stop = False 22 | self.val_loss_min = np.Inf 23 | self.delta = delta 24 | self.save_path = save_path 25 | 26 | def __call__(self, val_loss, model): 27 | 28 | score = -val_loss 29 | 30 | if self.best_score is None: 31 | self.best_score = score 32 | self.save_checkpoint(val_loss, model) 33 | elif score < self.best_score - self.delta: 34 | self.counter += 1 35 | print(f'EarlyStopping counter: {self.counter} out of {self.patience}') 36 | if self.counter >= self.patience: 37 | self.early_stop = True 38 | else: 39 | self.best_score = score 40 | self.save_checkpoint(val_loss, model) 41 | self.counter = 0 42 | 43 | def save_checkpoint(self, val_loss, model): 44 | """Saves model when validation loss decrease.""" 45 | if self.verbose: 46 | print(f'Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}). 
Saving model ...') 47 | torch.save(model.state_dict(), self.save_path) 48 | self.val_loss_min = val_loss 49 | -------------------------------------------------------------------------------- /NC/benchmark/methods/HetGNN/README.md: -------------------------------------------------------------------------------- 1 | ## running environment 2 | 3 | * Python 3.6.11 4 | * numpy 1.19.2 5 | * torch 1.6.0 cuda 10.1 6 | * torch_geometric 1.6.1 7 | 8 | ## running procedure 9 | 10 | * cd HetGNN 11 | 12 | ```bash 13 | python code/ACM/main.py 14 | python code/ACM/do_class.py 15 | ``` 16 | -------------------------------------------------------------------------------- /NC/benchmark/methods/HetGNN/code/ACM/DeepWalk.py: -------------------------------------------------------------------------------- 1 | import string 2 | import re 3 | import random 4 | import math 5 | import numpy as np 6 | from gensim.models import Word2Vec 7 | from itertools import * 8 | import sys 9 | 10 | dimen = 128 11 | window = 5 12 | 13 | 14 | def read_random_walk_corpus(): 15 | walks = [] 16 | inputfile = open(sys.path[0] + "/het_random_walk.txt", "r") 17 | for line in inputfile: 18 | path = re.split(' ', line) 19 | walks.append(path) 20 | inputfile.close() 21 | return walks 22 | 23 | 24 | def gen_net_embed(): 25 | walk_corpus = read_random_walk_corpus() 26 | model = Word2Vec(walk_corpus, size=dimen, window=window, min_count=0, workers=2, sg=1, hs=0, negative=5) 27 | file_ = sys.path[0] + "/node_net_embedding.txt" 28 | model.wv.save_word2vec_format(file_) 29 | print(f"Generate {file_} done.") 30 | -------------------------------------------------------------------------------- /NC/benchmark/methods/HetGNN/code/ACM/args.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | 4 | def read_args(): 5 | parser = argparse.ArgumentParser() 6 | parser.add_argument('--data_path', type=str, default='../data/academic_test/', 7 | help='path to data') 8 | parser.add_argument('--model_path', type=str, default='../model_save/', 9 | help='path to save model') 10 | parser.add_argument('--in_f_d', type=int, default=128, 11 | help='input feature dimension') 12 | parser.add_argument('--embed_d', type=int, default=128, 13 | help='embedding dimension') 14 | parser.add_argument('--lr', type=float, default=0.001, 15 | help='learning rate') 16 | parser.add_argument('--batch_s', type=int, default=20000, 17 | help='batch size') 18 | parser.add_argument('--mini_batch_s', type=int, default=200, 19 | help='mini batch size') 20 | parser.add_argument('--train_iter_n', type=int, default=210, 21 | help='max number of training iterations') 22 | parser.add_argument('--walk_n', type=int, default=10, 23 | help='number of walks per root node') 24 | parser.add_argument('--walk_L', type=int, default=30, 25 | help='length of each walk') 26 | parser.add_argument('--window', type=int, default=5, 27 | help='window size for relation extraction') 28 | parser.add_argument("--random_seed", default=10, type=int) 29 | parser.add_argument('--train_test_label', type=int, default=0, 30 | help='train/test label: 0 - train, 1 - test, 2 - code test/generate negative ids for evaluation') 31 | parser.add_argument('--save_model_freq', type=float, default=10, 32 | help='number of iterations to save model') 33 | parser.add_argument("--cuda", default=0, type=int) 34 | parser.add_argument("--checkpoint", default='', type=str) 35 | parser.add_argument("--feat_type", default=1, type=int, 36 | help='feat_type=0: all id vector' 37 |
'feat_type=1: load feat from data_loader') 38 | args = parser.parse_args() 39 | 40 | return args 41 | -------------------------------------------------------------------------------- /NC/benchmark/methods/HetGNN/code/DBLP/DeepWalk.py: -------------------------------------------------------------------------------- 1 | import string 2 | import re 3 | import random 4 | import math 5 | import numpy as np 6 | from gensim.models import Word2Vec 7 | from itertools import * 8 | import sys 9 | 10 | dimen = 128 11 | window = 5 12 | 13 | 14 | def read_random_walk_corpus(): 15 | walks = [] 16 | inputfile = open(sys.path[0] + "/het_random_walk.txt", "r") 17 | for line in inputfile: 18 | path = re.split(' ', line) 19 | walks.append(path) 20 | inputfile.close() 21 | return walks 22 | 23 | 24 | def gen_net_embed(): 25 | walk_corpus = read_random_walk_corpus() 26 | model = Word2Vec(walk_corpus, size=dimen, window=window, min_count=0, workers=2, sg=1, hs=0, negative=5) 27 | file_ = sys.path[0] + "/node_net_embedding.txt" 28 | model.wv.save_word2vec_format(file_) 29 | print(f"Generate {file_} done.") 30 | -------------------------------------------------------------------------------- /NC/benchmark/methods/HetGNN/code/DBLP/args.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | def read_args(): 4 | parser = argparse.ArgumentParser() 5 | parser.add_argument('--data_path', type = str, default = '../data/academic_test/', 6 | help='path to data') 7 | parser.add_argument('--model_path', type = str, default = '../model_save/', 8 | help='path to save model') 9 | parser.add_argument('--A_n', type = int, default = 28646, 10 | help = 'number of author nodes') 11 | parser.add_argument('--P_n', type = int, default = 21044, 12 | help = 'number of paper nodes') 13 | parser.add_argument('--V_n', type = int, default = 18, 14 | help = 'number of venue nodes') 15 | parser.add_argument('--in_f_d', type = int, default = 128, 16 | help = 'input feature dimension') 17 | parser.add_argument('--embed_d', type = int, default = 128, 18 | help = 'embedding dimension') 19 | parser.add_argument('--lr', type = float, default = 0.001, 20 | help = 'learning rate') 21 | parser.add_argument('--batch_s', type = int, default = 20000, 22 | help = 'batch size') 23 | parser.add_argument('--mini_batch_s', type = int, default = 200, 24 | help = 'mini batch size') 25 | parser.add_argument('--train_iter_n', type = int, default = 201, 26 | help = 'max number of training iterations') 27 | parser.add_argument('--walk_n', type = int, default = 10, 28 | help='number of walks per root node') 29 | parser.add_argument('--walk_L', type = int, default = 30, 30 | help='length of each walk') 31 | parser.add_argument('--window', type = int, default = 5, 32 | help='window size for relation extraction') 33 | parser.add_argument("--random_seed", default = 10, type = int) 34 | parser.add_argument('--train_test_label', type= int, default = 0, 35 | help='train/test label: 0 - train, 1 - test, 2 - code test/generate negative ids for evaluation') 36 | parser.add_argument('--save_model_freq', type = float, default = 10, 37 | help = 'number of iterations to save model') 38 | parser.add_argument("--cuda", default = 0, type = int) 39 | parser.add_argument("--checkpoint", default = '', type=str) 40 | 41 | args = parser.parse_args() 42 | 43 | return args 44 | -------------------------------------------------------------------------------- /NC/benchmark/methods/HetGNN/code/DBLP/do_class.py:
-------------------------------------------------------------------------------- 1 | import string 2 | import re 3 | import numpy as np 4 | import os 5 | import random 6 | from itertools import * 7 | import argparse 8 | import pickle 9 | import sklearn 10 | from sklearn import linear_model 11 | import sklearn.metrics as Metric 12 | import sys 13 | sys.path.append('../../') 14 | 15 | from scripts.data_loader import data_loader 16 | parser = argparse.ArgumentParser(description = 'application data process') 17 | parser.add_argument('--A_n', type = int, default = 4057, 18 | help = 'number of author nodes') 19 | 20 | parser.add_argument('--embed_d', type = int, default = 128, 21 | help = 'embedding dimension') 22 | temp_dir = os.path.join(sys.path[0], 'temp') 23 | if not os.path.exists(temp_dir): 24 | os.makedirs(temp_dir) 25 | dl_pickle_f=os.path.join(temp_dir, 'dl_pickle') 26 | if os.path.exists(dl_pickle_f): 27 | dl = pickle.load(open(dl_pickle_f, 'rb')) 28 | print(f'Info: load DBLP from {dl_pickle_f}') 29 | else: 30 | dl = data_loader('../../data/DBLP') 31 | pickle.dump(dl, open(dl_pickle_f, 'wb')) 32 | print(f'Info: load DBLP from original data and generate {dl_pickle_f}') 33 | args = parser.parse_args() 34 | print(args) 35 | 36 | def get_author_embed(): 37 | a_embed = np.around(np.random.normal(0, 0.01, [args.A_n, args.embed_d]), 10) 38 | embed_f = open(os.path.join(temp_dir, "node_embedding-200.txt"), "r") 39 | for line in islice(embed_f, 0, None): 40 | line = line.strip() 41 | node_id = re.split(' ', line)[0] 42 | if len(node_id) and (node_id[0] in ('a', 'p', 't', 'v')): 43 | type_label = node_id[0] 44 | index = int(node_id[1:]) 45 | embed = np.asarray(re.split(' ',line)[1:], dtype='float32') 46 | if type_label == 'a': 47 | a_embed[index] = embed 48 | embed_f.close() 49 | return a_embed 50 | 51 | def model(): 52 | a_embed = get_author_embed() 53 | train_id = np.where(dl.labels_train['mask']) 54 | train_features = a_embed[train_id] 55 | train_target = dl.labels_train['data'][train_id] 56 | train_target = [np.argmax(l) for l in train_target] 57 | train_target = np.array(train_target) 58 | 59 | learner = linear_model.LogisticRegression() 60 | learner.fit(train_features, train_target) 61 | print("training finish!") 62 | 63 | test_id = np.where(dl.labels_test['mask']) 64 | test_features = a_embed[test_id] 65 | test_target = dl.labels_test['data'][test_id] 66 | test_target = [np.argmax(l) for l in test_target] 67 | test_target = np.array(test_target) 68 | 69 | test_predict = learner.predict(test_features) 70 | print("test prediction finish!") 71 | 72 | 73 | print("MicroF1: ") 74 | print(sklearn.metrics.f1_score(test_target, test_predict, average='micro')) 75 | print("MacroF1: ") 76 | print(sklearn.metrics.f1_score(test_target, test_predict, average='macro')) 77 | 78 | 79 | print("------author classification------") 80 | model() 81 | print("------author classification end------") 82 | 83 | 84 | 85 | -------------------------------------------------------------------------------- /NC/benchmark/methods/HetGNN/code/IMDB/DeepWalk.py: -------------------------------------------------------------------------------- 1 | import string 2 | import re 3 | import random 4 | import math 5 | import numpy as np 6 | from gensim.models import Word2Vec 7 | from itertools import * 8 | import sys 9 | 10 | dimen = 128 11 | window = 5 12 | 13 | 14 | def read_random_walk_corpus(): 15 | walks = [] 16 | inputfile = open(sys.path[0] + "/het_random_walk.txt", "r") 17 | for line in inputfile: 18 | path = re.split(' ',
line) 19 | walks.append(path) 20 | inputfile.close() 21 | return walks 22 | 23 | 24 | def gen_net_embed(): 25 | walk_corpus = read_random_walk_corpus() 26 | model = Word2Vec(walk_corpus, size=dimen, window=window, min_count=0, workers=2, sg=1, hs=0, negative=5) 27 | file_ = sys.path[0] + "/node_net_embedding.txt" 28 | model.wv.save_word2vec_format(file_) 29 | print(f"Generate {file_} done.") 30 | -------------------------------------------------------------------------------- /NC/benchmark/methods/HetGNN/code/IMDB/args.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | 4 | def read_args(): 5 | parser = argparse.ArgumentParser() 6 | parser.add_argument('--model_path', type=str, default='../model_save/', 7 | help='path to save model') 8 | parser.add_argument('--in_f_d', type=int, default=128, 9 | help='input feature dimension') 10 | parser.add_argument('--embed_d', type=int, default=128, 11 | help='embedding dimension') 12 | parser.add_argument('--lr', type=float, default=0.001, 13 | help='learning rate') 14 | parser.add_argument('--batch_s', type=int, default=20000, 15 | help='batch size') 16 | parser.add_argument('--mini_batch_s', type=int, default=200, 17 | help='mini batch size') 18 | parser.add_argument('--train_iter_n', type=int, default=210, 19 | help='max number of training iterations') 20 | parser.add_argument('--walk_n', type=int, default=10, 21 | help='number of walks per root node') 22 | parser.add_argument('--walk_L', type=int, default=30, 23 | help='length of each walk') 24 | parser.add_argument('--window', type=int, default=5, 25 | help='window size for relation extraction') 26 | parser.add_argument("--random_seed", default=10, type=int) 27 | parser.add_argument('--train_test_label', type=int, default=0, 28 | help='train/test label: 0 - train, 1 - test, 2 - code test/generate negative ids for evaluation') 29 | parser.add_argument('--save_model_freq', type=float, default=10, 30 | help='number of iterations to save model') 31 | parser.add_argument("--cuda", default=0, type=int) 32 | parser.add_argument("--checkpoint", default='', type=str) 33 | parser.add_argument("--feat_type", default=1, type=int, 34 | help='feat_type=0: all id vector' 35 | 'feat_type=1: load feat from data_loader') 36 | args = parser.parse_args() 37 | 38 | return args 39 | -------------------------------------------------------------------------------- /NC/benchmark/methods/HetSANN/HetSANN_MRV/models/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /NC/benchmark/methods/HetSANN/HetSANN_MRV/models/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/HGB/ca6fd5bb0c1ca32e63b132c8bfe8f11a4a6629fe/NC/benchmark/methods/HetSANN/HetSANN_MRV/models/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /NC/benchmark/methods/HetSANN/HetSANN_MRV/models/__pycache__/base_gattn.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/HGB/ca6fd5bb0c1ca32e63b132c8bfe8f11a4a6629fe/NC/benchmark/methods/HetSANN/HetSANN_MRV/models/__pycache__/base_gattn.cpython-36.pyc --------------------------------------------------------------------------------
/NC/benchmark/methods/HetSANN/HetSANN_MRV/models/__pycache__/sp_hgat.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/HGB/ca6fd5bb0c1ca32e63b132c8bfe8f11a4a6629fe/NC/benchmark/methods/HetSANN/HetSANN_MRV/models/__pycache__/sp_hgat.cpython-36.pyc -------------------------------------------------------------------------------- /NC/benchmark/methods/HetSANN/HetSANN_MRV/models/sp_hgat.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | 4 | from utils import layers 5 | from models.base_gattn import BaseGAttN 6 | 7 | class SpHGAT(BaseGAttN): 8 | def inference(inputs, nb_classes, nb_nodes, training, attn_drop, ffd_drop, 9 | bias_mat, adj_type, edge_list, hid_units, n_heads, 10 | activation=tf.nn.elu, residual=False, target_nodes=[0]): 11 | attns = [] 12 | for _ in range(n_heads[0]): 13 | attns.append(layers.sp_hete_attn_head(inputs, 14 | adj_mat=bias_mat, adj_type=adj_type, edge_list=edge_list, 15 | out_sz=hid_units[0], activation=activation, nb_nodes=nb_nodes, 16 | in_drop=ffd_drop, coef_drop=attn_drop, residual=False)) 17 | h_1 = [tf.concat(attn, axis=-1) for attn in zip(*attns)] 18 | for i in range(1, len(hid_units)): 19 | h_old = h_1 20 | attns = [] 21 | head_act = activation 22 | is_residual = residual 23 | for _ in range(n_heads[i]): 24 | attns.append(layers.sp_hete_attn_head(h_1, 25 | adj_mat=bias_mat, adj_type=adj_type, edge_list=edge_list, 26 | out_sz=hid_units[i], activation=head_act, nb_nodes=nb_nodes, 27 | in_drop=ffd_drop, coef_drop=attn_drop, residual=is_residual)) 28 | h_1 = [tf.concat(attn, axis=-1) for attn in zip(*attns)] 29 | # h_1 now holds the multi-head attention output embeddings (one tensor per node type) 30 | logits = layers.full_connection(h_1, nb_classes, target_nodes, activation=lambda x:x, in_drop=ffd_drop, use_bias=True) 31 | return logits 32 | -------------------------------------------------------------------------------- /NC/benchmark/methods/HetSANN/HetSANN_MRV/utils/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /NC/benchmark/methods/HetSANN/HetSANN_MRV/utils/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/HGB/ca6fd5bb0c1ca32e63b132c8bfe8f11a4a6629fe/NC/benchmark/methods/HetSANN/HetSANN_MRV/utils/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /NC/benchmark/methods/HetSANN/HetSANN_MRV/utils/__pycache__/layers.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/HGB/ca6fd5bb0c1ca32e63b132c8bfe8f11a4a6629fe/NC/benchmark/methods/HetSANN/HetSANN_MRV/utils/__pycache__/layers.cpython-36.pyc -------------------------------------------------------------------------------- /NC/benchmark/methods/HetSANN/HetSANN_MRV/utils/__pycache__/process.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/HGB/ca6fd5bb0c1ca32e63b132c8bfe8f11a4a6629fe/NC/benchmark/methods/HetSANN/HetSANN_MRV/utils/__pycache__/process.cpython-36.pyc -------------------------------------------------------------------------------- /NC/benchmark/methods/HetSANN/fig/attention.png:
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/HGB/ca6fd5bb0c1ca32e63b132c8bfe8f11a4a6629fe/NC/benchmark/methods/HetSANN/fig/attention.png -------------------------------------------------------------------------------- /NC/benchmark/methods/HetSANN/fig/model.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/HGB/ca6fd5bb0c1ca32e63b132c8bfe8f11a4a6629fe/NC/benchmark/methods/HetSANN/fig/model.png -------------------------------------------------------------------------------- /NC/benchmark/methods/MAGNN/README.md: -------------------------------------------------------------------------------- 1 | ## MAGNN for benchmark 2 | 3 | MAGNN for benchmark datasets. 4 | 5 | ``` 6 | python run_DBLP.py 7 | python run_ACM.py 8 | python run_IMDB_new.py 9 | ``` 10 | 11 | ## MAGNN 12 | 13 | This repository provides a reference implementation of MAGNN as described in the paper: 14 | > MAGNN: Metapath Aggregated Graph Neural Network for Heterogeneous Graph Embedding.
15 | > Xinyu Fu, Jiani Zhang, Ziqiao Meng, Irwin King.
16 | > The Web Conference, 2020. 17 | 18 | Available at [arXiv:2002.01680](https://arxiv.org/abs/2002.01680). 19 | 20 | ### Dependencies 21 | 22 | Recent versions of the following packages for Python 3 are required: 23 | * PyTorch 1.2.0 24 | * DGL 0.3.1 25 | * NetworkX 2.3 26 | * scikit-learn 0.21.3 27 | * NumPy 1.17.2 28 | * SciPy 1.3.1 29 | 30 | Dependencies for the preprocessing code are not listed here. 31 | 32 | ### Datasets 33 | 34 | The preprocessed datasets are available at: 35 | * IMDb - [Dropbox](https://www.dropbox.com/s/g0btk9ctr1es39x/IMDB_processed.zip?dl=0) 36 | * DBLP - [Dropbox](https://www.dropbox.com/s/yh4grpeks87ugr2/DBLP_processed.zip?dl=0) 37 | * Last.fm - [Dropbox](https://www.dropbox.com/s/jvlbs09pz6zwcka/LastFM_processed.zip?dl=0) 38 | 39 | The GloVe word vectors are obtained from [GloVe](https://nlp.stanford.edu/projects/glove/). Here is [the direct link](http://nlp.stanford.edu/data/glove.6B.zip) for the version we used in DBLP preprocessing. 40 | 41 | ### Usage 42 | 43 | 1. Create `checkpoint/` and `data/preprocessed` directories 44 | 2. Extract the zip file downloaded from the section above to `data/preprocessed` 45 | * E.g., extract the content of `IMDB_processed.zip` to `data/preprocessed/IMDB_processed` 46 | 3. Execute one of the following three commands from the project home directory: 47 | * `python run_IMDB.py` 48 | * `python run_DBLP.py` 49 | * `python run_LastFM.py` 50 | 51 | For more information about the available options of the model, run `python run_IMDB.py --help` 52 | 53 | ### Citing 54 | 55 | If you find MAGNN useful in your research, please cite the following paper: 56 | 57 | @inproceedings{fu2020magnn, 58 | title={MAGNN: Metapath Aggregated Graph Neural Network for Heterogeneous Graph Embedding}, 59 | author={Xinyu Fu and Jiani Zhang and Ziqiao Meng and Irwin King}, 60 | booktitle = {WWW}, 61 | year={2020} 62 | } 63 | -------------------------------------------------------------------------------- /NC/benchmark/methods/MAGNN/model/__init__.py: -------------------------------------------------------------------------------- 1 | from model.MAGNN_nc import MAGNN_nc 2 | from model.MAGNN_nc_mb import MAGNN_nc_mb 3 | from model.MAGNN_lp import MAGNN_lp 4 | -------------------------------------------------------------------------------- /NC/benchmark/methods/MAGNN/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/HGB/ca6fd5bb0c1ca32e63b132c8bfe8f11a4a6629fe/NC/benchmark/methods/MAGNN/utils/__init__.py -------------------------------------------------------------------------------- /NC/benchmark/methods/MAGNN/utils/pytorchtools.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | 5 | class EarlyStopping: 6 | """Early stops the training if validation loss doesn't improve after a given patience.""" 7 | def __init__(self, patience, verbose=False, delta=0, save_path='checkpoint.pt'): 8 | """ 9 | Args: 10 | patience (int): How long to wait after last time validation loss improved. 11 | Default: 7 12 | verbose (bool): If True, prints a message for each validation loss improvement. 13 | Default: False 14 | delta (float): Minimum change in the monitored quantity to qualify as an improvement.
15 | Default: 0 16 | """ 17 | self.patience = patience 18 | self.verbose = verbose 19 | self.counter = 0 20 | self.best_score = None 21 | self.early_stop = False 22 | self.val_loss_min = np.Inf 23 | self.delta = delta 24 | self.save_path = save_path 25 | 26 | def __call__(self, val_loss, model): 27 | 28 | score = -val_loss 29 | 30 | if self.best_score is None: 31 | self.best_score = score 32 | self.save_checkpoint(val_loss, model) 33 | elif score < self.best_score - self.delta: 34 | self.counter += 1 35 | print(f'EarlyStopping counter: {self.counter} out of {self.patience}') 36 | if self.counter >= self.patience: 37 | self.early_stop = True 38 | else: 39 | self.best_score = score 40 | self.save_checkpoint(val_loss, model) 41 | self.counter = 0 42 | 43 | def save_checkpoint(self, val_loss, model): 44 | """Saves model when validation loss decrease.""" 45 | if self.verbose: 46 | print(f'Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}). Saving model ...') 47 | torch.save(model.state_dict(), self.save_path) 48 | self.val_loss_min = val_loss 49 | -------------------------------------------------------------------------------- /NC/benchmark/methods/RGCN/scripts/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/HGB/ca6fd5bb0c1ca32e63b132c8bfe8f11a4a6629fe/NC/benchmark/methods/RGCN/scripts/__init__.py -------------------------------------------------------------------------------- /NC/benchmark/methods/RSHN/torch_geometric/__init__.py: -------------------------------------------------------------------------------- 1 | __version__ = '1.0.0' 2 | 3 | __all__ = ['__version__'] 4 | -------------------------------------------------------------------------------- /NC/benchmark/methods/RSHN/torch_geometric/data/__init__.py: -------------------------------------------------------------------------------- 1 | from .data import Data 2 | from .batch import Batch 3 | from .dataset import Dataset 4 | from .in_memory_dataset import InMemoryDataset 5 | from .dataloader import DataLoader, DenseDataLoader 6 | from .download import download_url 7 | from .extract import extract_tar, extract_zip, extract_gz 8 | 9 | __all__ = [ 10 | 'Data', 11 | 'Batch', 12 | 'Dataset', 13 | 'InMemoryDataset', 14 | 'DataLoader', 15 | 'DenseDataLoader', 16 | 'download_url', 17 | 'extract_tar', 18 | 'extract_zip', 19 | 'extract_gz', 20 | ] 21 | -------------------------------------------------------------------------------- /NC/benchmark/methods/RSHN/torch_geometric/data/batch.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch_geometric.data import Data 3 | 4 | 5 | class Batch(Data): 6 | def __init__(self, batch=None, **kwargs): 7 | super(Batch, self).__init__(**kwargs) 8 | self.batch = batch 9 | 10 | @staticmethod 11 | def from_data_list(data_list): 12 | """""" 13 | keys = [set(data.keys) for data in data_list] 14 | keys = list(set.union(*keys)) 15 | assert 'batch' not in keys 16 | 17 | batch = Batch() 18 | 19 | for key in keys: 20 | batch[key] = [] 21 | batch.batch = [] 22 | 23 | cumsum = 0 24 | for i, data in enumerate(data_list): 25 | num_nodes = data.num_nodes 26 | batch.batch.append(torch.full((num_nodes, ), i, dtype=torch.long)) 27 | for key in data.keys: 28 | item = data[key] 29 | item = item + cumsum if batch.cumsum(key, item) else item 30 | batch[key].append(item) 31 | cumsum += num_nodes 32 | 33 | for key in keys: 34 | batch[key] = torch.cat( 35 | 
batch[key], dim=data_list[0].cat_dim(key, batch[key][0])) 36 | batch.batch = torch.cat(batch.batch, dim=-1) 37 | return batch.contiguous() 38 | 39 | def cumsum(self, key, item): 40 | return item.dim() > 1 and item.dtype == torch.long 41 | 42 | @property 43 | def num_graphs(self): 44 | """""" 45 | return self.batch[-1].item() + 1 46 | -------------------------------------------------------------------------------- /NC/benchmark/methods/RSHN/torch_geometric/data/dataloader.py: -------------------------------------------------------------------------------- 1 | import torch.utils.data 2 | from torch.utils.data.dataloader import default_collate 3 | 4 | from torch_geometric.data import Batch 5 | 6 | 7 | class DataLoader(torch.utils.data.DataLoader): 8 | def __init__(self, dataset, batch_size=1, shuffle=True, **kwargs): 9 | super(DataLoader, self).__init__( 10 | dataset, 11 | batch_size, 12 | shuffle, 13 | collate_fn=lambda batch: Batch.from_data_list(batch), 14 | **kwargs) 15 | 16 | 17 | class DenseDataLoader(torch.utils.data.DataLoader): 18 | def __init__(self, dataset, batch_size=1, shuffle=True, **kwargs): 19 | def dense_collate(data_list): 20 | batch = Batch() 21 | for key in data_list[0].keys: 22 | batch[key] = default_collate([d[key] for d in data_list]) 23 | return batch 24 | 25 | super(DenseDataLoader, self).__init__( 26 | dataset, batch_size, shuffle, collate_fn=dense_collate, **kwargs) 27 | -------------------------------------------------------------------------------- /NC/benchmark/methods/RSHN/torch_geometric/data/download.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | import os.path as osp 4 | from six.moves import urllib 5 | 6 | from .makedirs import makedirs 7 | 8 | 9 | def download_url(url, folder, log=True): 10 | if log: 11 | print('Downloading', url) 12 | 13 | makedirs(folder) 14 | 15 | data = urllib.request.urlopen(url) 16 | filename = url.rpartition('/')[2] 17 | path = osp.join(folder, filename) 18 | 19 | with open(path, 'wb') as f: 20 | f.write(data.read()) 21 | 22 | return path 23 | -------------------------------------------------------------------------------- /NC/benchmark/methods/RSHN/torch_geometric/data/extract.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | import os.path as osp 4 | import tarfile 5 | import zipfile 6 | import gzip 7 | import shutil 8 | 9 | 10 | def maybe_log(path, log=True): 11 | if log: 12 | print('Extracting', path) 13 | 14 | 15 | def extract_tar(path, folder, mode='r:gz', log=True): 16 | maybe_log(path, log) 17 | with tarfile.open(path, mode) as f: 18 | f.extractall(folder) 19 | 20 | 21 | def extract_zip(path, folder, log=True): 22 | maybe_log(path, log) 23 | with zipfile.ZipFile(path, 'r') as f: 24 | f.extractall(folder) 25 | 26 | 27 | def extract_gz(path, folder, name, log=True): 28 | maybe_log(path, log) 29 | with gzip.open(path, 'rb') as f_in: 30 | with open(osp.join(folder, name), 'wb') as f_out: 31 | shutil.copyfileobj(f_in, f_out) 32 | -------------------------------------------------------------------------------- /NC/benchmark/methods/RSHN/torch_geometric/data/makedirs.py: -------------------------------------------------------------------------------- 1 | import os 2 | import os.path as osp 3 | import errno 4 | 5 | 6 | def makedirs(path): 7 | try: 8 | os.makedirs(osp.expanduser(osp.normpath(path))) 9 | except OSError as e: 10 | if e.errno != errno.EEXIST and 
osp.isdir(path): 11 | raise e 12 | -------------------------------------------------------------------------------- /NC/benchmark/methods/RSHN/torch_geometric/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | from .entities import Entities 2 | 3 | 4 | __all__ = [ 5 | 'Entities' 6 | ] 7 | -------------------------------------------------------------------------------- /NC/benchmark/methods/RSHN/torch_geometric/nn/__init__.py: -------------------------------------------------------------------------------- 1 | from .conv import * # noqa 2 | -------------------------------------------------------------------------------- /NC/benchmark/methods/RSHN/torch_geometric/nn/conv/__init__.py: -------------------------------------------------------------------------------- 1 | from .message_passing import MessagePassing 2 | from .nn_conv import NNConv 3 | from .relation_conv import RelationConv 4 | __all__ = [ 5 | 'MessagePassing', 6 | 'NNConv', 7 | 'RelationConv', 8 | ] 9 | -------------------------------------------------------------------------------- /NC/benchmark/methods/RSHN/torch_geometric/nn/conv/message_passing.py: -------------------------------------------------------------------------------- 1 | import inspect 2 | 3 | import torch 4 | from torch_geometric.utils import scatter_ 5 | 6 | 7 | class MessagePassing(torch.nn.Module): 8 | def __init__(self, aggr='add'): 9 | super(MessagePassing, self).__init__() 10 | 11 | self.message_args = inspect.getargspec(self.message)[0][1:] 12 | self.update_args = inspect.getargspec(self.update)[0][2:] 13 | 14 | def propagate(self, aggr, edge_index, **kwargs): 15 | assert aggr in ['add', 'mean', 'max'] 16 | kwargs['edge_index'] = edge_index 17 | 18 | size = None 19 | message_args = [] 20 | for arg in self.message_args: 21 | if arg[-2:] == '_i': 22 | tmp = kwargs[arg[:-2]] 23 | size = tmp.size(0) 24 | message_args.append(tmp[edge_index[0]]) 25 | elif arg[-2:] == '_j': 26 | tmp = kwargs[arg[:-2]] 27 | size = tmp.size(0) 28 | message_args.append(tmp[edge_index[1]]) 29 | else: 30 | message_args.append(kwargs[arg]) 31 | 32 | update_args = [kwargs[arg] for arg in self.update_args] 33 | 34 | out = self.message(*message_args) 35 | out = scatter_(aggr, out, edge_index[0], dim_size=size) 36 | out = self.update(out, *update_args) 37 | 38 | return out 39 | 40 | def message(self, x_j): # pragma: no cover 41 | return x_j 42 | 43 | def update(self, aggr_out): # pragma: no cover 44 | return aggr_out 45 | -------------------------------------------------------------------------------- /NC/benchmark/methods/RSHN/torch_geometric/nn/conv/nn_conv.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.nn import Parameter 3 | from torch_geometric.nn.conv import MessagePassing 4 | 5 | from ..inits import reset, uniform 6 | 7 | 8 | class NNConv(MessagePassing): 9 | def __init__(self, 10 | in_channels, 11 | out_channels, 12 | nn, 13 | aggr="add", 14 | root_weight=False, 15 | bias=False): 16 | super(NNConv, self).__init__() 17 | 18 | self.in_channels = in_channels 19 | self.out_channels = out_channels 20 | self.nn = nn 21 | self.aggr = aggr 22 | self.weight = Parameter(torch.Tensor(in_channels, out_channels)) 23 | 24 | if root_weight: 25 | self.root = Parameter(torch.Tensor(in_channels, out_channels)) 26 | else: 27 | self.register_parameter('root', None) 28 | 29 | if bias: 30 | self.bias = Parameter(torch.Tensor(out_channels)) 31 | else: 32 | 
self.register_parameter('bias', None) 33 | 34 | self.reset_parameters() 35 | 36 | def reset_parameters(self): 37 | reset(self.nn) 38 | size = self.in_channels 39 | uniform(size, self.weight) 40 | uniform(size, self.root) 41 | uniform(size, self.bias) 42 | 43 | def forward(self, x, edge_index, pseudo): 44 | x = x.unsqueeze(-1) if x.dim() == 1 else x 45 | edge_weight = pseudo.unsqueeze(-1) if pseudo.dim() == 1 else pseudo 46 | edge_weight = self.nn(edge_weight).view(-1, self.out_channels) 47 | 48 | x = torch.matmul(x, self.weight) 49 | return self.propagate(self.aggr, edge_index, x=x, edge_weight=edge_weight) 50 | 51 | 52 | def message(self, x_j, edge_weight): 53 | message = x_j - edge_weight 54 | return message 55 | 56 | def update(self, aggr_out, x): 57 | if self.bias is not None: 58 | aggr_out = aggr_out + self.bias 59 | return aggr_out + x 60 | 61 | def __repr__(self): 62 | return '{}({}, {})'.format(self.__class__.__name__, self.in_channels, 63 | self.out_channels) 64 | -------------------------------------------------------------------------------- /NC/benchmark/methods/RSHN/torch_geometric/nn/conv/relation_conv.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.nn import Parameter 3 | import torch.nn.functional as F 4 | from torch_sparse import spmm 5 | from torch_geometric.utils import remove_self_loops, add_self_loops, softmax, add_self_edge_attr_loops 6 | 7 | 8 | class RelationConv(torch.nn.Module): 9 | 10 | def __init__(self, eps=0, train_eps=False, requires_grad=True): 11 | super(RelationConv, self).__init__() 12 | 13 | self.initial_eps = eps 14 | 15 | if train_eps: 16 | self.eps = torch.nn.Parameter(torch.Tensor([eps])) 17 | else: 18 | self.register_buffer('eps', torch.Tensor([eps])) 19 | 20 | '''beta''' 21 | self.requires_grad = requires_grad 22 | if requires_grad: 23 | self.beta = Parameter(torch.Tensor(1)) 24 | else: 25 | self.register_buffer('beta', torch.ones(1)) 26 | 27 | self.reset_parameters() 28 | 29 | def reset_parameters(self): 30 | self.eps.data.fill_(self.initial_eps) 31 | if self.requires_grad: 32 | self.beta.data.fill_(1) 33 | 34 | def forward(self, x, edge_index, edge_attr): 35 | """""" 36 | x = x.unsqueeze(-1) if x.dim() == 1 else x 37 | edge_index, edge_attr = remove_self_loops(edge_index, edge_attr) 38 | row, col = edge_index 39 | 40 | '''co-occurrence rate''' 41 | for i in range(len(x)): 42 | mask = torch.eq(row, i) 43 | edge_attr[mask] = F.normalize(edge_attr[mask], p=2, dim=0) 44 | 45 | '''add-self-loops''' 46 | edge_index = add_self_loops(edge_index, x.size(0)) 47 | row, col = edge_index 48 | edge_attr = add_self_edge_attr_loops(edge_attr, x.size(0)) 49 | 50 | x = F.normalize(x, p=2, dim=-1) 51 | beta = self.beta if self.requires_grad else self._buffers['beta'] 52 | alpha = beta * edge_attr 53 | alpha = softmax(alpha, row, num_nodes=x.size(0)) 54 | 55 | '''Perform the propagation.''' 56 | out = spmm(edge_index, alpha, x.size(0), x.size(1), x) 57 | out = (1 + self.eps) * x + out 58 | return out 59 | 60 | def __repr__(self): 61 | return '{}()'.format(self.__class__.__name__) 62 | -------------------------------------------------------------------------------- /NC/benchmark/methods/RSHN/torch_geometric/nn/inits.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | 4 | def uniform(size, tensor): 5 | stdv = 1.0 / math.sqrt(size) 6 | if tensor is not None: 7 | tensor.data.uniform_(-stdv, stdv) 8 | 9 | 10 | def glorot(tensor): 11 | stdv = 
math.sqrt(6.0 / (tensor.size(0) + tensor.size(1))) 12 | if tensor is not None: 13 | tensor.data.uniform_(-stdv, stdv) 14 | 15 | 16 | def zeros(tensor): 17 | if tensor is not None: 18 | tensor.data.fill_(0) 19 | 20 | 21 | def ones(tensor): 22 | if tensor is not None: 23 | tensor.data.fill_(1) 24 | 25 | 26 | def reset(nn): 27 | def _reset(item): 28 | if hasattr(item, 'reset_parameters'): 29 | item.reset_parameters() 30 | 31 | if nn is not None: 32 | if hasattr(nn, 'children') and len(list(nn.children())) > 0: 33 | for item in nn.children(): 34 | _reset(item) 35 | else: 36 | _reset(nn) 37 | -------------------------------------------------------------------------------- /NC/benchmark/methods/RSHN/torch_geometric/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .degree import degree 2 | from .scatter import scatter_ 3 | from .softmax import softmax 4 | from .undirected import is_undirected, to_undirected 5 | from .isolated import contains_isolated_nodes 6 | from .loop import contains_self_loops, remove_self_loops, add_self_loops, add_self_edge_attr_loops 7 | from .one_hot import one_hot 8 | from .grid import grid 9 | from .normalized_cut import normalized_cut 10 | from .sparse import dense_to_sparse, sparse_to_dense 11 | from .to_batch import to_batch 12 | from .convert import to_scipy_sparse_matrix, to_networkx 13 | from .metric import (accuracy, true_positive, true_negative, false_positive, 14 | false_negative, precision, recall, f1_score) 15 | 16 | __all__ = [ 17 | 'degree', 18 | 'scatter_', 19 | 'softmax', 20 | 'is_undirected', 21 | 'to_undirected', 22 | 'contains_self_loops', 23 | 'remove_self_loops', 24 | 'add_self_loops', 25 | 'add_self_edge_attr_loops', 26 | 'contains_isolated_nodes', 27 | 'one_hot', 28 | 'grid', 29 | 'normalized_cut', 30 | 'dense_to_sparse', 31 | 'sparse_to_dense', 32 | 'to_batch', 33 | 'to_scipy_sparse_matrix', 34 | 'to_networkx', 35 | 'accuracy', 36 | 'true_positive', 37 | 'true_negative', 38 | 'false_positive', 39 | 'false_negative', 40 | 'precision', 41 | 'recall', 42 | 'f1_score', 43 | ] 44 | -------------------------------------------------------------------------------- /NC/benchmark/methods/RSHN/torch_geometric/utils/convert.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import scipy.sparse 3 | import networkx as nx 4 | 5 | from .num_nodes import maybe_num_nodes 6 | 7 | 8 | def to_scipy_sparse_matrix(edge_index, edge_attr=None, num_nodes=None): 9 | row, col = edge_index.cpu() 10 | 11 | if edge_attr is None: 12 | edge_attr = torch.ones(row.size(0)) 13 | else: 14 | edge_attr = edge_attr.view(-1).cpu() 15 | assert edge_attr.size(0) == row.size(0) 16 | 17 | N = maybe_num_nodes(edge_index, num_nodes) 18 | out = scipy.sparse.coo_matrix((edge_attr, (row, col)), (N, N)) 19 | return out 20 | 21 | 22 | def to_networkx(edge_index, x=None, edge_attr=None, pos=None, num_nodes=None): 23 | num_nodes = num_nodes if x is None else x.size(0) 24 | num_nodes = num_nodes if pos is None else pos.size(0) 25 | num_nodes = maybe_num_nodes(edge_index, num_nodes) 26 | 27 | G = nx.DiGraph() 28 | 29 | for i in range(num_nodes): 30 | G.add_node(i) 31 | if x is not None: 32 | G.nodes[i]['x'] = x[i].cpu().numpy() 33 | if pos is not None: 34 | G.nodes[i]['pos'] = pos[i].cpu().numpy() 35 | 36 | for i in range(edge_index.size(1)): 37 | source, target = edge_index[0][i].item(), edge_index[1][i].item() 38 | G.add_edge(source, target) 39 | if edge_attr is not None: 40 | if 
edge_attr.numel() == edge_attr.size(0): 41 | G[source][target]['weight'] = edge_attr[i].item() 42 | else: 43 | G[source][target]['weight'] = edge_attr[i].cpu().numpy() 44 | 45 | return G 46 | -------------------------------------------------------------------------------- /NC/benchmark/methods/RSHN/torch_geometric/utils/degree.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from .num_nodes import maybe_num_nodes 4 | 5 | 6 | def degree(index, num_nodes=None, dtype=None): 7 | """Computes the degree of a given index tensor. 8 | 9 | Args: 10 | index (LongTensor): Source or target indices of edges. 11 | num_nodes (int, optional): The number of nodes in :attr:`index`. 12 | (default: :obj:`None`) 13 | dtype (:obj:`torch.dtype`, optional): The desired data type of the 14 | returned tensor. 15 | 16 | :rtype: :class:`Tensor` 17 | 18 | .. testsetup:: 19 | 20 | import torch 21 | 22 | .. testcode:: 23 | 24 | from torch_geometric.utils import degree 25 | index = torch.tensor([0, 1, 0, 2, 0]) 26 | out = degree(index) 27 | """ 28 | 29 | num_nodes = maybe_num_nodes(index, num_nodes) 30 | out = torch.zeros((num_nodes), dtype=dtype, device=index.device) 31 | return out.scatter_add_(0, index, out.new_ones((index.size(0)))) 32 | -------------------------------------------------------------------------------- /NC/benchmark/methods/RSHN/torch_geometric/utils/grid.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch_sparse import coalesce 3 | 4 | 5 | def grid(height, width, dtype=None, device=None): 6 | edge_index = grid_index(height, width, device) 7 | pos = grid_pos(height, width, dtype, device) 8 | return edge_index, pos 9 | 10 | 11 | def grid_index(height, width, device=None): 12 | w = width 13 | kernel = [-w - 1, -1, w - 1, -w, 0, w, -w + 1, 1, w + 1] 14 | kernel = torch.tensor(kernel, device=device) 15 | 16 | row = torch.arange(height * width, dtype=torch.long, device=device) 17 | row = row.view(-1, 1).repeat(1, kernel.size(0)) 18 | col = row + kernel.view(1, -1) 19 | row, col = row.view(height, -1), col.view(height, -1) 20 | index = torch.arange(3, row.size(1) - 3, dtype=torch.long, device=device) 21 | row, col = row[:, index].view(-1), col[:, index].view(-1) 22 | 23 | mask = (col >= 0) & (col < height * width) 24 | row, col = row[mask], col[mask] 25 | 26 | edge_index = torch.stack([row, col], dim=0) 27 | edge_index, _ = coalesce(edge_index, None, height * width, height * width) 28 | 29 | return edge_index 30 | 31 | 32 | def grid_pos(height, width, dtype=None, device=None): 33 | x = torch.arange(width, dtype=dtype, device=device) 34 | y = (height - 1) - torch.arange(height, dtype=dtype, device=device) 35 | 36 | x = x.repeat(height) 37 | y = y.unsqueeze(-1).repeat(1, width).view(-1) 38 | 39 | return torch.stack([x, y], dim=-1) 40 | -------------------------------------------------------------------------------- /NC/benchmark/methods/RSHN/torch_geometric/utils/isolated.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from .num_nodes import maybe_num_nodes 4 | from .loop import remove_self_loops 5 | 6 | 7 | def contains_isolated_nodes(edge_index, num_nodes=None): 8 | num_nodes = maybe_num_nodes(edge_index, num_nodes) 9 | (row, _), _ = remove_self_loops(edge_index) 10 | return torch.unique(row).size(0) < num_nodes 11 | -------------------------------------------------------------------------------- 
/NC/benchmark/methods/RSHN/torch_geometric/utils/loop.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from .num_nodes import maybe_num_nodes 4 | 5 | 6 | def contains_self_loops(edge_index): 7 | row, col = edge_index 8 | mask = row == col 9 | return mask.sum().item() > 0 10 | 11 | 12 | def remove_self_loops(edge_index, edge_attr=None): 13 | row, col = edge_index 14 | mask = row != col 15 | edge_attr = edge_attr if edge_attr is None else edge_attr[mask] 16 | mask = mask.unsqueeze(0).expand_as(edge_index) 17 | edge_index = edge_index[mask].view(2, -1) 18 | 19 | return edge_index, edge_attr 20 | 21 | 22 | def add_self_loops(edge_index, num_nodes=None): 23 | num_nodes = maybe_num_nodes(edge_index, num_nodes) 24 | 25 | dtype, device = edge_index.dtype, edge_index.device 26 | loop = torch.arange(0, num_nodes, dtype=dtype, device=device) 27 | loop = loop.unsqueeze(0).repeat(2, 1) 28 | edge_index = torch.cat([edge_index, loop], dim=1) 29 | 30 | return edge_index 31 | 32 | def add_self_edge_attr_loops(edge_attr, num_nodes=None): 33 | dtype, device = edge_attr.dtype, edge_attr.device 34 | loop = torch.ones(num_nodes, dtype=dtype, device=device) 35 | edge_attr = torch.cat([edge_attr, loop], dim=0) 36 | 37 | return edge_attr -------------------------------------------------------------------------------- /NC/benchmark/methods/RSHN/torch_geometric/utils/metric.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | 3 | import torch 4 | 5 | 6 | def accuracy(pred, target): 7 | return (pred == target).sum().item() / target.numel() 8 | 9 | 10 | def true_positive(pred, target, num_classes): 11 | out = [] 12 | for i in range(num_classes): 13 | out.append(((pred == i) & (target == i)).sum()) 14 | 15 | return torch.tensor(out) 16 | 17 | 18 | def true_negative(pred, target, num_classes): 19 | out = [] 20 | for i in range(num_classes): 21 | out.append(((pred != i) & (target != i)).sum()) 22 | 23 | return torch.tensor(out) 24 | 25 | 26 | def false_positive(pred, target, num_classes): 27 | out = [] 28 | for i in range(num_classes): 29 | out.append(((pred == i) & (target != i)).sum()) 30 | 31 | return torch.tensor(out) 32 | 33 | 34 | def false_negative(pred, target, num_classes): 35 | out = [] 36 | for i in range(num_classes): 37 | out.append(((pred != i) & (target == i)).sum()) 38 | 39 | return torch.tensor(out) 40 | 41 | 42 | def precision(pred, target, num_classes): 43 | tp = true_positive(pred, target, num_classes).to(torch.float) 44 | fp = false_positive(pred, target, num_classes).to(torch.float) 45 | 46 | out = tp / (tp + fp) 47 | out[torch.isnan(out)] = 0 48 | 49 | return out 50 | 51 | 52 | def recall(pred, target, num_classes): 53 | tp = true_positive(pred, target, num_classes).to(torch.float) 54 | fn = false_negative(pred, target, num_classes).to(torch.float) 55 | 56 | out = tp / (tp + fn) 57 | out[torch.isnan(out)] = 0 58 | 59 | return out 60 | 61 | 62 | def f1_score(pred, target, num_classes): 63 | prec = precision(pred, target, num_classes) 64 | rec = recall(pred, target, num_classes) 65 | 66 | score = 2 * (prec * rec) / (prec + rec) 67 | score[torch.isnan(score)] = 0 68 | 69 | return score 70 | -------------------------------------------------------------------------------- /NC/benchmark/methods/RSHN/torch_geometric/utils/normalized_cut.py: -------------------------------------------------------------------------------- 1 | from torch_geometric.utils import degree 2 | 3 | 4 
| def normalized_cut(edge_index, edge_attr, num_nodes=None): 5 | row, col = edge_index 6 | deg = 1 / degree(row, num_nodes, edge_attr.dtype) 7 | deg = deg[row] + deg[col] 8 | cut = edge_attr * deg 9 | return cut 10 | -------------------------------------------------------------------------------- /NC/benchmark/methods/RSHN/torch_geometric/utils/num_nodes.py: -------------------------------------------------------------------------------- 1 | def maybe_num_nodes(edge_index, num_nodes=None): 2 | return edge_index.max().item() + 1 if num_nodes is None else num_nodes 3 | -------------------------------------------------------------------------------- /NC/benchmark/methods/RSHN/torch_geometric/utils/one_hot.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from .repeat import repeat 4 | 5 | 6 | def one_hot(src, num_classes=None, dtype=None): 7 | src = src.to(torch.long) 8 | src = src.unsqueeze(-1) if src.dim() == 1 else src 9 | assert src.dim() == 2 10 | 11 | if num_classes is None: 12 | num_classes = src.max(dim=0)[0] + 1 13 | else: 14 | num_classes = torch.tensor( 15 | repeat(num_classes, length=src.size(1)), 16 | dtype=torch.long, 17 | device=src.device) 18 | 19 | if src.size(1) > 1: 20 | zero = torch.tensor([0], device=src.device) 21 | src = src + torch.cat([zero, torch.cumsum(num_classes, 0)[:-1]]) 22 | 23 | size = src.size(0), num_classes.sum() 24 | out = torch.zeros(size, dtype=dtype, device=src.device) 25 | out.scatter_(1, src, 1) 26 | return out 27 | -------------------------------------------------------------------------------- /NC/benchmark/methods/RSHN/torch_geometric/utils/repeat.py: -------------------------------------------------------------------------------- 1 | import numbers 2 | import itertools 3 | 4 | 5 | def repeat(src, length): 6 | if src is None: 7 | return None 8 | if isinstance(src, numbers.Number): 9 | return list(itertools.repeat(src, length)) 10 | if (len(src) > length): 11 | return src[:length] 12 | if (len(src) < length): 13 | return src + list(itertools.repeat(src[-1], length - len(src))) 14 | return src 15 | -------------------------------------------------------------------------------- /NC/benchmark/methods/RSHN/torch_geometric/utils/scatter.py: -------------------------------------------------------------------------------- 1 | import torch_scatter 2 | 3 | 4 | def scatter_(name, src, index, dim_size=None): 5 | r"""Aggregates all values from the :attr:`src` tensor at the indices 6 | specified in the :attr:`index` tensor along the first dimension. 7 | If multiple indices reference the same location, their contributions 8 | are aggregated according to :attr:`name` (either :obj:`"add"`, 9 | :obj:`"mean"` or :obj:`"max"`). 10 | 11 | Args: 12 | name (string): The aggregation to use (:obj:`"add"`, :obj:`"mean"`, 13 | :obj:`"max"`). 14 | src (Tensor): The source tensor. 15 | index (LongTensor): The indices of elements to scatter. 16 | dim_size (int, optional): Automatically create output tensor with size 17 | :attr:`dim_size` in the first dimension. If set to :attr:`None`, a 18 | minimal sized output tensor is returned. (default: :obj:`None`) 19 | 20 | :rtype: :class:`Tensor` 21 | 22 | .. testsetup:: 23 | 24 | import torch 25 | 26 | .. 
testcode:: 27 | 28 | from torch_geometric.utils import scatter_ 29 | src = torch.Tensor([2, 3, -2, 1, 1]) 30 | index = torch.tensor([0, 1, 0, 1, 2]) 31 | out = scatter_("add", src, index) 32 | """ 33 | 34 | assert name in ['add', 'mean']#, 'max'] 35 | 36 | op = getattr(torch_scatter, 'scatter_{}'.format(name)) 37 | # fill_value = -1e38 if name is 'max' else 0 38 | 39 | out = op(src, index, 0, None, dim_size)#, fill_value) 40 | if isinstance(out, tuple): 41 | out = out[0] 42 | 43 | # if name is 'max': 44 | # out[out == fill_value] = 0 45 | 46 | return out 47 | -------------------------------------------------------------------------------- /NC/benchmark/methods/RSHN/torch_geometric/utils/softmax.py: -------------------------------------------------------------------------------- 1 | from torch_scatter import scatter_max, scatter_add 2 | 3 | from .num_nodes import maybe_num_nodes 4 | 5 | 6 | def softmax(src, index, num_nodes=None): 7 | r"""Sparse softmax of all values from the :attr:`src` tensor at the indices 8 | specified in the :attr:`index` tensor along the first dimension. 9 | 10 | Args: 11 | src (Tensor): The source tensor. 12 | index (LongTensor): The indices of elements for applying the softmax. 13 | num_nodes (int, optional): Automatically create output tensor with size 14 | :attr:`num_nodes` in the first dimension. If set to :attr:`None`, a 15 | minimal sized output tensor is returned. (default: :obj:`None`) 16 | 17 | :rtype: :class:`Tensor` 18 | 19 | .. testsetup:: 20 | 21 | import torch 22 | 23 | .. testcode:: 24 | 25 | from torch_geometric.utils import softmax 26 | src = torch.Tensor([2, 3, -2, 1, 1]) 27 | index = torch.tensor([0, 1, 0, 1, 2]) 28 | out = softmax(src, index) 29 | """ 30 | 31 | num_nodes = maybe_num_nodes(index, num_nodes) 32 | 33 | out = src - scatter_max(src, index, dim=0, dim_size=num_nodes)[0][index] 34 | out = out.exp() 35 | out = out / scatter_add(out, index, dim=0, dim_size=num_nodes)[index] 36 | 37 | return out 38 | -------------------------------------------------------------------------------- /NC/benchmark/methods/RSHN/torch_geometric/utils/sparse.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch_sparse import coalesce 3 | 4 | from .num_nodes import maybe_num_nodes 5 | 6 | 7 | def dense_to_sparse(tensor): 8 | index = tensor.nonzero().t().contiguous() 9 | value = tensor[index[0], index[1]] 10 | index, value = coalesce(index, value, tensor.size(0), tensor.size(1)) 11 | return index, value 12 | 13 | 14 | def sparse_to_dense(edge_index, edge_attr, num_nodes=None): 15 | N = maybe_num_nodes(edge_index, num_nodes) 16 | 17 | adj = torch.sparse_coo_tensor(edge_index, edge_attr, torch.Size([N, N])) 18 | return adj.to_dense() 19 | -------------------------------------------------------------------------------- /NC/benchmark/methods/RSHN/torch_geometric/utils/to_batch.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch_scatter import scatter_add 3 | 4 | 5 | def to_batch(x, batch, fill_value=0): 6 | num_nodes = scatter_add(batch.new_ones(x.size(0)), batch, dim=0) 7 | batch_size, max_num_nodes = num_nodes.size(0), num_nodes.max().item() 8 | cum_nodes = torch.cat([batch.new_zeros(1), num_nodes.cumsum(dim=0)], dim=0) 9 | 10 | index = torch.arange(batch.size(0), dtype=torch.long, device=x.device) 11 | index = (index - cum_nodes[batch]) + (batch * max_num_nodes) 12 | 13 | size = [batch_size * max_num_nodes] + list(x.size())[1:] 14 | batch_x 
= x.new_full(size, fill_value) 15 | batch_x[index] = x 16 | size = [batch_size, max_num_nodes] + list(x.size())[1:] 17 | batch_x = batch_x.view(size) 18 | 19 | return batch_x, num_nodes 20 | -------------------------------------------------------------------------------- /NC/benchmark/methods/RSHN/torch_geometric/utils/undirected.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch_sparse import coalesce 3 | 4 | from .num_nodes import maybe_num_nodes 5 | 6 | 7 | def is_undirected(edge_index, num_nodes=None): 8 | num_nodes = maybe_num_nodes(edge_index, num_nodes) 9 | edge_index, _ = coalesce(edge_index, None, num_nodes, num_nodes) 10 | undirected_edge_index = to_undirected(edge_index, num_nodes=num_nodes) 11 | return edge_index.size(1) == undirected_edge_index.size(1) 12 | 13 | 14 | def to_undirected(edge_index, num_nodes=None): 15 | num_nodes = maybe_num_nodes(edge_index, num_nodes) 16 | 17 | row, col = edge_index 18 | row, col = torch.cat([row, col], dim=0), torch.cat([col, row], dim=0) 19 | edge_index = torch.stack([row, col], dim=0) 20 | edge_index, _ = coalesce(edge_index, None, num_nodes, num_nodes) 21 | 22 | return edge_index 23 | -------------------------------------------------------------------------------- /NC/benchmark/methods/baseline/README.md: -------------------------------------------------------------------------------- 1 | # Simple-HGN for HGB 2 | 3 | For message passing with relation attention version: 4 | 5 | ``` 6 | python run_new.py --dataset DBLP 7 | python run_new.py --dataset ACM --feats-type 2 8 | python run_multi.py --dataset IMDB --feats-type 0 9 | python run_new.py --dataset Freebase 10 | ``` 11 | 12 | ## running environment 13 | 14 | * torch 1.6.0 cuda 10.1 15 | * dgl 0.4.3 cuda 10.1 16 | * networkx 2.3 17 | * scikit-learn 0.23.2 18 | * scipy 1.5.2 19 | -------------------------------------------------------------------------------- /NC/benchmark/methods/baseline/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/HGB/ca6fd5bb0c1ca32e63b132c8bfe8f11a4a6629fe/NC/benchmark/methods/baseline/utils/__init__.py -------------------------------------------------------------------------------- /NC/benchmark/methods/baseline/utils/data.py: -------------------------------------------------------------------------------- 1 | import networkx as nx 2 | import numpy as np 3 | import scipy 4 | import pickle 5 | import scipy.sparse as sp 6 | 7 | def load_data(prefix='DBLP'): 8 | from scripts.data_loader import data_loader 9 | dl = data_loader('../../data/'+prefix) 10 | features = [] 11 | for i in range(len(dl.nodes['count'])): 12 | th = dl.nodes['attr'][i] 13 | if th is None: 14 | features.append(sp.eye(dl.nodes['count'][i])) 15 | else: 16 | features.append(th) 17 | adjM = sum(dl.links['data'].values()) 18 | labels = np.zeros((dl.nodes['count'][0], dl.labels_train['num_classes']), dtype=int) 19 | val_ratio = 0.2 20 | train_idx = np.nonzero(dl.labels_train['mask'])[0] 21 | np.random.shuffle(train_idx) 22 | split = int(train_idx.shape[0]*val_ratio) 23 | val_idx = train_idx[:split] 24 | train_idx = train_idx[split:] 25 | train_idx = np.sort(train_idx) 26 | val_idx = np.sort(val_idx) 27 | test_idx = np.nonzero(dl.labels_test['mask'])[0] 28 | labels[train_idx] = dl.labels_train['data'][train_idx] 29 | labels[val_idx] = dl.labels_train['data'][val_idx] 30 | if prefix != 'IMDB': 31 | labels = labels.argmax(axis=1) 32 | 
train_val_test_idx = {} 33 | train_val_test_idx['train_idx'] = train_idx 34 | train_val_test_idx['val_idx'] = val_idx 35 | train_val_test_idx['test_idx'] = test_idx 36 | return features,\ 37 | adjM, \ 38 | labels,\ 39 | train_val_test_idx,\ 40 | dl 41 | -------------------------------------------------------------------------------- /NC/benchmark/methods/baseline/utils/pytorchtools.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | 5 | class EarlyStopping: 6 | """Early stops the training if the validation loss doesn't improve after a given patience.""" 7 | def __init__(self, patience, verbose=False, delta=0, save_path='checkpoint.pt'): 8 | """ 9 | Args: 10 | patience (int): How long to wait after the last time the validation loss improved. 11 | Default: 7 12 | verbose (bool): If True, prints a message for each validation loss improvement. 13 | Default: False 14 | delta (float): Minimum change in the monitored quantity to qualify as an improvement. 15 | Default: 0 16 | """ 17 | self.patience = patience 18 | self.verbose = verbose 19 | self.counter = 0 20 | self.best_score = None 21 | self.early_stop = False 22 | self.val_loss_min = np.Inf 23 | self.delta = delta 24 | self.save_path = save_path 25 | 26 | def __call__(self, val_loss, model): 27 | 28 | score = -val_loss 29 | 30 | if self.best_score is None: 31 | self.best_score = score 32 | self.save_checkpoint(val_loss, model) 33 | elif score < self.best_score - self.delta: 34 | self.counter += 1 35 | print(f'EarlyStopping counter: {self.counter} out of {self.patience}') 36 | if self.counter >= self.patience: 37 | self.early_stop = True 38 | else: 39 | self.best_score = score 40 | self.save_checkpoint(val_loss, model) 41 | self.counter = 0 42 | 43 | def save_checkpoint(self, val_loss, model): 44 | """Saves the model when the validation loss decreases.""" 45 | if self.verbose: 46 | print(f'Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}). Saving model ...') 47 | torch.save(model.state_dict(), self.save_path) 48 | self.val_loss_min = val_loss 49 | -------------------------------------------------------------------------------- /NC/benchmark/scripts/README.md: -------------------------------------------------------------------------------- 1 | ## Evaluate micro_F1 and macro_F1 with prediction files.
2 | ```bash 3 | python NC_F1.py --pred_zip nc.zip --log out.log 4 | ``` -------------------------------------------------------------------------------- /NC/benchmark/scripts/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/HGB/ca6fd5bb0c1ca32e63b132c8bfe8f11a4a6629fe/NC/benchmark/scripts/__init__.py -------------------------------------------------------------------------------- /NC/benchmark/test.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from scripts.data_loader import data_loader 3 | 4 | dl = data_loader('./data/DBLP') 5 | print(dl.nodes) 6 | print(dl.links) 7 | print(dl.labels_train) 8 | print(dl.labels_train['data'][dl.labels_train['mask']]) 9 | pred = dl.labels_test['data'][dl.labels_test['mask']] 10 | print(dl.evaluate(pred)) 11 | train_idx = np.nonzero(dl.labels_train['mask'])[0] 12 | test_idx = np.nonzero(dl.labels_test['mask'])[0] 13 | print(train_idx) 14 | print(train_idx.shape) 15 | print(test_idx) 16 | print(test_idx.shape) 17 | 18 | meta = [(0, 1), (1, 0)] 19 | print(dl.get_meta_path(meta)) 20 | print(dl.get_full_meta_path(meta)[0]) 21 | -------------------------------------------------------------------------------- /Recom/KGAT/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Xiang Wang 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /Recom/KGAT/Log/README.md: -------------------------------------------------------------------------------- 1 | # Training Log of KGAT 2 | 3 | To demonstrate the **reproducibility of our best performance** and **facilitate researchers in tracking their own training**, here I offer the training logs for KGAT on three datasets, recording the changes of `loss` and four evaluation metrics (`recall@k`, `precision@k`, `hit@k`, and `ndcg@k`) every 10 epochs.
4 | 5 | ## Record Example 6 | An example record is shown as follows: 7 | ``` 8 | Epoch 19 [238.5s + 104.4s]: train==[30.47060=16.64901 + 13.32211 + 0.49948], recall=[0.06590, 0.19621], precision=[0.01598, 0.00987], hit=[0.25201, 0.55005], ndcg=[0.08068, 0.15693] 9 | save the weights in path: weights/yelp2018/kgat_si_sum_bi_l3/64-32-16/l0.0001_r1e-05-1e-05 10 | ``` 11 | where: 12 | * `[238.5s + 104.4s]` shows the time cost of one training pass and one testing pass, respectively; 13 | * `train==[30.47060=16.64901 + 13.32211 + 0.49948]` records the total loss, decomposed into the BPR loss for recommendation, the BPR loss for knowledge graph embedding, and the L2 regularization term for both phases; 14 | * `recall=[0.06590, 0.19621]` shows the `recall@20` and `recall@100` scores; analogously for the precision, hit, and ndcg scores. A minimal sketch for parsing such records is given at the end of this README. 15 | 16 | The final performance is shown as: 17 | ``` 18 | Best Iter=[95]@[166677.6] recall=[0.08820 0.12068 0.14174 0.15773 0.17082], precision=[0.03521 0.02661 0.02217 0.01939 0.01741], hit=[0.36111 0.46397 0.52597 0.56730 0.59930], ndcg=[0.13660 0.16881 0.19020 0.20689 0.22085] 19 | ``` 20 | where: 21 | * `Best Iter=[95]@[166677.6]` shows that the best performance appears at the `95*10` epoch (since we show the training log every 10 epochs); 22 | * `recall=[0.08820 0.12068 0.14174 0.15773 0.17082]` displays the final performance w.r.t. `recall@k`, where `k` spans `20`, `40`, `60`, `80`, and `100`. 23 | 24 | ## Some Points 25 | Here I would like to clarify some points: 26 | * The training and testing time costs may differ depending on the running machine. 27 | * The training loss may differ slightly due to different random seeds. 28 | * Please note that, while the hyperparameter `--Ks` is set to `[20,40,60,80,100]`, we only show the results for `k` of `20` and `100` here due to limited space; when training finishes, you will obtain the scores for all specified `Ks`.
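For researchers who want to track these records programmatically, the following is a minimal parsing sketch; it is not part of the original repository, and the regular expressions simply assume the record format shown above.

```python
import re

# one record copied from the example above
record = ("Epoch 19 [238.5s + 104.4s]: train==[30.47060=16.64901 + 13.32211 + 0.49948], "
          "recall=[0.06590, 0.19621], precision=[0.01598, 0.00987], "
          "hit=[0.25201, 0.55005], ndcg=[0.08068, 0.15693]")

epoch = int(re.search(r'Epoch (\d+)', record).group(1))
# total loss, rec BPR loss, KG BPR loss, L2 regularization
losses = [float(v) for v in re.findall(r'[\d.]+', re.search(r'train==\[(.*?)\]', record).group(1))]
# recall@20 and recall@100 (precision/hit/ndcg parse the same way)
recall_20, recall_100 = (float(v) for v in re.search(r'recall=\[(.*?)\]', record).group(1).split(', '))

print(epoch, losses, recall_20, recall_100)
```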
29 | 30 | -------------------------------------------------------------------------------- /Recom/KGAT/Model/README.md: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /Recom/KGAT/Model/utility/README.md: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /Recom/KGAT/Model/utility/helper.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Created on Aug 19, 2016 3 | @author: Xiang Wang (xiangwang@u.nus.edu) 4 | ''' 5 | __author__ = "xiangwang" 6 | import os 7 | import re 8 | 9 | def txt2list(file_src): 10 | orig_file = open(file_src, "r") 11 | lines = orig_file.readlines() 12 | return lines 13 | 14 | 15 | def ensureDir(dir_path): 16 | d = os.path.dirname(dir_path) 17 | if not os.path.exists(d): 18 | os.makedirs(d) 19 | 20 | 21 | def uni2str(unicode_str): 22 | return str(unicode_str.encode('ascii', 'ignore')).replace('\n', '').strip() 23 | 24 | 25 | def hasNumbers(inputString): 26 | return bool(re.search(r'\d', inputString)) 27 | 28 | def delMultiChar(inputString, chars): 29 | for ch in chars: 30 | inputString = inputString.replace(ch, '') 31 | return inputString 32 | 33 | def merge_two_dicts(x, y): 34 | z = x.copy() # start with x's keys and values 35 | z.update(y) # modifies z with y's keys and values & returns None 36 | return z 37 | 38 | def early_stopping(log_value, best_value, stopping_step, expected_order='acc', flag_step=100): 39 | # early stopping strategy: 40 | assert expected_order in ['acc', 'dec'] 41 | 42 | if (expected_order == 'acc' and log_value >= best_value) or (expected_order == 'dec' and log_value <= best_value): 43 | stopping_step = 0 44 | best_value = log_value 45 | else: 46 | stopping_step += 1 47 | 48 | if stopping_step >= flag_step: 49 | print("Early stopping is triggered at step: {} log:{}".format(flag_step, log_value)) 50 | should_stop = True 51 | else: 52 | should_stop = False 53 | return best_value, stopping_step, should_stop -------------------------------------------------------------------------------- /Recom/KGAT/Model/utility/loader_bprmf.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Created on Dec 18, 2018 3 | Tensorflow Implementation of the Baseline Model, BPRMF, in: 4 | Wang Xiang et al. KGAT: Knowledge Graph Attention Network for Recommendation. In KDD 2019.
5 | @author: Xiang Wang (xiangwang@u.nus.edu) 6 | ''' 7 | from utility.load_data import Data 8 | 9 | class BPRMF_loader(Data): 10 | def __init__(self, args, path): 11 | super().__init__(args, path) 12 | 13 | def generate_train_batch(self): 14 | users, pos_items, neg_items = self._generate_train_cf_batch() 15 | 16 | batch_data = {} 17 | batch_data['users'] = users 18 | batch_data['pos_items'] = pos_items 19 | batch_data['neg_items'] = neg_items 20 | 21 | return batch_data 22 | 23 | def generate_train_feed_dict(self, model, batch_data): 24 | feed_dict = { 25 | model.users: batch_data['users'], 26 | model.pos_items: batch_data['pos_items'], 27 | model.neg_items: batch_data['neg_items'] 28 | } 29 | 30 | return feed_dict 31 | 32 | 33 | def generate_test_feed_dict(self, model, user_batch, item_batch, drop_flag=False): 34 | feed_dict = { 35 | model.users: user_batch, 36 | model.pos_items: item_batch 37 | } 38 | return feed_dict 39 | 40 | -------------------------------------------------------------------------------- /Recom/KGCN/.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | *.swp 3 | ======= 4 | # Byte-compiled / optimized / DLL files 5 | __pycache__/ 6 | *.py[cod] 7 | *$py.class 8 | 9 | # C extensions 10 | *.so 11 | 12 | # Distribution / packaging 13 | .Python 14 | env/ 15 | build/ 16 | develop-eggs/ 17 | dist/ 18 | downloads/ 19 | eggs/ 20 | .eggs/ 21 | lib/ 22 | lib64/ 23 | parts/ 24 | sdist/ 25 | var/ 26 | *.egg-info/ 27 | .installed.cfg 28 | *.egg 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *,cover 49 | .hypothesis/ 50 | 51 | # Translations 52 | *.mo 53 | *.pot 54 | 55 | # Django stuff: 56 | *.log 57 | local_settings.py 58 | 59 | # Flask stuff: 60 | instance/ 61 | .webassets-cache 62 | 63 | # Scrapy stuff: 64 | .scrapy 65 | 66 | # Sphinx documentation 67 | docs/_build/ 68 | 69 | # PyBuilder 70 | target/ 71 | 72 | # IPython Notebook 73 | .ipynb_checkpoints 74 | 75 | # pyenv 76 | .python-version 77 | 78 | # celery beat schedule file 79 | celerybeat-schedule 80 | 81 | # dotenv 82 | .env 83 | 84 | # virtualenv 85 | venv/ 86 | ENV/ 87 | 88 | # Spyder project settings 89 | .spyderproject 90 | 91 | # Rope project settings 92 | .ropeproject 93 | 94 | 95 | # self added 96 | .idea/* 97 | data/movie/* 98 | data/music/* 99 | !data/movie/item_index2entity_id.txt 100 | !data/movie/kg.txt 101 | !data/music/item_index2entity_id.txt 102 | !data/music/kg.txt 103 | !data/music/user_artists.dat 104 | -------------------------------------------------------------------------------- /Recom/KGCN/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 Hongwei Wang 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to 
the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /Recom/KGCN/README.md: -------------------------------------------------------------------------------- 1 | # KGCN code 2 | 3 | Adapted from [hwwang55/KGCN](https://github.com/hwwang55/KGCN). 4 | 5 | We replace the GNN module in the paper with GCN and GAT for comparison. 6 | 7 | ## running environment 8 | 9 | * Python 3.6 10 | * tensorflow-gpu 1.9.0 11 | 12 | ## running procedure 13 | 14 | * Download data from [tsinghua-cloud](https://cloud.tsinghua.edu.cn/d/eee49039f99e4984a30a/) or [google-drive](https://drive.google.com/file/d/1fDIJfrTfRdeMFhNxKKthrXGSI6yRmQ41/view?usp=sharing) 15 | * unzip and move data folder to KGCN/ 16 | * cd to src/ 17 | * run main.py 18 | 19 | ```bash 20 | python main.py --model gcn 21 | python main.py --model gat 22 | python main.py --model kgcn 23 | ``` 24 | 25 | ## performance report 26 | 27 | For the MovieLens-20M dataset (other datasets to be completed): 28 | 29 | | | AUC | F1 | 30 | |------|-----------|-----------| 31 | | KGCN | 0.977 | 0.930 | 32 | | GAT | 0.978 | 0.932 | 33 | | GCN | **0.980** | **0.937** | 34 | 35 | ***The following content is from the initial hwwang55/KGCN repo.*** 36 | 37 | # KGCN 38 | 39 | This repository is the implementation of [KGCN](https://dl.acm.org/citation.cfm?id=3313417) ([arXiv](https://arxiv.org/abs/1904.12575)): 40 | 41 | > Knowledge Graph Convolutional Networks for Recommender Systems 42 | Hongwei Wang, Miao Zhao, Xing Xie, Wenjie Li, Minyi Guo. 43 | In Proceedings of The 2019 Web Conference (WWW 2019) 44 | 45 | ![](https://github.com/hwwang55/KGCN/blob/master/framework.png) 46 | 47 | KGCN is **K**nowledge **G**raph **C**onvolutional **N**etworks for recommender systems, which uses the technique of graph convolutional networks (GCN) to process knowledge graphs for the purpose of recommendation. 48 | 49 | 50 | ### Files in the folder 51 | 52 | - `data/` 53 | - `movie/` 54 | - `item_index2entity_id.txt`: the mapping from item indices in the raw rating file to entity IDs in the KG; 55 | - `kg.txt`: knowledge graph file; 56 | - `music/` 57 | - `item_index2entity_id.txt`: the mapping from item indices in the raw rating file to entity IDs in the KG; 58 | - `kg.txt`: knowledge graph file; 59 | - `user_artists.dat`: raw rating file of Last.FM; 60 | - `src/`: implementations of KGCN. 61 | 62 | 63 | 64 | 65 | ### Running the code 66 | - Movie 67 | (The raw rating file of MovieLens-20M is too large to be contained in this repository. 68 | Download the dataset first.)
69 | ``` 70 | $ wget http://files.grouplens.org/datasets/movielens/ml-20m.zip 71 | $ unzip ml-20m.zip 72 | $ mv ml-20m/ratings.csv data/movie/ 73 | $ cd src 74 | $ python preprocess.py -d movie 75 | ``` 76 | - Music 77 | - ``` 78 | $ cd src 79 | $ python preprocess.py -d music 80 | ``` 81 | - open `src/main.py` file; 82 | 83 | - comment the code blocks of parameter settings for MovieLens-20M; 84 | 85 | - uncomment the code blocks of parameter settings for Last.FM; 86 | 87 | - ``` 88 | $ python main.py 89 | ``` 90 | -------------------------------------------------------------------------------- /Recom/KGCN/src/gcn/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/HGB/ca6fd5bb0c1ca32e63b132c8bfe8f11a4a6629fe/Recom/KGCN/src/gcn/__init__.py -------------------------------------------------------------------------------- /Recom/KGCN/src/gcn/inits.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy as np 3 | 4 | 5 | def uniform(shape, scale=0.05, name=None): 6 | """Uniform init.""" 7 | initial = tf.random_uniform(shape, minval=-scale, maxval=scale, dtype=tf.float32) 8 | return tf.Variable(initial, name=name) 9 | 10 | 11 | def glorot(shape, name=None): 12 | """Glorot & Bengio (AISTATS 2010) init.""" 13 | init_range = np.sqrt(6.0/(shape[0]+shape[1])) 14 | initial = tf.random_uniform(shape, minval=-init_range, maxval=init_range, dtype=tf.float32) 15 | return tf.Variable(initial, name=name) 16 | 17 | 18 | def zeros(shape, name=None): 19 | """All zeros.""" 20 | initial = tf.zeros(shape, dtype=tf.float32) 21 | return tf.Variable(initial, name=name) 22 | 23 | 24 | def ones(shape, name=None): 25 | """All ones.""" 26 | initial = tf.ones(shape, dtype=tf.float32) 27 | return tf.Variable(initial, name=name) -------------------------------------------------------------------------------- /Recom/KGCN/src/gcn/layers.py: -------------------------------------------------------------------------------- 1 | from gcn.inits import * 2 | import tensorflow as tf 3 | 4 | 5 | def dot(x, y, sparse=False): 6 | """Wrapper for tf.matmul (sparse vs dense).""" 7 | if sparse: 8 | res = tf.sparse_tensor_dense_matmul(x, y) 9 | else: 10 | res = tf.matmul(x, y) 11 | return res 12 | 13 | 14 | class GraphConvolution: 15 | """Graph convolution layer.""" 16 | def __init__(self, input_dim, output_dim, placeholders, dropout=0., act=tf.nn.relu, bias=False, 17 | featureless=False, **kwargs): 18 | super(GraphConvolution, self).__init__(**kwargs) 19 | 20 | if dropout: 21 | self.dropout = placeholders['dropout'] 22 | else: 23 | self.dropout = 0. 
24 | 25 | self.act = act 26 | self.support = placeholders['support'] 27 | self.featureless = featureless 28 | self.bias = bias 29 | self.vars = {} 30 | 31 | self.vars['weights'] = glorot([input_dim, output_dim]) 32 | if self.bias: 33 | self.vars['bias'] = zeros([output_dim]) 34 | 35 | def _call(self, inputs): 36 | x = inputs 37 | 38 | # dropout 39 | x = tf.nn.dropout(x, 1-self.dropout) 40 | 41 | # convolve 42 | for i in range(1): 43 | if not self.featureless: 44 | pre_sup = dot(x, self.vars['weights']) 45 | else: 46 | pre_sup = self.vars['weights'] 47 | support = dot(self.support, pre_sup, sparse=True) 48 | output = support 49 | 50 | # bias 51 | if self.bias: 52 | output += self.vars['bias'] 53 | 54 | return self.act(output) 55 | 56 | 57 | def GCN(inputs, dim, drop, A): 58 | placeholders = {'dropout':drop, 'support':A} 59 | x = GraphConvolution(dim, dim, placeholders)._call(inputs) 60 | x = GraphConvolution(dim, dim, placeholders, act=lambda x:x)._call(x) 61 | return x 62 | -------------------------------------------------------------------------------- /Recom/KGCN/src/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/HGB/ca6fd5bb0c1ca32e63b132c8bfe8f11a4a6629fe/Recom/KGCN/src/utils/__init__.py -------------------------------------------------------------------------------- /Recom/KGCN/src/utils/sp_gat.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | 4 | from utils import layers 5 | #from models.base_gattn import BaseGAttN 6 | 7 | class SpGAT:#(BaseGAttN): 8 | def inference(inputs, nb_classes, nb_nodes, training, attn_drop, ffd_drop, 9 | bias_mat, hid_units, n_heads, activation=tf.nn.elu, 10 | residual=False): 11 | attns = [] 12 | for _ in range(n_heads[0]): 13 | attns.append(layers.sp_attn_head(inputs, 14 | adj_mat=bias_mat, 15 | out_sz=hid_units[0], activation=activation, nb_nodes=nb_nodes, 16 | in_drop=ffd_drop, coef_drop=attn_drop, residual=False)) 17 | h_1 = tf.concat(attns, axis=-1) 18 | for i in range(1, len(hid_units)): 19 | h_old = h_1 20 | attns = [] 21 | for _ in range(n_heads[i]): 22 | attns.append(layers.sp_attn_head(h_1, 23 | adj_mat=bias_mat, 24 | out_sz=hid_units[i], activation=activation, nb_nodes=nb_nodes, 25 | in_drop=ffd_drop, coef_drop=attn_drop, residual=residual)) 26 | h_1 = tf.concat(attns, axis=-1) 27 | out = [] 28 | for i in range(n_heads[-1]): 29 | out.append(layers.sp_attn_head(h_1, adj_mat=bias_mat, 30 | out_sz=nb_classes, activation=lambda x: x, nb_nodes=nb_nodes, 31 | in_drop=ffd_drop, coef_drop=attn_drop, residual=False)) 32 | logits = tf.add_n(out) / n_heads[-1] 33 | 34 | return logits 35 | -------------------------------------------------------------------------------- /Recom/KGNN-LS/src/empirical_study.py: -------------------------------------------------------------------------------- 1 | import networkx as nx 2 | import numpy as np 3 | import argparse 4 | 5 | 6 | if __name__ == '__main__': 7 | np.random.seed(555) 8 | NUM = 10000 9 | 10 | parser = argparse.ArgumentParser() 11 | parser.add_argument('-d', type=str, default='music') 12 | args = parser.parse_args() 13 | DATASET = args.d 14 | 15 | kg_np = np.load('../data/' + DATASET + '/kg_final.npy') 16 | kg = nx.Graph() 17 | kg.add_edges_from([(triple[0], triple[2]) for triple in kg_np]) # construct knowledge graph 18 | 19 | rating_np = np.load('../data/' + DATASET + '/ratings_final.npy') 20 | item_history = dict() 21 | item_set 
= set() 22 | for record in rating_np: 23 | user = record[0] 24 | item = record[1] 25 | rating = record[2] 26 | if rating == 1: 27 | if item not in item_history: 28 | item_history[item] = set() 29 | item_history[item].add(user) 30 | item_set.add(item) 31 | 32 | item_pair_num_no_common_rater = 0 33 | item_pair_num_with_common_rater = 0 34 | sp_no_common_rater = dict() 35 | sp_with_common_rater = dict() 36 | 37 | while True: 38 | item1, item2 = np.random.choice(list(item_set), size=2, replace=False) 39 | if item_pair_num_no_common_rater == NUM and item_pair_num_with_common_rater == NUM: 40 | break 41 | if item_pair_num_no_common_rater < NUM and len(item_history[item1] & item_history[item2]) == 0: 42 | item_pair_num_no_common_rater += 1 43 | if not nx.has_path(kg, item1, item2): 44 | sp = 'infinity' 45 | else: 46 | sp = nx.shortest_path_length(kg, item1, item2) 47 | if sp not in sp_no_common_rater: 48 | sp_no_common_rater[sp] = 0 49 | sp_no_common_rater[sp] += 1 50 | print(item_pair_num_no_common_rater, item_pair_num_with_common_rater) 51 | if item_pair_num_with_common_rater < NUM and len(item_history[item1] & item_history[item2]) > 0: 52 | item_pair_num_with_common_rater += 1 53 | if not nx.has_path(kg, item1, item2): 54 | sp = 'infinity' 55 | else: 56 | sp = nx.shortest_path_length(kg, item1, item2) 57 | if sp not in sp_with_common_rater: 58 | sp_with_common_rater[sp] = 0 59 | sp_with_common_rater[sp] += 1 60 | print(item_pair_num_no_common_rater, item_pair_num_with_common_rater) 61 | 62 | print(sp_no_common_rater) 63 | print(sp_with_common_rater) 64 | -------------------------------------------------------------------------------- /Recom/KGNN-LS/src/gcn/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/HGB/ca6fd5bb0c1ca32e63b132c8bfe8f11a4a6629fe/Recom/KGNN-LS/src/gcn/__init__.py -------------------------------------------------------------------------------- /Recom/KGNN-LS/src/gcn/inits.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy as np 3 | 4 | 5 | def uniform(shape, scale=0.05, name=None): 6 | """Uniform init.""" 7 | initial = tf.random_uniform(shape, minval=-scale, maxval=scale, dtype=tf.float32) 8 | return tf.Variable(initial, name=name) 9 | 10 | 11 | def glorot(shape, name=None): 12 | """Glorot & Bengio (AISTATS 2010) init.""" 13 | init_range = np.sqrt(6.0/(shape[0]+shape[1])) 14 | initial = tf.random_uniform(shape, minval=-init_range, maxval=init_range, dtype=tf.float32) 15 | return tf.Variable(initial, name=name) 16 | 17 | 18 | def zeros(shape, name=None): 19 | """All zeros.""" 20 | initial = tf.zeros(shape, dtype=tf.float32) 21 | return tf.Variable(initial, name=name) 22 | 23 | 24 | def ones(shape, name=None): 25 | """All ones.""" 26 | initial = tf.ones(shape, dtype=tf.float32) 27 | return tf.Variable(initial, name=name) -------------------------------------------------------------------------------- /Recom/KGNN-LS/src/gcn/layers.py: -------------------------------------------------------------------------------- 1 | from gcn.inits import * 2 | import tensorflow as tf 3 | 4 | 5 | def dot(x, y, sparse=False): 6 | """Wrapper for tf.matmul (sparse vs dense).""" 7 | if sparse: 8 | res = tf.sparse_tensor_dense_matmul(x, y) 9 | else: 10 | res = tf.matmul(x, y) 11 | return res 12 | 13 | 14 | class GraphConvolution: 15 | """Graph convolution layer.""" 16 | 17 | def __init__(self, input_dim, output_dim, placeholders, 
dropout=0., act=tf.nn.relu, bias=False, 18 | featureless=False, **kwargs): 19 | super(GraphConvolution, self).__init__(**kwargs) 20 | 21 | if dropout: 22 | self.dropout = placeholders['dropout'] 23 | else: 24 | self.dropout = 0. 25 | 26 | self.act = act 27 | self.support = placeholders['support'] 28 | self.featureless = featureless 29 | self.bias = bias 30 | self.vars = {} 31 | 32 | self.vars['weights'] = glorot([input_dim, output_dim]) 33 | if self.bias: 34 | self.vars['bias'] = zeros([output_dim]) 35 | 36 | def _call(self, inputs): 37 | x = inputs 38 | 39 | # dropout 40 | x = tf.nn.dropout(x, 1-self.dropout) 41 | 42 | # convolve 43 | for i in range(1): 44 | if not self.featureless: 45 | pre_sup = dot(x, self.vars['weights']) 46 | else: 47 | pre_sup = self.vars['weights'] 48 | support = dot(self.support, pre_sup, sparse=True) 49 | output = support 50 | 51 | # bias 52 | if self.bias: 53 | output += self.vars['bias'] 54 | 55 | return self.act(output) 56 | 57 | 58 | def GCN(inputs, dim, drop, A, n_layer): 59 | placeholders = {'dropout': drop, 'support': A} 60 | 61 | if n_layer == 1: 62 | return GraphConvolution(dim, dim, placeholders, act=lambda x: x)._call(inputs) 63 | 64 | for _ in range(n_layer-1): 65 | x = GraphConvolution(dim, dim, placeholders)._call(inputs) 66 | x = GraphConvolution(dim, dim, placeholders, act=lambda x: x)._call(x) 67 | return x 68 | -------------------------------------------------------------------------------- /Recom/KGNN-LS/src/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/HGB/ca6fd5bb0c1ca32e63b132c8bfe8f11a4a6629fe/Recom/KGNN-LS/src/utils/__init__.py -------------------------------------------------------------------------------- /Recom/KGNN-LS/src/utils/sp_gat.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | 4 | from utils import layers 5 | #from models.base_gattn import BaseGAttN 6 | 7 | 8 | class SpGAT: # (BaseGAttN): 9 | def inference(inputs, nb_classes, nb_nodes, training, attn_drop, ffd_drop, 10 | bias_mat, hid_units, n_heads, activation=tf.nn.elu, 11 | residual=False): 12 | attns = [] 13 | if(len(n_heads) == 1): 14 | out = [] 15 | for _ in range(n_heads[0]): 16 | out.append(layers.sp_attn_head(inputs, 17 | adj_mat=bias_mat, 18 | out_sz=nb_classes, activation=lambda x: x, nb_nodes=nb_nodes, 19 | in_drop=ffd_drop, coef_drop=attn_drop, residual=False)) 20 | logits = tf.add_n(out) / n_heads[-1] 21 | return logits 22 | 23 | for _ in range(n_heads[0]): 24 | attns.append(layers.sp_attn_head(inputs, 25 | adj_mat=bias_mat, 26 | out_sz=hid_units[0], activation=activation, nb_nodes=nb_nodes, 27 | in_drop=ffd_drop, coef_drop=attn_drop, residual=False)) 28 | h_1 = tf.concat(attns, axis=-1) 29 | for i in range(1, len(hid_units)): 30 | h_old = h_1 31 | attns = [] 32 | for _ in range(n_heads[i]): 33 | attns.append(layers.sp_attn_head(h_1, 34 | adj_mat=bias_mat, 35 | out_sz=hid_units[i], activation=activation, nb_nodes=nb_nodes, 36 | in_drop=ffd_drop, coef_drop=attn_drop, residual=residual)) 37 | h_1 = tf.concat(attns, axis=-1) 38 | out = [] 39 | for i in range(n_heads[-1]): 40 | out.append(layers.sp_attn_head(h_1, adj_mat=bias_mat, 41 | out_sz=nb_classes, activation=lambda x: x, nb_nodes=nb_nodes, 42 | in_drop=ffd_drop, coef_drop=attn_drop, residual=False)) 43 | logits = tf.add_n(out) / n_heads[-1] 44 | 45 | return logits 46 | 
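The `GCN` wrapper in `Recom/KGNN-LS/src/gcn/layers.py` above stacks `GraphConvolution` layers over a sparse support matrix. Here is a minimal usage sketch, not from the repository: it assumes TensorFlow 1.x (as stated in the READMEs), a toy 4-node graph, and made-up normalization values for the sparse adjacency.

```python
import numpy as np
import tensorflow as tf
from gcn.layers import GCN

# toy 4-node graph; the edge weights stand in for a precomputed normalized adjacency
indices = np.array([[0, 1], [1, 0], [2, 3], [3, 2]], dtype=np.int64)
values = np.array([0.5, 0.5, 0.5, 0.5], dtype=np.float32)
A = tf.SparseTensor(indices=indices, values=values, dense_shape=[4, 4])

x = tf.placeholder(tf.float32, shape=[4, 16])   # node embeddings; width must equal `dim`
out = GCN(x, dim=16, drop=0.0, A=A, n_layer=2)  # a ReLU layer followed by a linear layer

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    emb = sess.run(out, feed_dict={x: np.random.randn(4, 16).astype(np.float32)})
    print(emb.shape)  # (4, 16)
```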
-------------------------------------------------------------------------------- /Recom/README.md: -------------------------------------------------------------------------------- 1 | For benchmark experiments, Simple-HGN is in the baseline folder and the other methods are in the KGAT folder. 2 | -------------------------------------------------------------------------------- /Recom/baseline/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Xiang Wang 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /Recom/baseline/Model/utility/helper.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Created on Aug 19, 2016 3 | @author: Xiang Wang (xiangwang@u.nus.edu) 4 | ''' 5 | __author__ = "xiangwang" 6 | import os 7 | import re 8 | 9 | def txt2list(file_src): 10 | orig_file = open(file_src, "r") 11 | lines = orig_file.readlines() 12 | return lines 13 | 14 | 15 | def ensureDir(dir_path): 16 | d = os.path.dirname(dir_path) 17 | if not os.path.exists(d): 18 | os.makedirs(d) 19 | 20 | 21 | def uni2str(unicode_str): 22 | return str(unicode_str.encode('ascii', 'ignore')).replace('\n', '').strip() 23 | 24 | 25 | def hasNumbers(inputString): 26 | return bool(re.search(r'\d', inputString)) 27 | 28 | def delMultiChar(inputString, chars): 29 | for ch in chars: 30 | inputString = inputString.replace(ch, '') 31 | return inputString 32 | 33 | def merge_two_dicts(x, y): 34 | z = x.copy() # start with x's keys and values 35 | z.update(y) # modifies z with y's keys and values & returns None 36 | return z 37 | 38 | def early_stopping(log_value, best_value, stopping_step, expected_order='acc', flag_step=100): 39 | # early stopping strategy: 40 | assert expected_order in ['acc', 'dec'] 41 | 42 | if (expected_order == 'acc' and log_value >= best_value) or (expected_order == 'dec' and log_value <= best_value): 43 | stopping_step = 0 44 | best_value = log_value 45 | else: 46 | stopping_step += 1 47 | 48 | if stopping_step >= flag_step: 49 | print("Early stopping is triggered at step: {} log:{}".format(flag_step, log_value)) 50 | should_stop = True 51 | else: 52 | should_stop = False 53 | return best_value, stopping_step, should_stop -------------------------------------------------------------------------------- /Recom/baseline/README.md:
-------------------------------------------------------------------------------- 1 | # new baseline for recommendation 2 | 3 | Adapted from [xiangwang1223/knowledge_graph_attention_network](https://github.com/xiangwang1223/knowledge_graph_attention_network/tree/master/Model). 4 | 5 | ## running environment 6 | 7 | * torch and dgl latest 8 | 9 | ## running procedure 10 | 11 | * Download Data folder from [tsinghua-cloud](https://cloud.tsinghua.edu.cn/d/2bafd2674d5d43299dfa/) or [google-drive](https://drive.google.com/drive/folders/19unGR1awscvbcGy4VT7pLqCdHHegkSx7?usp=sharing) and unzip to **current** folder 12 | * Download pretrain folder from [tsinghua-cloud](https://cloud.tsinghua.edu.cn/d/2bafd2674d5d43299dfa/) or [google-drive](https://drive.google.com/drive/folders/19unGR1awscvbcGy4VT7pLqCdHHegkSx7?usp=sharing) and unzip to **Model** folder 13 | * cd to **Model** folder and run 14 | 15 | ## run 16 | 17 | ```bash 18 | python main.py --model_type baseline --dataset movie-lens --layer_size [64,32,16] --embed_size 64 --lr 0.0001 --epoch 1000 --verbose 1 --save_flag 1 --pretrain -1 --batch_size 8192 --gpu_id 0 19 | python main.py --model_type baseline --dataset last-fm --layer_size [64,32,16] --embed_size 64 --lr 0.0001 --epoch 1000 --verbose 1 --save_flag 1 --pretrain -1 --batch_size 8192 --gpu_id 0 20 | python main.py --model_type baseline --dataset yelp2018 --layer_size [64,32,16] --embed_size 64 --lr 0.0001 --epoch 1000 --verbose 1 --save_flag 1 --pretrain -1 --batch_size 8192 --gpu_id 0 21 | python main.py --model_type baseline --dataset amazon-book --layer_size [64,32,16] --embed_size 64 --lr 0.0001 --epoch 1000 --verbose 1 --save_flag 1 --pretrain -1 --batch_size 8192 --gpu_id 0 22 | ``` 23 | -------------------------------------------------------------------------------- /TC/HGAT/data/example/test.list: -------------------------------------------------------------------------------- 1 | 9071 2 | 2560 3 | 6761 4 | 8523 5 | 6763 6 | 2564 7 | 8522 8 | 7632 -------------------------------------------------------------------------------- /TC/HGAT/data/example/train.list: -------------------------------------------------------------------------------- 1 | 5223 2 | 7630 3 | 2131 4 | 2563 5 | 2132 6 | 9073 7 | 1 8 | 5221 9 | 0 10 | 5220 11 | 2130 12 | 2561 13 | 2134 14 | 2133 15 | 2562 16 | 9072 -------------------------------------------------------------------------------- /TC/HGAT/data/example/vali.list: -------------------------------------------------------------------------------- 1 | 3 2 | 9070 3 | 6762 4 | 9074 5 | 6764 6 | 8521 7 | 4 8 | 2 9 | 7633 10 | 6760 11 | 8524 12 | 7631 13 | 5224 14 | 8520 15 | 5222 16 | 7634 -------------------------------------------------------------------------------- /TC/HGAT/model/code/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from __future__ import division 3 | -------------------------------------------------------------------------------- /TC/HGAT/model/code/baseline/README.md: -------------------------------------------------------------------------------- 1 | # new baseline for benchmark 2 | 3 | For message passing with relation attention version: 4 | 5 | ``` 6 | python run_new.py --dataset DBLP 7 | python run_new.py --dataset ACM --feats-type 2 8 | python run_multi.py --dataset IMDB --feats-type 0 9 | ``` 10 | 11 | For relational gat version: 12 | 13 | ``` 14 | python run.py --dataset DBLP 15 | ``` 16 | 17 | ## running environment 18 | 19 | * torch 
1.6.0 cuda 10.1 20 | * dgl 0.4.3 cuda 10.1 21 | * networkx 2.3 22 | * scikit-learn 0.23.2 23 | * scipy 1.5.2 24 | -------------------------------------------------------------------------------- /TC/HGAT/model/code/baseline/new_main.py: -------------------------------------------------------------------------------- 1 | """ 2 | define model 3 | """ 4 | weight_size = eval(args.layer_size) 5 | num_layers = len(weight_size) - 2 6 | heads = [args.heads] * num_layers + [1] 7 | model = myGAT(config['n_users']+config['n_entities'], args.kge_size, config['n_relations']*2+1, args.embed_size, weight_size[-2], weight_size[-1], num_layers, heads, F.elu, 0.1, 0., 0.05, False, pretrain=pretrain_data, alpha=1.0).cuda() 8 | 9 | 10 | """ 11 | build feed input 12 | """ 13 | edge2type = {} 14 | for i,mat in enumerate(data_generator.lap_list): 15 | for u,v in zip(*mat.nonzero()): 16 | edge2type[(u,v)] = i 17 | for i in range(data_generator.n_users+data_generator.n_entities): 18 | edge2type[(i,i)] = len(data_generator.lap_list) 19 | 20 | adjM = sum(data_generator.lap_list) 21 | adjM[adjM>1.] = 1. 22 | print(len(adjM.nonzero()[0])) 23 | g = dgl.from_scipy(adjM, eweight_name='weight') 24 | g = dgl.remove_self_loop(g) # these two lines are vital, because we want self-loop to be the last edges 25 | g = dgl.add_self_loop(g) 26 | g.edata['weight'][g.edata['weight']==0.] = 1. 27 | e_feat = [] 28 | edge2id = {} 29 | for u, v in zip(*g.edges()): 30 | u = u.item() 31 | v = v.item() 32 | if u == v: 33 | break 34 | e_feat.append(edge2type[(u,v)]) 35 | edge2id[(u,v)] = len(edge2id) 36 | no_self_loop = len(e_feat) 37 | for i in range(data_generator.n_users+data_generator.n_entities): 38 | e_feat.append(edge2type[(i,i)]) 39 | edge2id[(i,i)] = len(edge2id) 40 | self_loop = len(e_feat) - no_self_loop 41 | must = torch.tensor([True]*self_loop) 42 | e_feat = torch.tensor(e_feat, dtype=torch.long) 43 | 44 | 45 | """ 46 | call model 47 | """ 48 | model(g, e_feat) -------------------------------------------------------------------------------- /TC/HGAT/model/code/baseline/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/HGB/ca6fd5bb0c1ca32e63b132c8bfe8f11a4a6629fe/TC/HGAT/model/code/baseline/utils/__init__.py -------------------------------------------------------------------------------- /TC/HGAT/model/code/baseline/utils/data.py: -------------------------------------------------------------------------------- 1 | import networkx as nx 2 | import numpy as np 3 | import scipy 4 | import pickle 5 | import scipy.sparse as sp 6 | 7 | def load_data(prefix='DBLP'): 8 | from scripts.data_loader import data_loader 9 | dl = data_loader('../../data/'+prefix) 10 | features = [] 11 | for i in range(len(dl.nodes['count'])): 12 | th = dl.nodes['attr'][i] 13 | if th is None: 14 | features.append(sp.eye(dl.nodes['count'][i])) 15 | else: 16 | features.append(th) 17 | adjM = sum(dl.links['data'].values()) 18 | labels = np.zeros((dl.nodes['count'][0], dl.labels_train['num_classes']), dtype=int) 19 | val_ratio = 0.2 20 | train_idx = np.nonzero(dl.labels_train['mask'])[0] 21 | np.random.shuffle(train_idx) 22 | split = int(train_idx.shape[0]*val_ratio) 23 | val_idx = train_idx[:split] 24 | train_idx = train_idx[split:] 25 | train_idx = np.sort(train_idx) 26 | val_idx = np.sort(val_idx) 27 | test_idx = np.nonzero(dl.labels_test['mask'])[0] 28 | labels[train_idx] = dl.labels_train['data'][train_idx] 29 | labels[val_idx] = dl.labels_train['data'][val_idx] 30 | if 
-------------------------------------------------------------------------------- /TC/HGAT/model/code/baseline/utils/pytorchtools.py: --------------------------------------------------------------------------------
import numpy as np
import torch


class EarlyStopping:
    """Early stops the training if validation loss doesn't improve after a given patience."""
    def __init__(self, patience, verbose=False, delta=0, save_path='checkpoint.pt'):
        """
        Args:
            patience (int): How long to wait after the last validation-loss improvement.
            verbose (bool): If True, prints a message for each validation loss improvement.
                            Default: False
            delta (float): Minimum change in the monitored quantity to qualify as an improvement.
                           Default: 0
            save_path (str): Where to store the best checkpoint.
                             Default: 'checkpoint.pt'
        """
        self.patience = patience
        self.verbose = verbose
        self.counter = 0
        self.best_score = None
        self.early_stop = False
        self.val_loss_min = np.inf
        self.delta = delta
        self.save_path = save_path

    def __call__(self, val_loss, model):

        score = -val_loss

        if self.best_score is None:
            self.best_score = score
            self.save_checkpoint(val_loss, model)
        elif score < self.best_score - self.delta:
            # score is worse than the best by more than delta: count towards patience
            self.counter += 1
            print(f'EarlyStopping counter: {self.counter} out of {self.patience}')
            if self.counter >= self.patience:
                self.early_stop = True
        else:
            # within delta of the best (or better): checkpoint and reset the counter
            self.best_score = score
            self.save_checkpoint(val_loss, model)
            self.counter = 0

    def save_checkpoint(self, val_loss, model):
        """Saves the model when the validation loss decreases."""
        if self.verbose:
            print(f'Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}). Saving model ...')
        torch.save(model.state_dict(), self.save_path)
        self.val_loss_min = val_loss
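A hedged usage sketch of `EarlyStopping` inside a generic training loop; the model, data, and loss below are illustrative placeholders, not the benchmark's actual scripts:

```python
# Hedged sketch: wiring EarlyStopping into a toy training loop.
import torch
import torch.nn as nn
from utils.pytorchtools import EarlyStopping

model = nn.Linear(16, 4)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
criterion = nn.CrossEntropyLoss()
x, y = torch.randn(64, 16), torch.randint(0, 4, (64,))

stopper = EarlyStopping(patience=5, verbose=True, save_path='checkpoint.pt')
for epoch in range(100):
    model.train()
    optimizer.zero_grad()
    loss = criterion(model(x), y)
    loss.backward()
    optimizer.step()

    model.eval()
    with torch.no_grad():
        val_loss = criterion(model(x), y).item()  # placeholder validation loss
    stopper(val_loss, model)
    if stopper.early_stop:
        print(f'early stopping at epoch {epoch}')
        break

model.load_state_dict(torch.load('checkpoint.pt'))  # restore the best weights
```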
-------------------------------------------------------------------------------- /TC/HGAT/model/code/print_log.py: --------------------------------------------------------------------------------
import sys
import os


class Logger(object):
    """Tee stdout to both the terminal and a log file."""
    def __init__(self, filename="Default.log", remove=True):
        self.terminal = sys.stdout
        # optionally start from a fresh log file
        if remove and os.path.exists(filename):
            os.remove(filename)
        self.log = open(filename, "a")

    def write(self, message):
        self.terminal.write(message)
        self.log.write(message)

    def flush(self):
        # intentionally a no-op; present so the object satisfies the stream interface
        pass

    def change_file(self, filename="Default.log"):
        self.log.close()
        self.log = open(filename, "a")


if __name__ == '__main__':
    # redirect print() output to both the console and a file
    sys.stdout = Logger("yourlogfilename.txt")
    print('content.')
-------------------------------------------------------------------------------- /TC/HGAT/model/data/example/test.map: --------------------------------------------------------------------------------
74
81
116
114
17
32
119
30
-------------------------------------------------------------------------------- /TC/HGAT/model/data/example/train.map: --------------------------------------------------------------------------------
38
24
29
97
121
88
27
12
98
104
59
22
53
86
83
82
-------------------------------------------------------------------------------- /TC/HGAT/model/data/example/vali.map: --------------------------------------------------------------------------------
31
51
35
67
2
90
137
131
141
1
122
60
105
26
79
128
--------------------------------------------------------------------------------
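The example `.list` and `.map` files above are plain-text index files, one integer per line, giving the train/validation/test splits. A minimal hedged reader (one plausible implementation; HGAT's actual loading code is not shown here):

```python
# Hedged sketch: read the one-integer-per-line split files shown above.
def read_index_file(path):
    """Return the indices stored one per line in a .list/.map file."""
    with open(path) as f:
        return [int(line) for line in f if line.strip()]

train_idx = read_index_file('TC/HGAT/model/data/example/train.map')
vali_idx = read_index_file('TC/HGAT/model/data/example/vali.map')
test_idx = read_index_file('TC/HGAT/model/data/example/test.map')
print(len(train_idx), len(vali_idx), len(test_idx))  # 16 16 8
```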