├── output
│   ├── WikiElec_inward.npy
│   └── WikiElec_outward.npy
├── src
│   ├── __pycache__
│   │   ├── SLF.cpython-37.pyc
│   │   └── utils.cpython-37.pyc
│   ├── main.py
│   ├── SLF.py
│   └── utils.py
└── README.md

/output/WikiElec_inward.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WHU-SNA/SLF/HEAD/output/WikiElec_inward.npy
--------------------------------------------------------------------------------
/output/WikiElec_outward.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WHU-SNA/SLF/HEAD/output/WikiElec_outward.npy
--------------------------------------------------------------------------------
/src/__pycache__/SLF.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WHU-SNA/SLF/HEAD/src/__pycache__/SLF.cpython-37.pyc
--------------------------------------------------------------------------------
/src/__pycache__/utils.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WHU-SNA/SLF/HEAD/src/__pycache__/utils.cpython-37.pyc
--------------------------------------------------------------------------------
/src/main.py:
--------------------------------------------------------------------------------
from SLF import SignedLatentFactorModel
from utils import parameter_parser, args_printer, sign_prediction_printer, link_prediction_printer


def main():
    args = parameter_parser()
    args_printer(args)

    SLF = SignedLatentFactorModel(args)
    SLF.fit()
    SLF.save_emb()

    if args.sign_prediction:
        sign_prediction_printer(SLF.logs)
    if args.link_prediction:
        link_prediction_printer(SLF.logs)


if __name__ == "__main__":
    main()
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# SLF
Python implementation of the method proposed in
"[Link Prediction with Signed Latent Factors in Signed Social Networks](https://dl.acm.org/doi/pdf/10.1145/3292500.3330850)", Pinghua Xu, Wenbin Hu, Jia Wu and Bo Du, SIGKDD 2019.

**NOTE** This implementation can be used to solve both ***link prediction*** and ***sign prediction***.

## Overview
This repository is organised as follows:
- `input/` contains four example graphs: `WikiElec`, `WikiRfa`, `Slashdot` and `Epinions`;
- `output/` is the directory that stores the learned node embeddings;
- `src/` contains the implementation of the proposed SLF method.

## Requirements
The implementation is tested under Python 3.7, with the following packages installed:
- `networkx==2.3`
- `numpy==1.16.5`
- `scikit-learn==0.21.3`
- `texttable==1.6.2`
- `tqdm==4.36.1`

## Input
The model takes a graph in `.txt` format as input. Each row is an edge between two nodes, with fields separated by a `space` or `\t`, the file has no header, and node IDs must be non-negative integers. Four example graphs (downloaded from [SNAP](http://snap.stanford.edu/data/#signnets), with node IDs remapped), `WikiElec`, `WikiRfa`, `Slashdot` and `Epinions`, are included in the `input/` directory.
The structure of the input file is the following:

| Source node | Target node | Sign |
| :-----: | :----: | :----: |
| 0 | 1 | -1 |
| 1 | 3 | 1 |
| 1 | 2 | 1 |
| 2 | 4 | -1 |

**NOTE** All the graphs used here are **directed**. However, if you want to handle an **undirected** graph, modify your input file so that each edge (u, v, s) constitutes two rows, like the following:

| Source node | Target node | Sign |
| :-----: | :----: | :----: |
| u | v | s |
| v | u | s |
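
A small preprocessing script can do this conversion. A minimal sketch, assuming a hypothetical input file `my_graph.txt` in the three-column format above:

```python
# Sketch: duplicate every directed edge (u, v, s) as (v, u, s).
import numpy as np

edges = np.loadtxt('./input/my_graph.txt', dtype=int)  # columns: source, target, sign
both = np.vstack([edges, edges[:, [1, 0, 2]]])         # append the reversed copy of each edge
np.savetxt('./input/my_graph_undirected.txt', both, fmt='%d')
```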

## Options
#### Input and output options
```
--edge-path                STR    Input file path                     Default=="./input/WikiElec.txt"
--outward-embedding-path   STR    Outward embedding path              Default=="./output/WikiElec_outward"
--inward-embedding-path    STR    Inward embedding path               Default=="./output/WikiElec_inward"
```
#### Model options
```
--epochs                   INT    Number of training epochs           Default==20
--k1                       INT    Positive SLF dimension              Default==32
--k2                       INT    Negative SLF dimension              Default==32
--p0                       FLOAT  Effect of no feedback               Default==0.001
--n                        INT    Number of noise samples             Default==5
--learning-rate            FLOAT  Learning rate                       Default==0.025
```
#### Evaluation options
```
--test-size                FLOAT  Test ratio                          Default==0.2
--split-seed               INT    Random seed for splitting dataset   Default==16
--link-prediction          BOOL   Make link prediction or not         Default==False
--sign-prediction          BOOL   Make sign prediction or not         Default==True
```
**NOTE** As **sign prediction** is the more popular evaluation task, `--link-prediction` is set to `False` and `--sign-prediction` to `True` by default. You can refer to our paper for the difference between the two tasks.

## Examples
Train an SLF model on the default `WikiElec` dataset, output the performance on the sign prediction task, and save the embeddings:
```
python src/main.py
```

Train an SLF model with a custom epoch number and test ratio:
```
python src/main.py --epochs 30 --test-size 0.3
```

Train an SLF model on the `WikiRfa` dataset and perform the link prediction task but not the sign prediction task:
```
python src/main.py --edge-path ./input/WikiRfa.txt --outward-embedding-path ./output/WikiRfa_outward --inward-embedding-path ./output/WikiRfa_inward --link-prediction True --sign-prediction False
```

If you only want to learn node embeddings for other uses, without spending time on link prediction or sign prediction, run:
```
python src/main.py --link-prediction False --sign-prediction False
```
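
You can also drive the model from Python instead of the command line. A minimal sketch, assuming it is run from the repository root with no extra command line arguments (it mirrors what `src/main.py` does):

```python
# Sketch: programmatic training, equivalent to running src/main.py.
import sys
sys.path.append('./src')

from SLF import SignedLatentFactorModel
from utils import parameter_parser

args = parameter_parser()  # fills in the defaults listed above
args.epochs = 5            # override any option before constructing the model
model = SignedLatentFactorModel(args)
model.fit()
model.save_emb()           # writes the embeddings under output/
```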

## Output

#### Tasks on signed networks
For the **sign prediction** task, we use `AUC` and `Macro-F1` for evaluation.

For the **link prediction** task, we use `AUC@p`, `AUC@n` and `AUC@non` for evaluation; refer to our paper for a detailed description. We admit that `Micro-F1` is not a good choice for evaluation on a dataset with unbalanced labels, so we removed this metric.

We perform the evaluation after each epoch and output provisional results like the following:
```
Epoch 0 Optimizing: 100%|██████████████████████████████████████| 6637/6637 [00:19<00:00, 343.23it/s]
Evaluating...
Sign prediction, epoch 0: AUC 0.832, F1 0.697
Link prediction, epoch 0: AUC@p 0.901, AUC@n 0.750, AUC@non 0.878
Epoch 1 Optimizing: 100%|██████████████████████████████████████| 6637/6637 [00:19<00:00, 345.80it/s]
Evaluating...
Sign prediction, epoch 1: AUC 0.858, F1 0.730
Link prediction, epoch 1: AUC@p 0.882, AUC@n 0.739, AUC@non 0.855
```

When training ends, the evaluation results are printed in tabular format. If `--sign-prediction==True`, the sign prediction results are printed like the following:

| Epoch | AUC | Macro-F1 |
| :-----: | :----: | :----: |
| 0 | 0.832 | 0.697 |
| 1 | 0.858 | 0.730 |
| 2 | 0.838 | 0.739 |
| ... | ... | ... |
| 19 | 0.905 | 0.802 |

And if `--link-prediction==True`, the link prediction results are printed like the following:

| Epoch | AUC@p | AUC@n | AUC@non |
| :-----: | :----: | :----: | :----: |
| 0 | 0.901 | 0.750 | 0.878 |
| 1 | 0.882 | 0.739 | 0.855 |
| 2 | 0.885 | 0.762 | 0.867 |
| ... | ... | ... | ... |
| 19 | 0.943 | 0.920 | 0.948 |


#### Node embeddings
The learned embeddings are saved in `output/` in `.npy` format (supported by `NumPy`). Note that if the maximal node ID is 36, the embedding matrix has 36+1 rows, ordered by node ID (IDs can start from 0). Some IDs may not correspond to an existing node (e.g., node 11 may have been removed from the original dataset); the corresponding rows are simply unused.

You can use the embeddings for any purpose beyond the two evaluation tasks performed here.
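
For example, to load the saved embeddings and inspect the factor interactions of a candidate edge (u, v), a minimal sketch (assuming the default `k1 == k2 == 32`, so the first half of each vector holds the positive factors):

```python
# Sketch: load the learned embeddings and score a candidate edge (u, v).
import numpy as np

W_out = np.load('./output/WikiElec_outward.npy')  # shape: (num_node, k1 + k2)
W_in = np.load('./output/WikiElec_inward.npy')

k1 = W_out.shape[1] // 2  # positive SLF dimension, assuming k1 == k2
u, v = 0, 1
pos_score = W_out[u, :k1] @ W_in[v, :k1]  # positive-factor interaction
neg_score = W_out[u, k1:] @ W_in[v, k1:]  # negative-factor interaction
print('positive:', pos_score, 'negative:', neg_score)
```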

## Baselines
In our paper, we used the following methods for comparison:
- `SIGNet` "Signet: Scalable embeddings for signed networks" [[source](https://github.com/raihan2108/signet)]
- `MF` "Low rank modeling of signed networks"
- `LSNE` "Solving link-oriented tasks in signed network via an embedding approach"
- `SIDE` "Side: representation learning in signed directed networks" [[source](https://datalab.snu.ac.kr/side/)]

## Cite
If you find this repository useful in your research, please cite our paper:

```
@inproceedings{xu2019link,
  title={Link prediction with signed latent factors in signed social networks},
  author={Xu, Pinghua and Hu, Wenbin and Wu, Jia and Du, Bo},
  booktitle={Proceedings of the 25th ACM SIGKDD International Conference on Knowledge Discovery \& Data Mining},
  pages={1046--1054},
  year={2019}
}
```
--------------------------------------------------------------------------------
/src/SLF.py:
--------------------------------------------------------------------------------
import networkx as nx
import numpy as np
import random as rd
from tqdm import tqdm
from utils import fa, read_edge_list, sign_prediction, link_prediction
from sklearn.model_selection import train_test_split


class SignedLatentFactorModel(object):
    def __init__(self, args):
        self.args = args
        self.logs = {'epoch': [], 'sign_prediction_auc': [], 'sign_prediction_macro_f1': [],
                     'link_prediction_auc@p': [], 'link_prediction_auc@n': [], 'link_prediction_auc@non': []}
        self.setup()

        # Positive outward SLF vectors.
        self.W_out_p = np.zeros((self.num_node, self.args.k1))
        # Positive inward SLF vectors.
        self.W_in_p = np.zeros((self.num_node, self.args.k1))
        # Negative outward SLF vectors.
        self.W_out_n = np.zeros((self.num_node, self.args.k2))
        # Negative inward SLF vectors.
        self.W_in_n = np.zeros((self.num_node, self.args.k2))

    def setup(self):
        self.edges, self.num_node = read_edge_list(self.args)
        self.train_edges, self.test_edges = train_test_split(self.edges,
                                                             test_size=self.args.test_size,
                                                             random_state=self.args.split_seed)

        # Generate null-link sets for the link prediction task: three sampled
        # non-adjacent node pairs per observed edge.
        if self.args.link_prediction:
            G = nx.DiGraph()
            G.add_nodes_from(range(self.num_node))
            G.add_edges_from([[e[0], e[1]] for e in self.edges])
            self.train_edges_null, self.test_edges_null = [], []
            for _ in range(3 * len(self.test_edges)):
                u = rd.choice(range(self.num_node))
                v = rd.choice(range(self.num_node))
                while G.has_edge(u, v):
                    v = rd.choice(range(self.num_node))
                self.test_edges_null.append([u, v, 'n'])
            for _ in range(3 * len(self.train_edges)):
                u = rd.choice(range(self.num_node))
                v = rd.choice(range(self.num_node))
                while G.has_edge(u, v):
                    v = rd.choice(range(self.num_node))
                self.train_edges_null.append([u, v, 'n'])

    def fit(self):
        """
        Learn node embeddings.
        """
        G = nx.DiGraph()
        for edge in self.train_edges:
            G.add_edge(edge[0], edge[1], weight=edge[2])
        nodes = list(G.nodes())

        # Initialize the SLF vectors of the training nodes uniformly in [0, 1).
        for i in nodes:
            for j in range(self.args.k1):
                self.W_out_p[i, j] = rd.uniform(0, 1)
                self.W_in_p[i, j] = rd.uniform(0, 1)
            for j in range(self.args.k2):
                self.W_out_n[i, j] = rd.uniform(0, 1)
                self.W_in_n[i, j] = rd.uniform(0, 1)

        for epoch in range(self.args.epochs):
            pbar = tqdm(total=G.number_of_nodes(), desc='Epoch ' + str(epoch) + ' Optimizing', ncols=100)
            # Linearly decay the learning rate over the epochs.
            learning_rate = self.args.learning_rate * (self.args.epochs - epoch) / self.args.epochs
            for u in nodes:
                pbar.update(1)
                out_p_g = np.zeros(self.args.k1)
                out_n_g = np.zeros(self.args.k2)
                in_p_g = np.zeros(self.args.k1)
                in_n_g = np.zeros(self.args.k2)

                # Outgoing edges drive the gradients of u's outward vectors.
                # Materialize the successor list: the iterator returned by
                # G.successors(u) would otherwise be exhausted before the
                # membership tests in the noise-sampling loop below.
                succs = list(G.successors(u))
                for succ in succs:
                    e_p = fa(self.W_out_p[u] @ self.W_in_p[succ], self.args)
                    e_n = fa(self.W_out_n[u] @ self.W_in_n[succ], self.args)
                    if G[u][succ]['weight'] == 1:
                        out_p_g += (1 - e_p) * self.W_in_p[succ]
                        out_n_g -= e_n * self.W_in_n[succ]
                    elif G[u][succ]['weight'] == -1:
                        out_p_g -= e_p * self.W_in_p[succ]
                        out_n_g += (1 - e_n) * self.W_in_n[succ]
                    elif G[u][succ]['weight'] == 0:
                        out_p_g += (1 - e_p) * self.W_in_p[succ]
                        out_n_g += (1 - e_n) * self.W_in_n[succ]
                # Negative sampling: draw n non-successors as noise nodes.
                for i in range(self.args.n):
                    noise = rd.choice(nodes)
                    while noise in succs:
                        noise = rd.choice(nodes)
                    e_p = fa(self.W_out_p[u] @ self.W_in_p[noise], self.args)
                    e_n = fa(self.W_out_n[u] @ self.W_in_n[noise], self.args)
                    out_p_g -= e_p * self.W_in_p[noise]
                    out_n_g -= e_n * self.W_in_n[noise]
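
                # Incoming edges drive the gradients of u's inward vectors,
                # mirroring the successor-side updates above.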
                pres = list(G.predecessors(u))  # materialized for the same reason as succs
                for pre in pres:
                    e_p = fa(self.W_out_p[pre] @ self.W_in_p[u], self.args)
                    e_n = fa(self.W_out_n[pre] @ self.W_in_n[u], self.args)
                    if G[pre][u]['weight'] == 1:
                        in_p_g += (1 - e_p) * self.W_out_p[pre]
                        in_n_g -= e_n * self.W_out_n[pre]
                    elif G[pre][u]['weight'] == -1:
                        in_p_g -= e_p * self.W_out_p[pre]
                        in_n_g += (1 - e_n) * self.W_out_n[pre]
                    elif G[pre][u]['weight'] == 0:
                        in_p_g += (1 - e_p) * self.W_out_p[pre]
                        in_n_g += (1 - e_n) * self.W_out_n[pre]
                for i in range(self.args.n):
                    noise = rd.choice(nodes)
                    while noise in pres:
                        noise = rd.choice(nodes)
                    e_p = fa(self.W_out_p[noise] @ self.W_in_p[u], self.args)
                    e_n = fa(self.W_out_n[noise] @ self.W_in_n[u], self.args)
                    in_p_g -= e_p * self.W_out_p[noise]
                    in_n_g -= e_n * self.W_out_n[noise]

                # Gradient ascent step, then projection onto the non-negative
                # orthant (the SLF vectors must stay non-negative).
                self.W_out_p[u] += learning_rate * out_p_g
                self.W_in_p[u] += learning_rate * in_p_g
                self.W_out_n[u] += learning_rate * out_n_g
                self.W_in_n[u] += learning_rate * in_n_g
                self.W_out_p[u] = np.maximum(self.W_out_p[u], 0)
                self.W_in_p[u] = np.maximum(self.W_in_p[u], 0)
                self.W_out_n[u] = np.maximum(self.W_out_n[u], 0)
                self.W_in_n[u] = np.maximum(self.W_in_n[u], 0)
            pbar.close()

            # Concatenate positive and negative factors for evaluation.
            W_out = np.hstack([self.W_out_p, self.W_out_n])
            W_in = np.hstack([self.W_in_p, self.W_in_n])
            print('Evaluating...')
            self.logs['epoch'].append(epoch)  # recorded once per epoch for the final tables
            if self.args.sign_prediction:
                auc, f1 = sign_prediction(W_out, W_in, self.train_edges, self.test_edges)
                self.logs['sign_prediction_auc'].append(auc)
                self.logs['sign_prediction_macro_f1'].append(f1)
                print('Sign prediction, epoch %d: AUC %.3f, F1 %.3f' % (epoch, auc, f1))
            if self.args.link_prediction:
                auc_p, auc_n, auc_null = link_prediction(W_out, W_in, self.train_edges, self.test_edges,
                                                         self.train_edges_null, self.test_edges_null, self.num_node)
                self.logs['link_prediction_auc@p'].append(auc_p)
                self.logs['link_prediction_auc@n'].append(auc_n)
                self.logs['link_prediction_auc@non'].append(auc_null)
                print('Link prediction, epoch %d: AUC@p %.3f, AUC@n %.3f, AUC@non %.3f' % (epoch, auc_p, auc_n, auc_null))

    def save_emb(self):
        """
        Save the node embeddings in .npy format.
        """
        W_out = np.hstack([self.W_out_p, self.W_out_n])
        W_in = np.hstack([self.W_in_p, self.W_in_n])
        np.save(self.args.outward_embedding_path, W_out)
        np.save(self.args.inward_embedding_path, W_in)
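

# Note: np.save appends the '.npy' extension when it is missing, so the default
# output paths (e.g. './output/WikiElec_outward') produce the '.npy' files
# shipped in output/.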
--------------------------------------------------------------------------------
/src/utils.py:
--------------------------------------------------------------------------------
import argparse
import numpy as np
import networkx as nx
from sklearn.linear_model import LogisticRegression, LogisticRegressionCV
from sklearn.utils.testing import ignore_warnings
from sklearn.exceptions import ConvergenceWarning
from sklearn.metrics import roc_auc_score, f1_score
from sklearn.preprocessing import StandardScaler
from texttable import Texttable


def str2bool(s):
    """
    Parse a boolean command line flag. argparse's type=bool treats any
    non-empty string (including "False") as True, so flags such as
    --sign-prediction False need an explicit parser.
    """
    return s.lower() in ('true', '1', 'yes')


def parameter_parser():
    """
    Parse command line parameters.
    """
    parser = argparse.ArgumentParser(description="Run SLF.")
    parser.add_argument("--edge-path",
                        nargs="?",
                        default="./input/WikiElec.txt",
                        help="Edge list in txt format.")
    parser.add_argument("--outward-embedding-path",
                        nargs="?",
                        default="./output/WikiElec_outward",
                        help="Outward embedding path.")
    parser.add_argument("--inward-embedding-path",
                        nargs="?",
                        default="./output/WikiElec_inward",
                        help="Inward embedding path.")
    parser.add_argument("--epochs",
                        type=int,
                        default=20,
                        help="Number of training epochs. Default is 20.")
    parser.add_argument("--k1",
                        type=int,
                        default=32,
                        help="Dimension of positive SLF. Default is 32.")
    parser.add_argument("--k2",
                        type=int,
                        default=32,
                        help="Dimension of negative SLF. Default is 32.")
    parser.add_argument("--p0",
                        type=float,
                        default=0.001,
                        help="Effect of no feedback. Default is 0.001.")
    parser.add_argument("--n",
                        type=int,
                        default=5,
                        help="Number of noise samples. Default is 5.")
    parser.add_argument("--link-prediction",
                        type=str2bool,
                        default=False,
                        help="Make link prediction or not. Default is False.")
    parser.add_argument("--sign-prediction",
                        type=str2bool,
                        default=True,
                        help="Make sign prediction or not. Default is True.")
    parser.add_argument("--test-size",
                        type=float,
                        default=0.2,
                        help="Test ratio. Default is 0.2.")
    parser.add_argument("--split-seed",
                        type=int,
                        default=16,
                        help="Random seed for splitting dataset. Default is 16.")
    parser.add_argument("--learning-rate",
                        type=float,
                        default=0.025,
                        help="Learning rate. Default is 0.025.")

    return parser.parse_args()


def fa(x, args):
    """
    Activation function f_a(x) = p0 * e^x / (1 + p0 * (e^x - 1)).
    """
    if x > 15:  # treat large inputs as saturated; also avoids overflow in np.exp
        tmp = 1
    else:
        tmp = args.p0 * np.exp(x) / (1 + args.p0 * (np.exp(x) - 1))
    return tmp
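
# f_a is a shifted sigmoid with f_a(0) == p0: with the default p0 = 0.001, a
# zero dot product maps to probability 0.001, and large positive dot products
# saturate towards 1.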

def read_edge_list(args):
    """
    Load edges from a txt file.
    """
    G = nx.DiGraph()
    edges = np.loadtxt(args.edge_path)
    for i in range(edges.shape[0]):
        G.add_edge(int(edges[i][0]), int(edges[i][1]), weight=edges[i][2])
    edges = [[e[0], e[1], e[2]['weight']] for e in G.edges.data()]
    return edges, max(G.nodes) + 1  # IDs can start from 0, hence the +1.


@ignore_warnings(category=ConvergenceWarning)
def sign_prediction(out_emb, in_emb, train_edges, test_edges):
    """
    Evaluate the performance on the sign prediction task.
    :param out_emb: Outward embeddings.
    :param in_emb: Inward embeddings.
    :param train_edges: Edges for training the classifier.
    :param test_edges: Edges for test.
    :return: AUC and Macro-F1 scores.
    """
    out_dim = out_emb.shape[1]
    in_dim = in_emb.shape[1]
    train_x = np.zeros((len(train_edges), (out_dim + in_dim) * 2))
    train_y = np.zeros((len(train_edges), 1))
    for i, edge in enumerate(train_edges):
        u, v = edge[0], edge[1]
        train_y[i] = 1 if edge[2] > 0 else 0
        train_x[i, : out_dim] = out_emb[u]
        train_x[i, out_dim: out_dim + in_dim] = in_emb[u]
        train_x[i, out_dim + in_dim: out_dim * 2 + in_dim] = out_emb[v]
        train_x[i, out_dim * 2 + in_dim:] = in_emb[v]

    test_x = np.zeros((len(test_edges), (out_dim + in_dim) * 2))
    test_y = np.zeros((len(test_edges), 1))
    for i, edge in enumerate(test_edges):
        u, v = edge[0], edge[1]
        test_y[i] = 1 if edge[2] > 0 else 0
        test_x[i, : out_dim] = out_emb[u]
        test_x[i, out_dim: out_dim + in_dim] = in_emb[u]
        test_x[i, out_dim + in_dim: out_dim * 2 + in_dim] = out_emb[v]
        test_x[i, out_dim * 2 + in_dim:] = in_emb[v]

    ss = StandardScaler()
    train_x = ss.fit_transform(train_x)
    test_x = ss.transform(test_x)  # reuse the statistics fitted on the training set
    lr = LogisticRegression(solver='lbfgs')
    lr.fit(train_x, train_y.ravel())
    test_y_score = lr.predict_proba(test_x)[:, 1]
    test_y_pred = lr.predict(test_x)
    auc_score = roc_auc_score(test_y, test_y_score, average='macro')
    macro_f1_score = f1_score(test_y, test_y_pred, average='macro')

    return auc_score, macro_f1_score
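
# Feature layout shared by sign_prediction above and link_prediction below: an
# edge (u, v) is represented as [out_emb[u] | in_emb[u] | out_emb[v] | in_emb[v]],
# giving (out_dim + in_dim) * 2 features for the logistic-regression classifiers.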

@ignore_warnings(category=ConvergenceWarning)
def link_prediction(out_emb, in_emb, train_edges, test_edges, train_edges_null, test_edges_null, num_node):
    """
    Evaluate the performance on the link prediction task.
    :param out_emb: Outward embeddings.
    :param in_emb: Inward embeddings.
    :param train_edges: Edges for training the classifier.
    :param test_edges: Edges for test.
    :param train_edges_null: Null (non-existent) links for training.
    :param test_edges_null: Null (non-existent) links for test.
    :param num_node: Number of nodes.
    :return: AUC@p, AUC@n and AUC@non scores.
    """
    out_dim = out_emb.shape[1]
    in_dim = in_emb.shape[1]
    train_x = np.zeros((len(train_edges) + len(train_edges_null), (out_dim + in_dim) * 2))
    train_y = np.zeros((len(train_edges) + len(train_edges_null), 1))
    for i, edge in enumerate(train_edges):
        u, v = edge[0], edge[1]
        train_x[i, : out_dim] = out_emb[u]
        train_x[i, out_dim: out_dim + in_dim] = in_emb[u]
        train_x[i, out_dim + in_dim: out_dim * 2 + in_dim] = out_emb[v]
        train_x[i, out_dim * 2 + in_dim:] = in_emb[v]
        train_y[i] = 1 if edge[2] > 0 else -1

    for i, edge in enumerate(train_edges_null):
        i += len(train_edges)
        u, v = edge[0], edge[1]
        train_x[i, : out_dim] = out_emb[u]
        train_x[i, out_dim: out_dim + in_dim] = in_emb[u]
        train_x[i, out_dim + in_dim: out_dim * 2 + in_dim] = out_emb[v]
        train_x[i, out_dim * 2 + in_dim:] = in_emb[v]
        train_y[i] = 0

    test_x = np.zeros((len(test_edges) + len(test_edges_null), (out_dim + in_dim) * 2))
    test_y = np.zeros((len(test_edges) + len(test_edges_null), 1))
    for i, edge in enumerate(test_edges):
        u, v = edge[0], edge[1]
        test_x[i, : out_dim] = out_emb[u]
        test_x[i, out_dim: out_dim + in_dim] = in_emb[u]
        test_x[i, out_dim + in_dim: out_dim * 2 + in_dim] = out_emb[v]
        test_x[i, out_dim * 2 + in_dim:] = in_emb[v]
        test_y[i] = 1 if edge[2] > 0 else -1

    for i, edge in enumerate(test_edges_null):
        i += len(test_edges)
        u, v = edge[0], edge[1]
        test_x[i, : out_dim] = out_emb[u]
        test_x[i, out_dim: out_dim + in_dim] = in_emb[u]
        test_x[i, out_dim + in_dim: out_dim * 2 + in_dim] = out_emb[v]
        test_x[i, out_dim * 2 + in_dim:] = in_emb[v]
        test_y[i] = 0

    ss = StandardScaler()
    train_x = ss.fit_transform(train_x)
    test_x = ss.transform(test_x)  # reuse the statistics fitted on the training set
    # Three-class classifier over labels {-1 (negative link), 0 (null), 1 (positive link)}.
    lr = LogisticRegressionCV(fit_intercept=True, max_iter=100, multi_class='multinomial', Cs=np.logspace(-2, 2, 20),
                              cv=2, penalty="l2", solver="lbfgs", tol=0.01)
    lr.fit(train_x, train_y.ravel())
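    # predict_proba returns one column per class, in ascending label order:
    # column 0 -> class -1 (negative), column 1 -> class 0 (null),
    # column 2 -> class +1 (positive). Each AUC below is one-vs-rest.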
    pred_prob = lr.predict_proba(test_x)
    test_y = test_y.ravel()
    auc_score_pos = roc_auc_score((test_y == 1).astype(int), pred_prob[:, 2])
    auc_score_neg = roc_auc_score((test_y == -1).astype(int), pred_prob[:, 0])
    auc_score_null = roc_auc_score((test_y == 0).astype(int), pred_prob[:, 1])

    return auc_score_pos, auc_score_neg, auc_score_null


def args_printer(args):
    """
    Print the parameters in tabular format.
    :param args: Parameters used for the model.
    """
    args = vars(args)
    t = Texttable()
    rows = [[k, args[k]] for k in args.keys()]
    rows.insert(0, ["Parameter", "Value"])
    t.add_rows(rows)
    print(t.draw())


def sign_prediction_printer(logs):
    """
    Print the performance on the sign prediction task in tabular format.
    :param logs: Logs of the evaluation.
    """
    t = Texttable()
    epoch_list = logs['epoch']
    auc_list = logs['sign_prediction_auc']
    macrof1_list = logs['sign_prediction_macro_f1']
    rows = [[epoch_list[i], auc_list[i], macrof1_list[i]] for i in range(len(epoch_list))]
    rows.insert(0, ['Epoch', 'AUC', 'Macro-F1'])
    t.add_rows(rows)
    print(t.draw())


def link_prediction_printer(logs):
    """
    Print the performance on the link prediction task in tabular format.
    :param logs: Logs of the evaluation.
    """
    t = Texttable()
    epoch_list = logs['epoch']
    auc_p_list = logs['link_prediction_auc@p']
    auc_n_list = logs['link_prediction_auc@n']
    auc_non_list = logs['link_prediction_auc@non']
    rows = [[epoch_list[i], auc_p_list[i], auc_n_list[i], auc_non_list[i]] for i in range(len(epoch_list))]
    rows.insert(0, ['Epoch', 'AUC@p', 'AUC@n', 'AUC@non'])
    t.add_rows(rows)
    print(t.draw())
--------------------------------------------------------------------------------