├── attrirank.pdf ├── .travis.yml ├── requirements.txt ├── README.md ├── src ├── main.py └── AttriRank.py ├── tests └── test_attrirank.py ├── AttriRank_inC ├── edge.txt └── AttriRank.cpp └── sample └── graph.edgelist /attrirank.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ntumslab/AttriRank/HEAD/attrirank.pdf -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | cache: pip 3 | sudo: required 4 | 5 | 6 | python: 7 | - "3.5" 8 | 9 | 10 | before_install: 11 | - pip install -U pip 12 | - pip install wheel 13 | - pip install coveralls 14 | - sudo apt-get update 15 | 16 | 17 | env: 18 | global: 19 | - PIP_WHEEL_DIR=$HOME/.cache/pip/wheels 20 | - PIP_FIND_LINKS=file://$HOME/.cache/pip/wheels 21 | 22 | 23 | install: 24 | - pip wheel -r requirements.txt 25 | - pip install -r requirements.txt 26 | 27 | 28 | script: 29 | - py.test . --cov=./ 30 | - flake8 ./ 31 | 32 | 33 | after_success: 34 | - coveralls 35 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | # the modular source code checker: pep8, pyflakes and co 2 | # https://pypi.python.org/pypi/flake8/2.5.4 3 | flake8==2.5.4 4 | 5 | # pytest: simple powerful testing with Python 6 | # https://pypi.python.org/pypi/pytest/2.8.3 7 | pytest==2.8.3 8 | 9 | # Code coverage measurement for Python 10 | # https://pypi.python.org/pypi/coverage 11 | coverage==4.2 12 | 13 | # Pytest plugin for measuring coverage. 14 | # https://pypi.python.org/pypi/pytest-cov/2.2.0 15 | pytest-cov==2.2.0 16 | 17 | # Powerful extensions to the datetime module available in the Python standard library. 18 | # https://pypi.python.org/pypi/python-dateutil/2.5.3 19 | python-dateutil==2.5.3 20 | 21 | # NumPy: array processing for numbers, strings, records, and objects. 22 | # https://pypi.python.org/pypi/numpy 23 | numpy==1.11.1 24 | 25 | # World timezone definitions, modern and historical 26 | # https://pypi.python.org/pypi/pytz 27 | pytz==2016.7 28 | 29 | # Powerful data structures for data analysis, time series,and statistics 30 | # https://pypi.python.org/pypi/pandas/0.18.1 31 | pandas==0.18.1 32 | 33 | # SciPy: a ecosystem of open-source software for mathematics, science, and engineering. 34 | # https://pypi.python.org/pypi/scipy/0.18.0rc2 35 | scipy==0.18.0 36 | 37 | # Powerful Python module for machine learning 38 | # https://pypi.python.org/pypi/scikit-learn/0.17.1 39 | scikit-learn==0.17.1 40 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # AttriRank 2 | [![Build Status](https://travis-ci.org/ntumslab/AttriRank.svg?branch=master)](https://travis-ci.org/ntumslab/AttriRank) 3 | 4 | AttriRank is an unsupervised ranking model that considers not only graph structure but also the attributes of nodes. 5 | 6 | A reference implementation of *AttriRank* in the paper (please see the file - attrirank.pdf):
7 | > Unsupervised Ranking using Graph Structures and Node Attributes
8 | > Chin-Chi Hsu, Yi-An Lai, Wen-Hao Chen, Ming-Han Feng, and Shou-De Lin
9 | > Web Search and Data Mining (WSDM), 2017
10 | 11 | ## Usage 12 | 13 | ### Example 14 | Run AttriRank on sample graph with features, using damp [0.2, 0.5, 0.8]: 15 | 16 | python src/main.py --damp 0.2 0.5 0.8 --inputgraph sample/graph.edgelist --inputfeature sample/graph.feature 17 | 18 | #### Options 19 | Check out optional arguments such as AttriRank with prior, different similarity kernels by: 20 | 21 | python src/main.py --help 22 | 23 | ### Inputs 24 | Supported graph format is the edgelist: 25 | 26 | node_from node_to 27 | 28 | Supported feature format is the table (Comma-Separated Values): 29 | 30 | node_i, feat_dim_1, feat_dim_2, ... 31 | 32 | Default settings for graph are directed and unweighted. 33 | 34 | ### Output 35 | 36 | A comma-separated table of ranking scores with columns: [node_id, damp1, damp2, ...] 37 | 38 | node_id,0.2,0.5,0.8 39 | 0,score_1,score_2,score_3 40 | ... 41 | 42 | where score_1 is the ranking score of node 0 using AttriRank with damp 0.2. 43 | 44 | ## Requirements 45 | Install all dependencies: 46 | 47 | pip install -r requirements.txt 48 | 49 | ## Citing 50 | 51 | If you find *AttriRank* useful in your research, please consider citing the paper: 52 | 53 | @inproceedings{Hsu:2017:URU:3018661.3018668, 54 | author = {Hsu, Chin-Chi and Lai, Yi-An and Chen, Wen-Hao and Feng, Ming-Han and Lin, Shou-De}, 55 | title = {Unsupervised Ranking Using Graph Structures and Node Attributes}, 56 | booktitle = {Proceedings of the Tenth ACM International Conference on Web Search and Data Mining}, 57 | series = {WSDM '17}, 58 | year = {2017}, 59 | } 60 | 61 | ## Miscellaneous 62 | 63 | If having any questions about the paper, please contact us at .
64 | If having any questions about codes, please contact us at . 65 | -------------------------------------------------------------------------------- /src/main.py: -------------------------------------------------------------------------------- 1 | """ 2 | Implementation of AttriRank. 3 | 4 | Author: Yi-An Lai 5 | 6 | For more details, refer to the paper: 7 | Unsupervised Ranking using Graph Structures and Node Attributes 8 | Chin-Chi Hsu, Yi-An Lai, Wen-Hao Chen, Ming-Han Feng, and Shou-De Lin 9 | Web Search and Data Mining (WSDM), 2017 10 | """ 11 | 12 | import argparse 13 | import numpy as np 14 | import pandas as pd 15 | 16 | from AttriRank import AttriRank 17 | 18 | 19 | def parse_args(): 20 | ''' 21 | Parses AttriRank arguments. 22 | ''' 23 | parser = argparse.ArgumentParser(description="Run AttriRank.") 24 | 25 | parser.add_argument('--inputgraph', nargs='?', 26 | default='sample/graph.edgelist', 27 | help='Input graph path') 28 | 29 | parser.add_argument('--inputfeature', nargs='?', 30 | default='sample/graph.feature', 31 | help='Input feature path') 32 | 33 | parser.add_argument('--output', nargs='?', default='graph.rankscore', 34 | help='Output rankscore path') 35 | 36 | parser.add_argument('--kernel', default='rbf_ap', 37 | help='Kernel: rbf_ap, rbf, cos, euc, sigmoid') 38 | 39 | parser.add_argument('--damp', nargs='*', default=[0.5], type=float, 40 | help='damping parameters') 41 | 42 | parser.add_argument('--totalrank', dest='totalrank', action='store_true', 43 | help='Use TotalRank or not. Default is False.') 44 | parser.set_defaults(totalrank=False) 45 | 46 | parser.add_argument('--alpha', type=float, default=1.0, 47 | help='alpha of beta distribution. Default is 1.0.') 48 | 49 | parser.add_argument('--beta', type=float, default=1.0, 50 | help='beta of beta distribution. Default is 1.0.') 51 | 52 | parser.add_argument('--matrix', dest='matrix', action='store_true', 53 | help='Using original Q matrix. Default is False.') 54 | parser.set_defaults(matrix=False) 55 | 56 | parser.add_argument('--print_every', type=int, default=1000, 57 | help='Print TotalRank process. Default is 1000.') 58 | 59 | parser.add_argument('--itermax', type=int, default=100000, 60 | help='Number of max iterations. Default is 100000.') 61 | 62 | parser.add_argument('--weighted', dest='weighted', action='store_true', 63 | help='Specifying (un)weighted. Default is unweighted.') 64 | parser.set_defaults(weighted=False) 65 | 66 | parser.add_argument('--undirected', dest='directed', action='store_false', 67 | help='Graph is (un)directed. Default is directed.') 68 | parser.set_defaults(directed=True) 69 | 70 | return parser.parse_args() 71 | 72 | 73 | def load_graph(filename): 74 | """Read the graph into numpy array""" 75 | return pd.read_csv(filename, sep=' ', header=None).values 76 | 77 | 78 | def load_features(filename): 79 | """Read the features into numpy array, first column as index""" 80 | return pd.read_csv(filename, header=None).set_index(0).values 81 | 82 | 83 | def main(args): 84 | """ 85 | Pipeline for unsupervised ranking using graph and node features 86 | """ 87 | graph = load_graph(args.inputgraph) 88 | feat = load_features(args.inputfeature) 89 | N = len(feat) 90 | 91 | if not args.directed: 92 | graph = np.concatenate((graph, graph[:, [1, 0]])) 93 | 94 | AR = AttriRank(graph, feat, itermax=args.itermax, weighted=args.weighted, 95 | nodeCount=N) 96 | 97 | scores = AR.runModel(factors=args.damp, kernel=args.kernel, 98 | Matrix=args.matrix, TotalRank=args.totalrank, 99 | alpha=args.alpha, beta=args.beta, 100 | print_every=args.print_every) 101 | 102 | df = pd.DataFrame(data=scores) 103 | df.to_csv(args.output, float_format='%.16f', index_label='node_id') 104 | 105 | 106 | args = parse_args() 107 | main(args) 108 | -------------------------------------------------------------------------------- /tests/test_attrirank.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from unittest import TestCase 4 | 5 | from scipy.sparse import csr_matrix 6 | from sklearn import preprocessing 7 | from sklearn.metrics.pairwise import (rbf_kernel, cosine_similarity, 8 | sigmoid_kernel, euclidean_distances) 9 | from collections import defaultdict 10 | 11 | import numpy as np 12 | import sys 13 | import os 14 | 15 | sys.path.append(os.getcwd() + '/src') # noqa 16 | 17 | from AttriRank import AttriRank 18 | 19 | 20 | class TestAttriRank(TestCase): 21 | 22 | def setUp(self): 23 | self.node = 1000 24 | nodefrom = np.random.choice(self.node, 20 * self.node) 25 | nodeto = np.random.choice(self.node, 20 * self.node) 26 | self.fake_graph = np.array(list(zip(nodefrom, nodeto))) 27 | self.fake_features = np.random.randn(self.node, 20) 28 | 29 | def reset_vec(self, kernel='rbf_ap'): 30 | feat = preprocessing.scale(self.fake_features) 31 | count = feat.shape[1] 32 | if kernel == 'rbf': 33 | temp = rbf_kernel(feat, gamma=1.0 / count).sum(axis=0) 34 | elif kernel == 'cos': 35 | temp = ((cosine_similarity(feat) + 1) / 2.0).sum(axis=0) 36 | elif kernel == 'euc': 37 | temp = (1.0 / (euclidean_distances(feat) + 1)).sum(axis=0) 38 | elif kernel == 'sigmoid': 39 | Sig = sigmoid_kernel(feat, coef0=0, gamma=1.0 / count) 40 | temp = ((Sig + 1.0) / 2.0).sum(axis=0) 41 | elif kernel == 'rbf_ap': 42 | gamma = 1.0 / count 43 | expVec = np.exp(- gamma * np.einsum("ij, ij -> i", feat, feat)) 44 | feaVec = np.einsum("i, ij -> j", expVec, feat) * (2.0 * gamma) 45 | outMat = np.einsum("i,ij,ik->jk", expVec, feat, feat) 46 | outMat *= (2.0 * gamma ** 2) 47 | 48 | first = expVec * np.sum(expVec) 49 | second = np.einsum("i, j, ij -> i", expVec, feaVec, feat) 50 | third = np.einsum("i, jk, ij, ik -> i", expVec, outMat, feat, feat) 51 | temp = first + second + third 52 | 53 | return (temp / np.sum(temp)) 54 | 55 | def reset_mat(self): 56 | feat = preprocessing.scale(self.fake_features) 57 | RBF = rbf_kernel(feat, gamma=1.0 / feat.shape[1]) 58 | return RBF / RBF.sum(axis=0) 59 | 60 | def trans_mat(self, weighted=True): 61 | links = defaultdict(int) 62 | for nodefrom, nodeto in self.fake_graph: 63 | links[(nodefrom, nodeto)] += 1.0 64 | 65 | en_col_row = [[], [], []] 66 | for key, val in links.items(): 67 | val = val if weighted else 1 68 | en_col_row[0].append(val) 69 | en_col_row[1].append(key[0]) 70 | en_col_row[2].append(key[1]) 71 | 72 | traMat = csr_matrix((en_col_row[0], (en_col_row[2], en_col_row[1])), 73 | shape=(self.node, self.node)) 74 | traMat = traMat.multiply(csr_matrix(1.0 / traMat.sum(axis=0))) 75 | col_sum = np.array(traMat.sum(axis=0))[0] 76 | dangVec = np.arange(col_sum.shape[0])[col_sum == 0] 77 | 78 | return traMat, dangVec 79 | 80 | def PageRank(self, damp, Matrix=False, kernel='rbf_ap'): 81 | traMat, dang = self.trans_mat() 82 | if Matrix: 83 | reMat = self.reset_mat() 84 | reVec = self.reset_vec(kernel=kernel) 85 | 86 | if damp == 0: 87 | return reVec, {} 88 | 89 | track = {} 90 | track[damp] = [] 91 | result = np.ones(self.node) / self.node 92 | 93 | for i in range(1000000): 94 | dangScore = np.sum(result[dang]) * reVec 95 | tele = reMat.dot(result) if Matrix else reVec 96 | new = (1.0 - damp) * tele + damp * (traMat.dot(result) + dangScore) 97 | if np.linalg.norm(new - result) < 1e-10: 98 | break 99 | 100 | result = new 101 | track[damp].append(result) 102 | 103 | return result, track 104 | 105 | def totalrank(self, alpha=1, beta=1): 106 | traMat, dang = self.trans_mat() 107 | reVec = self.reset_vec() 108 | 109 | rho_t = reVec * beta / (alpha + beta) 110 | pi_t = reVec * beta / (alpha + beta) 111 | 112 | for iterat in range(100000): 113 | P_rho = (traMat.dot(rho_t) + np.sum(rho_t[dang]) * reVec) 114 | rho_next = P_rho * (iterat+alpha) / (iterat+1+alpha+beta) 115 | pi_t += rho_next 116 | if np.linalg.norm(rho_next) < 1e-10: 117 | break 118 | 119 | rho_t = rho_next 120 | 121 | return pi_t 122 | 123 | def run_model(self, damps, TotalRank=False, alpha=1, beta=1, 124 | Matrix=False, kernel='rbf_ap'): 125 | scores = {} 126 | if TotalRank: 127 | scores['total'] = list(self.totalrank(alpha=alpha, beta=beta)) 128 | else: 129 | for damp in damps: 130 | score, _ = self.PageRank(damp, kernel=kernel, Matrix=Matrix) 131 | scores[str(damp)] = list(score) 132 | 133 | return scores 134 | 135 | def test_ResetProbVec(self): 136 | for kernel in ['rbf', 'cos', 'euc', 'sigmoid', 'rbf_ap']: 137 | AR = AttriRank(self.fake_graph, self.fake_features, 138 | nodeCount=self.node) 139 | AR.ResetProbVec(kernel=kernel) 140 | scores = AR.resetProbVec.ravel() 141 | answers = self.reset_vec(kernel) 142 | assert np.linalg.norm(answers - scores) < 1e-10 143 | 144 | def test_ResetProbMat(self): 145 | AR = AttriRank(self.fake_graph, self.fake_features, 146 | nodeCount=self.node) 147 | AR.ResetProbMat() 148 | scores = AR.resetProbMat.ravel() 149 | answers = self.reset_mat().ravel() 150 | assert np.linalg.norm(answers - scores) < 1e-10 151 | 152 | def test_TransMat(self): 153 | AR = AttriRank(self.fake_graph, self.fake_features, 154 | nodeCount=self.node) 155 | AR.TransMat() 156 | scores = AR.transMat.toarray().ravel() 157 | answers_mat, answers_dang = self.trans_mat() 158 | assert np.linalg.norm(answers_mat.toarray().ravel() - scores) < 1e-10 159 | assert np.linalg.norm(answers_dang - AR.dangVec) < 1e-10 160 | 161 | AR = AttriRank(self.fake_graph, self.fake_features, 162 | nodeCount=self.node, weighted=False) 163 | AR.TransMat() 164 | scores = AR.transMat.toarray().ravel() 165 | answers_mat, answers_dang = self.trans_mat(weighted=False) 166 | assert np.linalg.norm(answers_mat.toarray().ravel() - scores) < 1e-10 167 | assert np.linalg.norm(answers_dang - AR.dangVec) < 1e-10 168 | 169 | def test_runPageRank(self): 170 | AR = AttriRank(self.fake_graph, self.fake_features, 171 | nodeCount=self.node) 172 | AR.track = True 173 | scores = AR.runPageRank(damp=0.85) 174 | track = np.array(AR.track_scores[0.85]) 175 | answers, ans_track = self.PageRank(damp=0.85) 176 | ans_track = np.array(ans_track[0.85]) 177 | assert np.linalg.norm(answers - scores) < 1e-10 178 | assert np.linalg.norm(ans_track - track) < 1e-10 179 | 180 | AR = AttriRank(self.fake_graph, self.fake_features, 181 | nodeCount=self.node) 182 | AR.track = True 183 | AR.Matrix = True 184 | scores = AR.runPageRank(damp=0.85) 185 | track = np.array(AR.track_scores[0.85]) 186 | answers, ans_track = self.PageRank(damp=0.85, Matrix=True) 187 | ans_track = np.array(ans_track[0.85]) 188 | assert np.linalg.norm(answers - scores) < 1e-10 189 | assert np.linalg.norm(ans_track - track) < 1e-10 190 | 191 | def test_TotalRank(self): 192 | AR = AttriRank(self.fake_graph, self.fake_features, 193 | nodeCount=self.node) 194 | TR_scores = AR.TotalRank() 195 | answers = self.totalrank() 196 | assert np.linalg.norm(answers - TR_scores) < 1e-10 197 | 198 | TR_scores = AR.TotalRank(alpha=2, beta=4) 199 | answers = self.totalrank(alpha=2, beta=4) 200 | assert np.linalg.norm(answers - TR_scores) < 1e-10 201 | 202 | TR_scores = AR.TotalRank(alpha=0.9, beta=0.8) 203 | answers = self.totalrank(alpha=0.9, beta=0.8) 204 | assert np.linalg.norm(answers - TR_scores) < 1e-10 205 | 206 | def test_runModel(self): 207 | AR = AttriRank(self.fake_graph, self.fake_features, 208 | nodeCount=self.node) 209 | damps = [i/10.0 for i in range(10)] 210 | scores = AR.runModel(damps, kernel='cos') 211 | scores = np.array([scores[str(d)] for d in damps]) 212 | answers = self.run_model(damps, kernel='cos') 213 | answers = np.array([answers[str(d)] for d in damps]) 214 | assert np.linalg.norm(answers - scores) < 1e-10 215 | 216 | scores = AR.runModel(damps, TotalRank=True, alpha=3, beta=4) 217 | scores = np.array(scores['total']) 218 | answers = self.run_model(damps, TotalRank=True, alpha=3, beta=4) 219 | answers = np.array(answers['total']) 220 | assert np.linalg.norm(answers - scores) < 1e-10 221 | 222 | scores = AR.runModel(damps, Matrix=True) 223 | scores = np.array([scores[str(d)] for d in damps]) 224 | answers = self.run_model(damps, Matrix=True) 225 | answers = np.array([answers[str(d)] for d in damps]) 226 | assert np.linalg.norm(answers - scores) < 1e-10 227 | -------------------------------------------------------------------------------- /src/AttriRank.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import numpy as np 4 | from scipy.sparse import csr_matrix 5 | from sklearn import preprocessing 6 | from sklearn.metrics.pairwise import (rbf_kernel, cosine_similarity, 7 | sigmoid_kernel, euclidean_distances) 8 | from collections import defaultdict 9 | 10 | 11 | class AttriRank(object): 12 | convergenceThreshold = 1e-10 13 | Matrix = False 14 | track = False 15 | scores = {} 16 | print_every = 1000 17 | track_scores = {} 18 | 19 | def __init__(self, graph, featureMatrix, itermax=100000, 20 | weighted=True, nodeCount=None): 21 | """ 22 | Standardize input features and set the basic parameters 23 | graph: [[node_from, node_to], ...] 24 | featureMatrix: N * d matrix; i-th node's feature is the i-th row 25 | itermax: maximum iterations 26 | weighted: transition Matrix weighted by number of links 27 | """ 28 | 29 | self.graph = np.array(graph) 30 | self.featMat = preprocessing.scale(np.array(featureMatrix) / 100.0) 31 | self.featCount = self.featMat.shape[1] 32 | 33 | if nodeCount is None: 34 | self.nodeCount = graph.max() + 1 35 | else: 36 | self.nodeCount = nodeCount 37 | 38 | self.iterationMax = itermax 39 | self.weighted = weighted 40 | 41 | def ResetProbVec(self, kernel='rbf_ap'): 42 | """ 43 | Calculate the reset probability vector with assigned kernel 44 | rbf: Radial basis function 45 | cos: (cosine similarity + 1) / 2.0 46 | euc: 1.0 / (1 + euclidean distances) 47 | sigmoid: (tanh(gamma ) + 1) / 2.0 48 | rbf_ap: Taylor-expansion approximated Radial basis function 49 | """ 50 | 51 | if kernel == 'rbf': 52 | RBF = rbf_kernel(self.featMat, gamma=1.0 / self.featCount) 53 | RBF = RBF.sum(axis=0) 54 | resetProbVec = RBF / np.sum(RBF) 55 | 56 | elif kernel == 'cos': 57 | Cos = (cosine_similarity(self.featMat) + 1) / 2.0 58 | Cos = Cos.sum(axis=0) 59 | resetProbVec = Cos / np.sum(Cos) 60 | 61 | elif kernel == 'euc': 62 | Euc = 1.0 / (euclidean_distances(self.featMat) + 1) 63 | Euc = Euc.sum(axis=0) 64 | resetProbVec = Euc / np.sum(Euc) 65 | 66 | elif kernel == 'sigmoid': 67 | gamma = 1.0 / self.featCount 68 | Sig = sigmoid_kernel(self.featMat, coef0=0, gamma=gamma) 69 | Sig = (Sig + 1.0) / 2.0 70 | Sig = Sig.sum(axis=0) 71 | resetProbVec = Sig / np.sum(Sig) 72 | 73 | elif kernel == 'rbf_ap': 74 | parameter = 1.0 / self.featCount 75 | # w 76 | lengths = np.einsum("ij, ij -> i", self.featMat, self.featMat) 77 | expNormVector = np.exp(- parameter * lengths) 78 | # y 79 | f_normVec = np.einsum("i, ij -> j", expNormVector, self.featMat) 80 | featureNormVector = f_normVec * (2.0 * parameter) 81 | # Z 82 | outerMat = np.einsum("i, ij, ik -> jk", expNormVector, 83 | self.featMat, self.featMat) 84 | featureOuterNorm = outerMat * (2.0 * parameter ** 2) 85 | # r' 86 | first = expNormVector * np.sum(expNormVector) 87 | second = np.einsum("i, j, ij -> i", expNormVector, 88 | featureNormVector, self.featMat) 89 | third = np.einsum("i, jk, ij, ik -> i", expNormVector, 90 | featureOuterNorm, self.featMat, self.featMat) 91 | resetProbVec = first + second + third 92 | # r 93 | resetProbVec /= np.sum(resetProbVec) 94 | 95 | self.resetProbVec = resetProbVec 96 | 97 | def ResetProbMat(self): 98 | """Calculate the Q transition Matrix with RBF kernel""" 99 | parameter = 1.0 / self.featCount 100 | RBF = rbf_kernel(self.featMat, gamma=parameter) 101 | self.resetProbMat = RBF / RBF.sum(axis=0) 102 | 103 | def TransMat(self): 104 | """Construct transition matrix""" 105 | links = defaultdict(int) 106 | 107 | for nodefrom, nodeto in self.graph: 108 | 109 | if self.weighted: 110 | links[(nodefrom, nodeto)] += 1.0 111 | 112 | else: 113 | links[(nodefrom, nodeto)] = 1.0 114 | 115 | entryList = list() 116 | rowList = list() 117 | columnList = list() 118 | 119 | for key, val in links.items(): 120 | entryList.append(val) 121 | columnList.append(key[0]) 122 | rowList.append(key[1]) 123 | 124 | # transition matrix 125 | traMat = csr_matrix((entryList, (rowList, columnList)), 126 | shape=(self.nodeCount, self.nodeCount)) 127 | self.transMat = traMat.multiply(csr_matrix(1.0 / traMat.sum(axis=0))) 128 | 129 | # find dangling nodes 130 | col_sum = np.array(traMat.sum(axis=0))[0] 131 | self.dangVec = np.arange(col_sum.shape[0])[col_sum == 0] 132 | 133 | def runPageRank(self, damp=0.85, do=True, doTrans=True, kernel='rbf_ap'): 134 | """ 135 | do: whether to compute the reset probability vector 136 | doTrans: whether to compute the transition matrix 137 | """ 138 | if doTrans: 139 | self.TransMat() 140 | print("\tGenerate transition matrix") 141 | 142 | if do: 143 | if self.Matrix: 144 | self.ResetProbMat() 145 | print("\tGenerate matrix Q") 146 | else: 147 | print("\tGenerate reset probability vector") 148 | self.ResetProbVec(kernel=kernel) 149 | 150 | if damp == 0: 151 | scoreVector = self.resetProbVec 152 | return scoreVector 153 | 154 | # record the scores of each update 155 | self.track_scores[damp] = [] 156 | scoreVector = np.ones(self.nodeCount) / self.nodeCount 157 | 158 | for iteration in range(self.iterationMax): 159 | leak_scores = np.sum(scoreVector[self.dangVec]) 160 | dangScore = leak_scores * self.resetProbVec 161 | 162 | if self.Matrix: 163 | teleport_prob = self.resetProbMat.dot(scoreVector) 164 | else: 165 | teleport_prob = self.resetProbVec 166 | 167 | newScoreVector = (1.0 - damp) * teleport_prob + \ 168 | damp * (self.transMat.dot(scoreVector) + dangScore) 169 | error = np.linalg.norm(newScoreVector - scoreVector) 170 | 171 | if error < self.convergenceThreshold: 172 | break 173 | 174 | scoreVector = newScoreVector 175 | if self.track: 176 | self.track_scores[damp].append(scoreVector) 177 | 178 | return scoreVector 179 | 180 | def TotalRank(self, alpha=1, beta=1, kernel='rbf_ap'): 181 | """ 182 | Implementation of TotalRank with beta distribution as the prior 183 | (alpha, beta): parameters for the beta distribution 184 | """ 185 | print("\tGenerate transition matrix and reset probability vector") 186 | self.TransMat() 187 | self.ResetProbVec(kernel=kernel) 188 | 189 | rho_t = self.resetProbVec * beta / (alpha + beta) 190 | pi_t = self.resetProbVec * beta / (alpha + beta) 191 | 192 | for iteration in range(self.iterationMax): 193 | dangScore = np.sum(rho_t[self.dangVec]) * self.resetProbVec 194 | P_rho = (self.transMat.dot(rho_t) + dangScore) 195 | rho_next = P_rho * (iteration + alpha) / (iteration+1+alpha+beta) 196 | pi_t += rho_next 197 | error = np.linalg.norm(rho_next) 198 | 199 | if iteration % self.print_every == (self.print_every - 1): 200 | print("\tIteration %d:\t%.10f" % (iteration + 1, error)) 201 | 202 | if error < self.convergenceThreshold: 203 | break 204 | 205 | rho_t = rho_next 206 | if self.track: 207 | self.track_scores['total'].append(pi_t) 208 | 209 | return pi_t 210 | 211 | def runModel(self, factors=[0.85], Matrix=False, track=False, 212 | TotalRank=False, alpha=1, beta=1, print_every=1000, 213 | kernel='rbf_ap'): 214 | """ 215 | Give a list of damping factors to work with 216 | return a dict: key=(damp factor); value=(scores of each node) 217 | Matrix: use the exact Q or approximated r (True for Q) 218 | track: record the score vector at each iteration during updating 219 | """ 220 | self.Matrix = Matrix 221 | self.track = track 222 | self.print_every = print_every 223 | scores = {} 224 | 225 | if TotalRank: 226 | print("Run AttriRank with prior...") 227 | scores['total'] = list(self.TotalRank(alpha=alpha, beta=beta)) 228 | else: 229 | do = True 230 | doTrans = True 231 | for dampFac in factors: 232 | print("Run AttriRank, damp:", dampFac) 233 | score_vec = self.runPageRank(dampFac, do=do, doTrans=doTrans, 234 | kernel=kernel) 235 | 236 | # already have reset vector and transition matrix 237 | do = False 238 | doTrans = False 239 | scores[str(dampFac)] = list(score_vec) 240 | print("\tDone.") 241 | 242 | self.scores = scores 243 | 244 | return scores 245 | -------------------------------------------------------------------------------- /AttriRank_inC/edge.txt: -------------------------------------------------------------------------------- 1 | 5 95 2 | 5 147 3 | 5 771 4 | 5 878 5 | 8 154 6 | 8 175 7 | 12 47 8 | 12 195 9 | 18 874 10 | 28 929 11 | 32 216 12 | 47 879 13 | 51 94 14 | 58 131 15 | 59 51 16 | 59 94 17 | 59 164 18 | 62 1 19 | 62 3 20 | 62 4 21 | 62 5 22 | 62 6 23 | 62 7 24 | 62 8 25 | 62 9 26 | 62 10 27 | 62 11 28 | 62 12 29 | 62 17 30 | 62 18 31 | 62 19 32 | 62 21 33 | 62 23 34 | 62 24 35 | 62 26 36 | 62 28 37 | 62 32 38 | 62 34 39 | 62 35 40 | 62 36 41 | 62 37 42 | 62 38 43 | 62 39 44 | 62 40 45 | 62 41 46 | 62 42 47 | 62 43 48 | 62 44 49 | 62 45 50 | 62 46 51 | 62 47 52 | 62 48 53 | 62 49 54 | 62 50 55 | 62 51 56 | 62 52 57 | 62 53 58 | 62 54 59 | 62 55 60 | 62 56 61 | 62 57 62 | 62 58 63 | 62 59 64 | 62 60 65 | 62 61 66 | 62 63 67 | 62 64 68 | 62 65 69 | 62 66 70 | 62 67 71 | 62 68 72 | 62 69 73 | 62 70 74 | 62 71 75 | 62 72 76 | 62 74 77 | 62 75 78 | 62 77 79 | 62 78 80 | 62 79 81 | 62 82 82 | 62 88 83 | 62 90 84 | 62 91 85 | 62 92 86 | 62 93 87 | 62 94 88 | 62 95 89 | 62 96 90 | 62 99 91 | 62 100 92 | 62 101 93 | 62 103 94 | 62 104 95 | 62 105 96 | 62 106 97 | 62 107 98 | 62 109 99 | 62 111 100 | 62 112 101 | 62 113 102 | 62 114 103 | 62 115 104 | 62 116 105 | 62 117 106 | 62 118 107 | 62 119 108 | 62 121 109 | 62 123 110 | 62 124 111 | 62 126 112 | 62 127 113 | 62 128 114 | 62 130 115 | 62 131 116 | 62 132 117 | 62 133 118 | 62 135 119 | 62 136 120 | 62 138 121 | 62 142 122 | 62 144 123 | 62 147 124 | 62 148 125 | 62 149 126 | 62 150 127 | 62 151 128 | 62 152 129 | 62 153 130 | 62 154 131 | 62 155 132 | 62 156 133 | 62 157 134 | 62 159 135 | 62 160 136 | 62 161 137 | 62 162 138 | 62 164 139 | 62 165 140 | 62 166 141 | 62 167 142 | 62 169 143 | 62 171 144 | 62 172 145 | 62 173 146 | 62 175 147 | 62 176 148 | 62 178 149 | 62 183 150 | 62 186 151 | 62 187 152 | 62 188 153 | 62 189 154 | 62 190 155 | 62 191 156 | 62 192 157 | 62 193 158 | 62 194 159 | 62 195 160 | 62 196 161 | 62 197 162 | 62 198 163 | 62 199 164 | 62 201 165 | 62 202 166 | 62 203 167 | 62 205 168 | 62 208 169 | 62 209 170 | 62 210 171 | 62 211 172 | 62 212 173 | 62 213 174 | 62 214 175 | 62 215 176 | 62 216 177 | 62 217 178 | 62 218 179 | 62 219 180 | 62 220 181 | 62 222 182 | 62 252 183 | 62 629 184 | 62 771 185 | 62 805 186 | 62 874 187 | 62 879 188 | 62 886 189 | 62 949 190 | 63 21 191 | 63 138 192 | 63 164 193 | 63 213 194 | 71 49 195 | 71 151 196 | 75 169 197 | 75 252 198 | 78 194 199 | 78 212 200 | 82 214 201 | 104 929 202 | 107 18 203 | 112 39 204 | 112 44 205 | 112 57 206 | 112 213 207 | 113 805 208 | 113 874 209 | 113 886 210 | 131 58 211 | 131 128 212 | 138 63 213 | 138 213 214 | 138 886 215 | 140 143 216 | 147 5 217 | 147 95 218 | 147 771 219 | 153 175 220 | 164 214 221 | 166 949 222 | 169 61 223 | 169 75 224 | 169 252 225 | 171 114 226 | 175 153 227 | 189 926 228 | 194 78 229 | 194 212 230 | 196 119 231 | 196 162 232 | 196 805 233 | 196 874 234 | 196 879 235 | 196 886 236 | 199 67 237 | 199 88 238 | 203 150 239 | 215 203 240 | 219 135 241 | 219 201 242 | 224 626 243 | 249 308 244 | 249 349 245 | 249 562 246 | 249 622 247 | 249 669 248 | 249 754 249 | 249 934 250 | 249 990 251 | 251 311 252 | 251 407 253 | 251 510 254 | 251 540 255 | 251 541 256 | 251 593 257 | 251 686 258 | 251 687 259 | 251 780 260 | 251 819 261 | 251 855 262 | 251 860 263 | 251 901 264 | 251 996 265 | 252 169 266 | 283 976 267 | 290 158 268 | 294 557 269 | 303 366 270 | 308 378 271 | 311 251 272 | 311 407 273 | 311 510 274 | 311 540 275 | 311 541 276 | 311 593 277 | 311 686 278 | 311 687 279 | 311 780 280 | 311 819 281 | 311 855 282 | 311 860 283 | 311 901 284 | 311 996 285 | 313 396 286 | 316 676 287 | 316 688 288 | 316 808 289 | 316 913 290 | 318 364 291 | 318 476 292 | 318 519 293 | 318 696 294 | 321 378 295 | 332 378 296 | 352 378 297 | 356 713 298 | 378 308 299 | 378 634 300 | 378 669 301 | 378 707 302 | 378 976 303 | 386 434 304 | 396 313 305 | 400 345 306 | 402 383 307 | 407 251 308 | 407 311 309 | 407 510 310 | 407 540 311 | 407 541 312 | 407 593 313 | 407 686 314 | 407 687 315 | 407 780 316 | 407 819 317 | 407 855 318 | 407 860 319 | 407 901 320 | 407 996 321 | 411 873 322 | 432 433 323 | 433 432 324 | 434 386 325 | 439 441 326 | 439 442 327 | 440 439 328 | 440 441 329 | 440 442 330 | 441 439 331 | 441 442 332 | 442 439 333 | 442 441 334 | 447 720 335 | 448 479 336 | 455 454 337 | 476 318 338 | 479 713 339 | 484 818 340 | 489 928 341 | 510 251 342 | 510 311 343 | 510 407 344 | 510 540 345 | 510 541 346 | 510 593 347 | 510 686 348 | 510 687 349 | 510 780 350 | 510 819 351 | 510 855 352 | 510 860 353 | 510 901 354 | 510 996 355 | 513 404 356 | 519 318 357 | 519 364 358 | 534 535 359 | 540 251 360 | 540 311 361 | 540 407 362 | 540 510 363 | 540 541 364 | 540 593 365 | 540 686 366 | 540 687 367 | 540 780 368 | 540 819 369 | 540 855 370 | 540 860 371 | 540 901 372 | 540 996 373 | 541 251 374 | 541 311 375 | 541 407 376 | 541 510 377 | 541 540 378 | 541 593 379 | 541 686 380 | 541 687 381 | 541 780 382 | 541 819 383 | 541 855 384 | 541 860 385 | 541 901 386 | 541 996 387 | 593 251 388 | 593 311 389 | 593 407 390 | 593 510 391 | 593 540 392 | 593 541 393 | 593 686 394 | 593 687 395 | 593 780 396 | 593 819 397 | 593 855 398 | 593 860 399 | 593 901 400 | 593 996 401 | 601 856 402 | 605 576 403 | 607 308 404 | 607 378 405 | 607 578 406 | 607 611 407 | 608 502 408 | 610 607 409 | 610 611 410 | 611 607 411 | 611 610 412 | 611 714 413 | 620 623 414 | 665 319 415 | 665 479 416 | 667 532 417 | 676 316 418 | 676 688 419 | 676 808 420 | 676 913 421 | 681 621 422 | 686 251 423 | 686 311 424 | 686 407 425 | 686 510 426 | 686 540 427 | 686 541 428 | 686 593 429 | 686 687 430 | 686 780 431 | 686 819 432 | 686 855 433 | 686 860 434 | 686 901 435 | 686 996 436 | 687 251 437 | 687 311 438 | 687 407 439 | 687 510 440 | 687 540 441 | 687 541 442 | 687 593 443 | 687 686 444 | 687 780 445 | 687 819 446 | 687 855 447 | 687 860 448 | 687 901 449 | 687 996 450 | 707 308 451 | 707 378 452 | 707 708 453 | 707 734 454 | 707 735 455 | 720 356 456 | 720 713 457 | 721 725 458 | 722 605 459 | 722 725 460 | 723 622 461 | 725 721 462 | 771 5 463 | 771 95 464 | 771 147 465 | 771 164 466 | 780 251 467 | 780 311 468 | 780 407 469 | 780 510 470 | 780 540 471 | 780 541 472 | 780 593 473 | 780 686 474 | 780 687 475 | 780 819 476 | 780 855 477 | 780 860 478 | 780 901 479 | 780 996 480 | 798 146 481 | 798 873 482 | 802 651 483 | 805 18 484 | 805 874 485 | 813 844 486 | 818 484 487 | 819 251 488 | 819 311 489 | 819 407 490 | 819 510 491 | 819 540 492 | 819 541 493 | 819 593 494 | 819 686 495 | 819 687 496 | 819 780 497 | 819 855 498 | 819 860 499 | 819 901 500 | 819 996 501 | 832 831 502 | 855 251 503 | 855 311 504 | 855 407 505 | 855 510 506 | 855 540 507 | 855 541 508 | 855 593 509 | 855 686 510 | 855 687 511 | 855 780 512 | 855 819 513 | 855 860 514 | 855 901 515 | 855 996 516 | 856 601 517 | 860 251 518 | 860 311 519 | 860 407 520 | 860 510 521 | 860 540 522 | 860 541 523 | 860 593 524 | 860 686 525 | 860 687 526 | 860 780 527 | 860 819 528 | 860 855 529 | 860 901 530 | 860 996 531 | 861 862 532 | 862 861 533 | 874 18 534 | 874 805 535 | 874 886 536 | 878 21 537 | 878 23 538 | 878 28 539 | 878 35 540 | 878 45 541 | 878 59 542 | 878 62 543 | 878 68 544 | 878 82 545 | 878 92 546 | 878 93 547 | 878 114 548 | 878 153 549 | 878 164 550 | 878 166 551 | 878 187 552 | 878 189 553 | 878 194 554 | 878 219 555 | 878 771 556 | 878 886 557 | 886 805 558 | 886 874 559 | 899 321 560 | 899 474 561 | 901 251 562 | 901 311 563 | 901 407 564 | 901 510 565 | 901 540 566 | 901 541 567 | 901 593 568 | 901 686 569 | 901 687 570 | 901 780 571 | 901 819 572 | 901 855 573 | 901 860 574 | 901 996 575 | 911 912 576 | 912 665 577 | 912 911 578 | 921 922 579 | 921 923 580 | 921 924 581 | 921 925 582 | 921 926 583 | 921 927 584 | 921 928 585 | 921 929 586 | 921 930 587 | 921 931 588 | 921 932 589 | 921 933 590 | 921 934 591 | 921 935 592 | 921 936 593 | 921 937 594 | 921 938 595 | 921 939 596 | 921 940 597 | 921 941 598 | 922 921 599 | 922 923 600 | 922 924 601 | 922 925 602 | 922 926 603 | 922 927 604 | 922 928 605 | 922 929 606 | 922 931 607 | 922 932 608 | 922 933 609 | 922 934 610 | 922 935 611 | 922 936 612 | 922 937 613 | 922 938 614 | 922 939 615 | 922 940 616 | 922 941 617 | 923 489 618 | 923 921 619 | 923 922 620 | 923 924 621 | 923 925 622 | 923 926 623 | 923 927 624 | 923 928 625 | 923 929 626 | 923 931 627 | 923 932 628 | 923 933 629 | 923 934 630 | 923 935 631 | 923 936 632 | 923 937 633 | 923 938 634 | 923 939 635 | 923 940 636 | 923 941 637 | 924 921 638 | 924 922 639 | 924 923 640 | 924 925 641 | 924 926 642 | 924 927 643 | 924 928 644 | 924 929 645 | 924 931 646 | 924 932 647 | 924 933 648 | 924 934 649 | 924 935 650 | 924 936 651 | 924 937 652 | 924 938 653 | 924 939 654 | 924 940 655 | 924 941 656 | 924 942 657 | 925 921 658 | 925 922 659 | 925 923 660 | 925 924 661 | 925 926 662 | 925 927 663 | 925 928 664 | 925 929 665 | 925 931 666 | 925 932 667 | 925 933 668 | 925 934 669 | 925 935 670 | 925 936 671 | 925 937 672 | 925 938 673 | 925 939 674 | 925 940 675 | 925 941 676 | 926 921 677 | 926 922 678 | 926 923 679 | 926 924 680 | 926 925 681 | 926 927 682 | 926 928 683 | 926 929 684 | 926 931 685 | 926 932 686 | 926 933 687 | 926 934 688 | 926 935 689 | 926 936 690 | 926 937 691 | 926 938 692 | 926 939 693 | 926 940 694 | 926 941 695 | 927 921 696 | 927 922 697 | 927 923 698 | 927 924 699 | 927 925 700 | 927 926 701 | 927 928 702 | 927 929 703 | 927 931 704 | 927 932 705 | 927 933 706 | 927 934 707 | 927 935 708 | 927 936 709 | 927 937 710 | 927 938 711 | 927 939 712 | 927 940 713 | 927 941 714 | 928 921 715 | 928 922 716 | 928 923 717 | 928 924 718 | 928 925 719 | 928 926 720 | 928 927 721 | 928 929 722 | 928 931 723 | 928 932 724 | 928 933 725 | 928 934 726 | 928 935 727 | 928 936 728 | 928 937 729 | 928 938 730 | 928 939 731 | 928 940 732 | 928 941 733 | 929 28 734 | 929 69 735 | 929 102 736 | 929 104 737 | 929 157 738 | 929 896 739 | 929 921 740 | 929 922 741 | 929 923 742 | 929 924 743 | 929 925 744 | 929 926 745 | 929 927 746 | 929 928 747 | 929 931 748 | 929 932 749 | 929 933 750 | 929 934 751 | 929 935 752 | 929 936 753 | 929 937 754 | 929 938 755 | 929 939 756 | 929 940 757 | 929 941 758 | 930 921 759 | 930 922 760 | 930 923 761 | 930 924 762 | 930 925 763 | 930 926 764 | 930 927 765 | 930 928 766 | 930 929 767 | 930 931 768 | 930 932 769 | 930 933 770 | 930 934 771 | 930 935 772 | 930 936 773 | 930 937 774 | 930 938 775 | 930 939 776 | 930 940 777 | 930 941 778 | 931 921 779 | 931 922 780 | 931 923 781 | 931 924 782 | 931 925 783 | 931 926 784 | 931 927 785 | 931 928 786 | 931 929 787 | 931 932 788 | 931 933 789 | 931 934 790 | 931 935 791 | 931 936 792 | 931 937 793 | 931 938 794 | 931 939 795 | 931 940 796 | 931 941 797 | 932 921 798 | 932 922 799 | 932 923 800 | 932 924 801 | 932 925 802 | 932 926 803 | 932 927 804 | 932 928 805 | 932 929 806 | 932 931 807 | 932 933 808 | 932 934 809 | 932 935 810 | 932 936 811 | 932 937 812 | 932 938 813 | 932 939 814 | 932 940 815 | 932 941 816 | 933 921 817 | 933 922 818 | 933 923 819 | 933 924 820 | 933 925 821 | 933 926 822 | 933 927 823 | 933 928 824 | 933 929 825 | 933 931 826 | 933 932 827 | 933 934 828 | 933 935 829 | 933 936 830 | 933 937 831 | 933 938 832 | 933 939 833 | 933 940 834 | 933 941 835 | 934 169 836 | 934 252 837 | 934 921 838 | 934 922 839 | 934 923 840 | 934 924 841 | 934 925 842 | 934 926 843 | 934 927 844 | 934 928 845 | 934 929 846 | 934 931 847 | 934 932 848 | 934 933 849 | 934 935 850 | 934 936 851 | 934 937 852 | 934 938 853 | 934 939 854 | 934 940 855 | 934 941 856 | 935 809 857 | 935 921 858 | 935 922 859 | 935 923 860 | 935 924 861 | 935 925 862 | 935 926 863 | 935 927 864 | 935 928 865 | 935 929 866 | 935 931 867 | 935 932 868 | 935 933 869 | 935 934 870 | 935 936 871 | 935 937 872 | 935 938 873 | 935 939 874 | 935 940 875 | 935 941 876 | 936 921 877 | 936 922 878 | 936 923 879 | 936 924 880 | 936 925 881 | 936 926 882 | 936 927 883 | 936 928 884 | 936 929 885 | 936 931 886 | 936 932 887 | 936 933 888 | 936 934 889 | 936 935 890 | 936 937 891 | 936 938 892 | 936 939 893 | 936 940 894 | 936 941 895 | 937 921 896 | 937 922 897 | 937 923 898 | 937 924 899 | 937 925 900 | 937 926 901 | 937 927 902 | 937 928 903 | 937 929 904 | 937 931 905 | 937 932 906 | 937 933 907 | 937 934 908 | 937 935 909 | 937 936 910 | 937 938 911 | 937 939 912 | 937 940 913 | 937 941 914 | 938 79 915 | 938 921 916 | 938 922 917 | 938 923 918 | 938 924 919 | 938 925 920 | 938 926 921 | 938 927 922 | 938 928 923 | 938 929 924 | 938 931 925 | 938 932 926 | 938 933 927 | 938 934 928 | 938 935 929 | 938 936 930 | 938 937 931 | 938 939 932 | 938 940 933 | 938 941 934 | 939 921 935 | 939 922 936 | 939 923 937 | 939 924 938 | 939 925 939 | 939 926 940 | 939 927 941 | 939 928 942 | 939 929 943 | 939 931 944 | 939 932 945 | 939 933 946 | 939 934 947 | 939 935 948 | 939 936 949 | 939 937 950 | 939 938 951 | 939 940 952 | 939 941 953 | 940 921 954 | 940 922 955 | 940 923 956 | 940 924 957 | 940 925 958 | 940 926 959 | 940 927 960 | 940 928 961 | 940 929 962 | 940 931 963 | 940 932 964 | 940 933 965 | 940 934 966 | 940 935 967 | 940 936 968 | 940 937 969 | 940 938 970 | 940 939 971 | 940 941 972 | 941 921 973 | 941 922 974 | 941 923 975 | 941 924 976 | 941 925 977 | 941 926 978 | 941 927 979 | 941 928 980 | 941 929 981 | 941 931 982 | 941 932 983 | 941 933 984 | 941 934 985 | 941 935 986 | 941 936 987 | 941 937 988 | 941 938 989 | 941 939 990 | 941 940 991 | 953 954 992 | 954 953 993 | 964 146 994 | 964 479 995 | 990 140 996 | 990 249 997 | 990 308 998 | 990 319 999 | 990 326 1000 | 990 543 1001 | 990 579 1002 | 990 669 1003 | 990 754 1004 | 990 934 1005 | 992 274 1006 | 996 251 1007 | 996 311 1008 | 996 407 1009 | 996 510 1010 | 996 540 1011 | 996 541 1012 | 996 593 1013 | 996 686 1014 | 996 687 1015 | 996 780 1016 | 996 819 1017 | 996 855 1018 | 996 860 1019 | 996 901 1020 | -------------------------------------------------------------------------------- /sample/graph.edgelist: -------------------------------------------------------------------------------- 1 | 5 95 2 | 5 147 3 | 5 771 4 | 5 878 5 | 8 154 6 | 8 175 7 | 12 47 8 | 12 195 9 | 18 874 10 | 28 929 11 | 32 216 12 | 47 879 13 | 51 94 14 | 58 131 15 | 59 51 16 | 59 94 17 | 59 164 18 | 62 1 19 | 62 3 20 | 62 4 21 | 62 5 22 | 62 6 23 | 62 7 24 | 62 8 25 | 62 9 26 | 62 10 27 | 62 11 28 | 62 12 29 | 62 17 30 | 62 18 31 | 62 19 32 | 62 21 33 | 62 23 34 | 62 24 35 | 62 26 36 | 62 28 37 | 62 32 38 | 62 34 39 | 62 35 40 | 62 36 41 | 62 37 42 | 62 38 43 | 62 39 44 | 62 40 45 | 62 41 46 | 62 42 47 | 62 43 48 | 62 44 49 | 62 45 50 | 62 46 51 | 62 47 52 | 62 48 53 | 62 49 54 | 62 50 55 | 62 51 56 | 62 52 57 | 62 53 58 | 62 54 59 | 62 55 60 | 62 56 61 | 62 57 62 | 62 58 63 | 62 59 64 | 62 60 65 | 62 61 66 | 62 63 67 | 62 64 68 | 62 65 69 | 62 66 70 | 62 67 71 | 62 68 72 | 62 69 73 | 62 70 74 | 62 71 75 | 62 72 76 | 62 74 77 | 62 75 78 | 62 77 79 | 62 78 80 | 62 79 81 | 62 82 82 | 62 88 83 | 62 90 84 | 62 91 85 | 62 92 86 | 62 93 87 | 62 94 88 | 62 95 89 | 62 96 90 | 62 99 91 | 62 100 92 | 62 101 93 | 62 103 94 | 62 104 95 | 62 105 96 | 62 106 97 | 62 107 98 | 62 109 99 | 62 111 100 | 62 112 101 | 62 113 102 | 62 114 103 | 62 115 104 | 62 116 105 | 62 117 106 | 62 118 107 | 62 119 108 | 62 121 109 | 62 123 110 | 62 124 111 | 62 126 112 | 62 127 113 | 62 128 114 | 62 130 115 | 62 131 116 | 62 132 117 | 62 133 118 | 62 135 119 | 62 136 120 | 62 138 121 | 62 142 122 | 62 144 123 | 62 147 124 | 62 148 125 | 62 149 126 | 62 150 127 | 62 151 128 | 62 152 129 | 62 153 130 | 62 154 131 | 62 155 132 | 62 156 133 | 62 157 134 | 62 159 135 | 62 160 136 | 62 161 137 | 62 162 138 | 62 164 139 | 62 165 140 | 62 166 141 | 62 167 142 | 62 169 143 | 62 171 144 | 62 172 145 | 62 173 146 | 62 175 147 | 62 176 148 | 62 178 149 | 62 183 150 | 62 186 151 | 62 187 152 | 62 188 153 | 62 189 154 | 62 190 155 | 62 191 156 | 62 192 157 | 62 193 158 | 62 194 159 | 62 195 160 | 62 196 161 | 62 197 162 | 62 198 163 | 62 199 164 | 62 201 165 | 62 202 166 | 62 203 167 | 62 205 168 | 62 208 169 | 62 209 170 | 62 210 171 | 62 211 172 | 62 212 173 | 62 213 174 | 62 214 175 | 62 215 176 | 62 216 177 | 62 217 178 | 62 218 179 | 62 219 180 | 62 220 181 | 62 222 182 | 62 252 183 | 62 629 184 | 62 771 185 | 62 805 186 | 62 874 187 | 62 879 188 | 62 886 189 | 62 949 190 | 63 21 191 | 63 138 192 | 63 164 193 | 63 213 194 | 71 49 195 | 71 151 196 | 75 169 197 | 75 252 198 | 78 194 199 | 78 212 200 | 82 214 201 | 104 929 202 | 107 18 203 | 112 39 204 | 112 44 205 | 112 57 206 | 112 213 207 | 113 805 208 | 113 874 209 | 113 886 210 | 131 58 211 | 131 128 212 | 138 63 213 | 138 213 214 | 138 886 215 | 140 143 216 | 147 5 217 | 147 95 218 | 147 771 219 | 153 175 220 | 164 214 221 | 166 949 222 | 169 61 223 | 169 75 224 | 169 252 225 | 171 114 226 | 175 153 227 | 189 926 228 | 194 78 229 | 194 212 230 | 196 119 231 | 196 162 232 | 196 805 233 | 196 874 234 | 196 879 235 | 196 886 236 | 199 67 237 | 199 88 238 | 203 150 239 | 215 203 240 | 219 135 241 | 219 201 242 | 224 626 243 | 249 308 244 | 249 349 245 | 249 562 246 | 249 622 247 | 249 669 248 | 249 754 249 | 249 934 250 | 249 990 251 | 251 311 252 | 251 407 253 | 251 510 254 | 251 540 255 | 251 541 256 | 251 593 257 | 251 686 258 | 251 687 259 | 251 780 260 | 251 819 261 | 251 855 262 | 251 860 263 | 251 901 264 | 251 996 265 | 252 169 266 | 283 976 267 | 290 158 268 | 294 557 269 | 303 366 270 | 308 378 271 | 311 251 272 | 311 407 273 | 311 510 274 | 311 540 275 | 311 541 276 | 311 593 277 | 311 686 278 | 311 687 279 | 311 780 280 | 311 819 281 | 311 855 282 | 311 860 283 | 311 901 284 | 311 996 285 | 313 396 286 | 316 676 287 | 316 688 288 | 316 808 289 | 316 913 290 | 318 364 291 | 318 476 292 | 318 519 293 | 318 696 294 | 321 378 295 | 332 378 296 | 352 378 297 | 356 713 298 | 378 308 299 | 378 634 300 | 378 669 301 | 378 707 302 | 378 976 303 | 386 434 304 | 396 313 305 | 400 345 306 | 402 383 307 | 407 251 308 | 407 311 309 | 407 510 310 | 407 540 311 | 407 541 312 | 407 593 313 | 407 686 314 | 407 687 315 | 407 780 316 | 407 819 317 | 407 855 318 | 407 860 319 | 407 901 320 | 407 996 321 | 411 873 322 | 432 433 323 | 433 432 324 | 434 386 325 | 439 441 326 | 439 442 327 | 440 439 328 | 440 441 329 | 440 442 330 | 441 439 331 | 441 442 332 | 442 439 333 | 442 441 334 | 447 720 335 | 448 479 336 | 455 454 337 | 476 318 338 | 479 713 339 | 484 818 340 | 489 928 341 | 510 251 342 | 510 311 343 | 510 407 344 | 510 540 345 | 510 541 346 | 510 593 347 | 510 686 348 | 510 687 349 | 510 780 350 | 510 819 351 | 510 855 352 | 510 860 353 | 510 901 354 | 510 996 355 | 513 404 356 | 519 318 357 | 519 364 358 | 534 535 359 | 540 251 360 | 540 311 361 | 540 407 362 | 540 510 363 | 540 541 364 | 540 593 365 | 540 686 366 | 540 687 367 | 540 780 368 | 540 819 369 | 540 855 370 | 540 860 371 | 540 901 372 | 540 996 373 | 541 251 374 | 541 311 375 | 541 407 376 | 541 510 377 | 541 540 378 | 541 593 379 | 541 686 380 | 541 687 381 | 541 780 382 | 541 819 383 | 541 855 384 | 541 860 385 | 541 901 386 | 541 996 387 | 593 251 388 | 593 311 389 | 593 407 390 | 593 510 391 | 593 540 392 | 593 541 393 | 593 686 394 | 593 687 395 | 593 780 396 | 593 819 397 | 593 855 398 | 593 860 399 | 593 901 400 | 593 996 401 | 601 856 402 | 605 576 403 | 607 308 404 | 607 378 405 | 607 578 406 | 607 611 407 | 608 502 408 | 610 607 409 | 610 611 410 | 611 607 411 | 611 610 412 | 611 714 413 | 620 623 414 | 665 319 415 | 665 479 416 | 667 532 417 | 676 316 418 | 676 688 419 | 676 808 420 | 676 913 421 | 681 621 422 | 686 251 423 | 686 311 424 | 686 407 425 | 686 510 426 | 686 540 427 | 686 541 428 | 686 593 429 | 686 687 430 | 686 780 431 | 686 819 432 | 686 855 433 | 686 860 434 | 686 901 435 | 686 996 436 | 687 251 437 | 687 311 438 | 687 407 439 | 687 510 440 | 687 540 441 | 687 541 442 | 687 593 443 | 687 686 444 | 687 780 445 | 687 819 446 | 687 855 447 | 687 860 448 | 687 901 449 | 687 996 450 | 707 308 451 | 707 378 452 | 707 708 453 | 707 734 454 | 707 735 455 | 720 356 456 | 720 713 457 | 721 725 458 | 722 605 459 | 722 725 460 | 723 622 461 | 725 721 462 | 771 5 463 | 771 95 464 | 771 147 465 | 771 164 466 | 780 251 467 | 780 311 468 | 780 407 469 | 780 510 470 | 780 540 471 | 780 541 472 | 780 593 473 | 780 686 474 | 780 687 475 | 780 819 476 | 780 855 477 | 780 860 478 | 780 901 479 | 780 996 480 | 798 146 481 | 798 873 482 | 802 651 483 | 805 18 484 | 805 874 485 | 813 844 486 | 818 484 487 | 819 251 488 | 819 311 489 | 819 407 490 | 819 510 491 | 819 540 492 | 819 541 493 | 819 593 494 | 819 686 495 | 819 687 496 | 819 780 497 | 819 855 498 | 819 860 499 | 819 901 500 | 819 996 501 | 832 831 502 | 855 251 503 | 855 311 504 | 855 407 505 | 855 510 506 | 855 540 507 | 855 541 508 | 855 593 509 | 855 686 510 | 855 687 511 | 855 780 512 | 855 819 513 | 855 860 514 | 855 901 515 | 855 996 516 | 856 601 517 | 860 251 518 | 860 311 519 | 860 407 520 | 860 510 521 | 860 540 522 | 860 541 523 | 860 593 524 | 860 686 525 | 860 687 526 | 860 780 527 | 860 819 528 | 860 855 529 | 860 901 530 | 860 996 531 | 861 862 532 | 862 861 533 | 874 18 534 | 874 805 535 | 874 886 536 | 878 21 537 | 878 23 538 | 878 28 539 | 878 35 540 | 878 45 541 | 878 59 542 | 878 62 543 | 878 68 544 | 878 82 545 | 878 92 546 | 878 93 547 | 878 114 548 | 878 153 549 | 878 164 550 | 878 166 551 | 878 187 552 | 878 189 553 | 878 194 554 | 878 219 555 | 878 771 556 | 878 886 557 | 886 805 558 | 886 874 559 | 899 321 560 | 899 474 561 | 901 251 562 | 901 311 563 | 901 407 564 | 901 510 565 | 901 540 566 | 901 541 567 | 901 593 568 | 901 686 569 | 901 687 570 | 901 780 571 | 901 819 572 | 901 855 573 | 901 860 574 | 901 996 575 | 911 912 576 | 912 665 577 | 912 911 578 | 921 922 579 | 921 923 580 | 921 924 581 | 921 925 582 | 921 926 583 | 921 927 584 | 921 928 585 | 921 929 586 | 921 930 587 | 921 931 588 | 921 932 589 | 921 933 590 | 921 934 591 | 921 935 592 | 921 936 593 | 921 937 594 | 921 938 595 | 921 939 596 | 921 940 597 | 921 941 598 | 922 921 599 | 922 923 600 | 922 924 601 | 922 925 602 | 922 926 603 | 922 927 604 | 922 928 605 | 922 929 606 | 922 931 607 | 922 932 608 | 922 933 609 | 922 934 610 | 922 935 611 | 922 936 612 | 922 937 613 | 922 938 614 | 922 939 615 | 922 940 616 | 922 941 617 | 923 489 618 | 923 921 619 | 923 922 620 | 923 924 621 | 923 925 622 | 923 926 623 | 923 927 624 | 923 928 625 | 923 929 626 | 923 931 627 | 923 932 628 | 923 933 629 | 923 934 630 | 923 935 631 | 923 936 632 | 923 937 633 | 923 938 634 | 923 939 635 | 923 940 636 | 923 941 637 | 924 921 638 | 924 922 639 | 924 923 640 | 924 925 641 | 924 926 642 | 924 927 643 | 924 928 644 | 924 929 645 | 924 931 646 | 924 932 647 | 924 933 648 | 924 934 649 | 924 935 650 | 924 936 651 | 924 937 652 | 924 938 653 | 924 939 654 | 924 940 655 | 924 941 656 | 924 942 657 | 925 921 658 | 925 922 659 | 925 923 660 | 925 924 661 | 925 926 662 | 925 927 663 | 925 928 664 | 925 929 665 | 925 931 666 | 925 932 667 | 925 933 668 | 925 934 669 | 925 935 670 | 925 936 671 | 925 937 672 | 925 938 673 | 925 939 674 | 925 940 675 | 925 941 676 | 926 921 677 | 926 922 678 | 926 923 679 | 926 924 680 | 926 925 681 | 926 927 682 | 926 928 683 | 926 929 684 | 926 931 685 | 926 932 686 | 926 933 687 | 926 934 688 | 926 935 689 | 926 936 690 | 926 937 691 | 926 938 692 | 926 939 693 | 926 940 694 | 926 941 695 | 927 921 696 | 927 922 697 | 927 923 698 | 927 924 699 | 927 925 700 | 927 926 701 | 927 928 702 | 927 929 703 | 927 931 704 | 927 932 705 | 927 933 706 | 927 934 707 | 927 935 708 | 927 936 709 | 927 937 710 | 927 938 711 | 927 939 712 | 927 940 713 | 927 941 714 | 928 921 715 | 928 922 716 | 928 923 717 | 928 924 718 | 928 925 719 | 928 926 720 | 928 927 721 | 928 929 722 | 928 931 723 | 928 932 724 | 928 933 725 | 928 934 726 | 928 935 727 | 928 936 728 | 928 937 729 | 928 938 730 | 928 939 731 | 928 940 732 | 928 941 733 | 929 28 734 | 929 69 735 | 929 102 736 | 929 104 737 | 929 157 738 | 929 896 739 | 929 921 740 | 929 922 741 | 929 923 742 | 929 924 743 | 929 925 744 | 929 926 745 | 929 927 746 | 929 928 747 | 929 931 748 | 929 932 749 | 929 933 750 | 929 934 751 | 929 935 752 | 929 936 753 | 929 937 754 | 929 938 755 | 929 939 756 | 929 940 757 | 929 941 758 | 930 921 759 | 930 922 760 | 930 923 761 | 930 924 762 | 930 925 763 | 930 926 764 | 930 927 765 | 930 928 766 | 930 929 767 | 930 931 768 | 930 932 769 | 930 933 770 | 930 934 771 | 930 935 772 | 930 936 773 | 930 937 774 | 930 938 775 | 930 939 776 | 930 940 777 | 930 941 778 | 931 921 779 | 931 922 780 | 931 923 781 | 931 924 782 | 931 925 783 | 931 926 784 | 931 927 785 | 931 928 786 | 931 929 787 | 931 932 788 | 931 933 789 | 931 934 790 | 931 935 791 | 931 936 792 | 931 937 793 | 931 938 794 | 931 939 795 | 931 940 796 | 931 941 797 | 932 921 798 | 932 922 799 | 932 923 800 | 932 924 801 | 932 925 802 | 932 926 803 | 932 927 804 | 932 928 805 | 932 929 806 | 932 931 807 | 932 933 808 | 932 934 809 | 932 935 810 | 932 936 811 | 932 937 812 | 932 938 813 | 932 939 814 | 932 940 815 | 932 941 816 | 933 921 817 | 933 922 818 | 933 923 819 | 933 924 820 | 933 925 821 | 933 926 822 | 933 927 823 | 933 928 824 | 933 929 825 | 933 931 826 | 933 932 827 | 933 934 828 | 933 935 829 | 933 936 830 | 933 937 831 | 933 938 832 | 933 939 833 | 933 940 834 | 933 941 835 | 934 169 836 | 934 252 837 | 934 921 838 | 934 922 839 | 934 923 840 | 934 924 841 | 934 925 842 | 934 926 843 | 934 927 844 | 934 928 845 | 934 929 846 | 934 931 847 | 934 932 848 | 934 933 849 | 934 935 850 | 934 936 851 | 934 937 852 | 934 938 853 | 934 939 854 | 934 940 855 | 934 941 856 | 935 809 857 | 935 921 858 | 935 922 859 | 935 923 860 | 935 924 861 | 935 925 862 | 935 926 863 | 935 927 864 | 935 928 865 | 935 929 866 | 935 931 867 | 935 932 868 | 935 933 869 | 935 934 870 | 935 936 871 | 935 937 872 | 935 938 873 | 935 939 874 | 935 940 875 | 935 941 876 | 936 921 877 | 936 922 878 | 936 923 879 | 936 924 880 | 936 925 881 | 936 926 882 | 936 927 883 | 936 928 884 | 936 929 885 | 936 931 886 | 936 932 887 | 936 933 888 | 936 934 889 | 936 935 890 | 936 937 891 | 936 938 892 | 936 939 893 | 936 940 894 | 936 941 895 | 937 921 896 | 937 922 897 | 937 923 898 | 937 924 899 | 937 925 900 | 937 926 901 | 937 927 902 | 937 928 903 | 937 929 904 | 937 931 905 | 937 932 906 | 937 933 907 | 937 934 908 | 937 935 909 | 937 936 910 | 937 938 911 | 937 939 912 | 937 940 913 | 937 941 914 | 938 79 915 | 938 921 916 | 938 922 917 | 938 923 918 | 938 924 919 | 938 925 920 | 938 926 921 | 938 927 922 | 938 928 923 | 938 929 924 | 938 931 925 | 938 932 926 | 938 933 927 | 938 934 928 | 938 935 929 | 938 936 930 | 938 937 931 | 938 939 932 | 938 940 933 | 938 941 934 | 939 921 935 | 939 922 936 | 939 923 937 | 939 924 938 | 939 925 939 | 939 926 940 | 939 927 941 | 939 928 942 | 939 929 943 | 939 931 944 | 939 932 945 | 939 933 946 | 939 934 947 | 939 935 948 | 939 936 949 | 939 937 950 | 939 938 951 | 939 940 952 | 939 941 953 | 940 921 954 | 940 922 955 | 940 923 956 | 940 924 957 | 940 925 958 | 940 926 959 | 940 927 960 | 940 928 961 | 940 929 962 | 940 931 963 | 940 932 964 | 940 933 965 | 940 934 966 | 940 935 967 | 940 936 968 | 940 937 969 | 940 938 970 | 940 939 971 | 940 941 972 | 941 921 973 | 941 922 974 | 941 923 975 | 941 924 976 | 941 925 977 | 941 926 978 | 941 927 979 | 941 928 980 | 941 929 981 | 941 931 982 | 941 932 983 | 941 933 984 | 941 934 985 | 941 935 986 | 941 936 987 | 941 937 988 | 941 938 989 | 941 939 990 | 941 940 991 | 953 954 992 | 954 953 993 | 964 146 994 | 964 479 995 | 990 140 996 | 990 249 997 | 990 308 998 | 990 319 999 | 990 326 1000 | 990 543 1001 | 990 579 1002 | 990 669 1003 | 990 754 1004 | 990 934 1005 | 992 274 1006 | 996 251 1007 | 996 311 1008 | 996 407 1009 | 996 510 1010 | 996 540 1011 | 996 541 1012 | 996 593 1013 | 996 686 1014 | 996 687 1015 | 996 780 1016 | 996 819 1017 | 996 855 1018 | 996 860 1019 | 996 901 1020 | -------------------------------------------------------------------------------- /AttriRank_inC/AttriRank.cpp: -------------------------------------------------------------------------------- 1 | /* ========================================================================================================= 2 | 3 | C++ Implementation of AttriRank 4 | Author: Ming-Han Feng 5 | 6 | for more details, please refer to the paper: 7 | Unsupervised Ranking using Graph Structures and Node Attributes 8 | Chin-Chi Hsu, Yi-An Lai, Wen-Hao Chen, Ming-Han Feng, and Shou-De Lin 9 | Web Search and Data Mining (WSDM), 2017 10 | 11 | === Requirements: g++ === 12 | compile: g++ -std=c++11 -O2 AttriRank.cpp -o AttriRank 13 | usage: AttriRank EdgeFile AttriFile [options] 14 | 15 | << options >> 16 | --unweighted (none) graph is unweighted (default: weighted) 17 | --undirected (none) graph is undirected (default: directed) 18 | -k, --kernel [rbf_ap|rbf|cosine] kernel used in AttriRank (default: rbf_ap) 19 | -i, --iter [MaximumIterations] maximum number of iterations in power method (default: 100) 20 | -c, --conv [ConvergenceThreshold] the convergence threshold in power method (default: 1.0e-6) 21 | -d, --damp [start,step,end] damping factor (default: 0.0,0.2,1.0) 22 | -t, --total [alpha,beta] TotalRank parameters (default: 1,1) 23 | 24 | e.g. AttriRank graph.edge graph.attri -d 0.7,0.02,0.9 -t 1e-9 --unweighted 25 | e.g. AttriRank edge.txt attri.txt --undirected -i 200 -k rbf 26 | 27 | === EdgeFile format === 28 | EachLine: NodeFromID NodeToID (weight) 29 | Note: the weight is set to 1.0 in weighted version if there is no third value provided in a line 30 | 31 | e.g. 0 1 32 | 2 3 33 | 2 4 34 | e.g. 1 2 0.1 35 | 3 0 0.5 36 | 3 1 3 37 | 38 | === AttriFile format === 39 | FirstLine: AttributesCount 40 | Remaining: NodeID AttriIndex:AttriValue ... 41 | Note: unspecified entries will be set to 0.0 42 | 43 | e.g. 1606 44 | 41407 34:1 33:1 32:1 31:1 27:1 28:8 29:1 30:1 45 | 41380 17:240 16:1 114:2 8:2250 7:1 14:1 60:1 0:1 121:1 120:3 15:35 61:2 9:1 12:12 13:1 46 | e.g. 234 47 | 2 5:0.85 6:-1.43 7:1.84 8:5.64 10:9.27 11:9.18 48 | 1 0:1.79 1:1.79 2:0.00 3:0.00 4:1.00 5:1.00 6:-2.83 49 | 50 | === Output format === 51 | FileName: attrirank_(DampingFactor).txt / attrirank_total.txt 52 | EachLine: NodeID AttriRankScore 53 | 54 | === Miscellaneous === 55 | This implementation uses L1-Norm to check convergence, NodeCount * ConvergenceThreshold. 56 | 57 | ========================================================================================================= */ 58 | 59 | #include 60 | #include 61 | #include 62 | #include 63 | #include 64 | #include 65 | #include 66 | #include 67 | #define MAXLINELEN 8000 68 | #define MAXPATHLEN 160 69 | 70 | struct Node { 71 | static std::forward_list dangle; 72 | static std::forward_list normal; 73 | static std::valarray piNew; 74 | static std::valarray piOld; 75 | int id; 76 | std::unordered_map outedge; 77 | std::valarray attriVector; 78 | double *xOld; 79 | double *xNew; 80 | 81 | Node(int &i, int &count, std::forward_list &nodeList): id(i) { 82 | attriVector = std::valarray(0.0, count); 83 | nodeList.push_front(this); 84 | } 85 | ~Node() {} 86 | void setTransition(void) { 87 | xOld = &piOld[id]; 88 | xNew = &piNew[id]; 89 | if (outedge.empty()) { 90 | dangle.push_front(this); 91 | } else { 92 | normal.push_front(this); 93 | double sum = 0.0; 94 | for (auto &x: outedge) 95 | sum += x.second; 96 | for (auto &x: outedge) 97 | x.second /= sum; 98 | } 99 | return; 100 | } 101 | }; 102 | std::forward_list Node::dangle; 103 | std::forward_list Node::normal; 104 | std::valarray Node::piNew; 105 | std::valarray Node::piOld; 106 | 107 | void runAttriRank(const std::valarray &provec, const double &damp, const int &maxiter, const double threshold) { 108 | printf("\tDampingFactor: %.2f\n", damp); 109 | if (damp == 0.0) { 110 | Node::piNew = provec; 111 | return; 112 | } 113 | Node::piOld = 1.0 / static_cast(provec.size()); 114 | for (int iteration = 1; iteration <= maxiter; ++iteration) { 115 | const double&& dangleSum = [] () { double s = 0.0; for (Node *v: Node::dangle) s += *v->xOld; return s; } (); 116 | Node::piNew = (dangleSum * damp + (1.0 - damp)) * provec; 117 | for (Node *v: Node::normal) { 118 | const double&& dampScore = damp * *v->xOld; 119 | for (auto &x: v->outedge) 120 | *x.first->xNew += x.second * dampScore; 121 | } 122 | const double&& err = std::abs(Node::piNew - Node::piOld).sum(); 123 | if (err < threshold) return; 124 | Node::piOld = Node::piNew; 125 | } 126 | printf("\t\tfailed to converge in %d iterations.\n", maxiter); 127 | return; 128 | } 129 | 130 | void runTotalRank(const std::valarray &provec, const int &alpha, const int &beta, const int &maxiter, const double threshold) { 131 | printf("\tTotalRank: (alpha=%d, beta=%d)\n", alpha, beta); 132 | // Node::piOld is used as pho_current in this approach. 133 | Node::piNew = Node::piOld = (static_cast(beta) / static_cast(alpha + beta)) * provec; 134 | for (int iteration = 1; iteration <= maxiter; ++iteration) { 135 | const double&& dangleSum = [] () { double s = 0.0; for (Node *v: Node::dangle) s += *v->xOld; return s; } (); 136 | std::valarray&& pho = dangleSum * provec; 137 | for (Node *v: Node::normal) { 138 | for (auto &x: v->outedge) 139 | pho[x.first->id] += x.second * *v->xOld; 140 | } 141 | pho *= static_cast(iteration + alpha - 1) / static_cast(iteration + alpha + beta); 142 | Node::piNew += pho; 143 | const double&& err = pho.sum(); 144 | if (err < threshold) return; 145 | Node::piOld = pho; 146 | } 147 | printf("\t\tfailed to converge in %d iterations.\n", maxiter); 148 | return; 149 | } 150 | 151 | void outputFile(const char *fileName, std::forward_list &nodeList) { 152 | nodeList.sort([] (Node *a, Node *b) { return (*a->xNew > *b->xNew); }); 153 | FILE *fp = fopen(fileName, "w"); 154 | for (Node *v: nodeList) 155 | fprintf(fp, "%d %e\n", v->id, *v->xNew); 156 | fclose(fp); 157 | return; 158 | } 159 | 160 | inline int wrongFormat(char *opt) { 161 | printf(">>> option '%s' needs parameter(s)\n", opt); 162 | return 0; 163 | } 164 | 165 | int main(int argc, char **argv) { 166 | if (argc < 3) { 167 | printf(">>> The program needs at least 2 arguments: EdgeFile & AttriFile\n"); 168 | return 0; 169 | } 170 | char argKernel[MAXPATHLEN] = "rbf_ap"; 171 | bool unweighted = false; 172 | bool undirected = false; 173 | double converg = 1.0e-6; 174 | double damp[3] = { 0.0, 0.2, 1.0 }; 175 | int param[2] = { 1, 1 }; 176 | int maxiter = 100; 177 | for (int i = 3; i < argc; ++i) { 178 | if ((strcmp("-d", argv[i]) == 0) or (strcmp("--damp", argv[i]) == 0)) { 179 | if (++i >= argc) return wrongFormat(argv[i - 1]); 180 | if (3 != sscanf(argv[i], "%lf,%lf,%lf", &damp[0], &damp[1], &damp[2])) { 181 | damp[0] = 0.0; damp[1] = 0.2; damp[2] = 1.0; 182 | } 183 | if (damp[0] < 0.0) damp[0] = 0.0; 184 | if (damp[2] > 1.0) damp[2] = 1.0; 185 | } else if (strcmp("--unweighted", argv[i]) == 0) { 186 | unweighted = true; 187 | } else if (strcmp("--undirected", argv[i]) == 0) { 188 | undirected = true; 189 | } else if ((strcmp("-k", argv[i]) == 0) or (strcmp("--kernel", argv[i]) == 0)) { 190 | if (++i >= argc) return wrongFormat(argv[i - 1]); 191 | strncpy(argKernel, argv[i], MAXPATHLEN - 1); 192 | } else if ((strcmp("-i", argv[i]) == 0) or (strcmp("--iter", argv[i]) == 0)) { 193 | if (++i >= argc) return wrongFormat(argv[i - 1]); 194 | maxiter = atoi(argv[i]); 195 | if (maxiter < 0) maxiter = 100; 196 | } else if ((strcmp("-c", argv[i]) == 0) or (strcmp("--conv", argv[i]) == 0)) { 197 | if (++i >= argc) return wrongFormat(argv[i - 1]); 198 | converg = atof(argv[i]); 199 | if (converg < 0) converg = 1.0e-6; 200 | } else if ((strcmp("-t", argv[i]) == 0) or (strcmp("--total", argv[i]) == 0)) { 201 | if (++i >= argc) return wrongFormat(argv[i - 1]); 202 | if (2 != sscanf(argv[i], "%d,%d", ¶m[0], ¶m[1])) { 203 | param[0] = 1; param[1] = 1; 204 | } 205 | if (param[0] < 0) param[0] = 1; 206 | if (param[1] < 0) param[1] = 1; 207 | } else { 208 | printf("\tunknown argument: %s\n", argv[i]); 209 | } 210 | } 211 | printf("[GraphType] %s + %s\n", unweighted ? "unweighted" : "weighted", undirected ? "undirected" : "directed"); 212 | printf("[MaxIterations] %d\n", maxiter); 213 | printf("[ConvThreshold] %.2e\n", converg); 214 | // args parse end 215 | std::unordered_map nodes; 216 | std::forward_list nodeList; 217 | /* AttriFile */ 218 | int attriCount; 219 | { 220 | int u, a, s, i; 221 | double f; char buff[MAXLINELEN]; 222 | FILE *fp = fopen(argv[2], "r"); 223 | fgets(buff, MAXLINELEN - 1, fp); 224 | sscanf(buff, "%d", &attriCount); 225 | printf("AttriCount: %d\n", attriCount); 226 | while (fgets(buff, MAXLINELEN - 1, fp) != NULL) { 227 | sscanf(buff, "%d%n", &u, &i); 228 | if (nodes.count(u) == 0) nodes[u] = new Node(u, attriCount, nodeList); 229 | while (sscanf(buff + i, "%d:%lf%n", &a, &f, &s) == 2) { 230 | nodes[u]->attriVector[a] = f; 231 | i += s; 232 | } 233 | } 234 | fclose(fp); 235 | } 236 | /* EdgeFile */ 237 | { 238 | int u, v, arg; 239 | double w; char buff[MAXLINELEN]; 240 | FILE *fp = fopen(argv[1], "r"); 241 | while (fgets(buff, MAXLINELEN - 1, fp) != NULL) { 242 | arg = sscanf(buff, "%d %d %lf", &u, &v, &w); 243 | if (nodes.count(u) == 0) nodes[u] = new Node(u, attriCount, nodeList); 244 | if (nodes.count(v) == 0) nodes[v] = new Node(v, attriCount, nodeList); 245 | if (unweighted) { 246 | nodes[u]->outedge[nodes[v]] = 1.0; 247 | if (undirected) 248 | nodes[v]->outedge[nodes[u]] = 1.0; 249 | } else { 250 | if (arg == 2) w = 1.0; 251 | nodes[u]->outedge[nodes[v]] += w; 252 | if (undirected) 253 | nodes[v]->outedge[nodes[u]] += w; 254 | } 255 | } 256 | fclose(fp); 257 | } 258 | const double&& nodeCount = static_cast(nodes.size()); 259 | /* Standardization */ 260 | for (int i = 0; i < attriCount; ++i) { 261 | double e1x = 0.0; 262 | double e2x = 0.0; 263 | for (Node *v: nodeList) { 264 | e1x += v->attriVector[i]; 265 | e2x += v->attriVector[i] * v->attriVector[i]; 266 | } 267 | const double&& mean = e1x / nodeCount; 268 | const double&& std = std::sqrt(e2x / nodeCount - mean * mean); 269 | for (Node *v: nodeList) 270 | v->attriVector[i] = (std > 0.0) ? ((v->attriVector[i] - mean) / std) : 0.0; 271 | } 272 | /* TransitionMatrix */ 273 | printf("Generate Transition Matrix\n"); 274 | Node::piNew = std::valarray(nodes.size()); 275 | Node::piOld = std::valarray(nodes.size()); 276 | for (Node *v: nodeList) 277 | v->setTransition(); 278 | const double&& gamma = 1.0 / attriCount; 279 | std::valarray resetVec(0.0, nodes.size()); 280 | int transProcess = static_cast(nodes.size()); 281 | if (strcmp("rbf", argKernel) == 0) { 282 | printf("\tusing RBF kernel\n"); 283 | for (auto it1 = nodeList.begin(); it1 != nodeList.end(); ++it1) { 284 | resetVec[(*it1)->id] += 1.0; 285 | for (auto it2 = std::next(it1); it2 != nodeList.end(); ++it2) { 286 | double&& s12 = [&gamma] (const Node *a, const Node *b) { 287 | return std::exp(-gamma * std::pow(a->attriVector - b->attriVector, 2.0).sum()); 288 | } (*it1, *it2); 289 | resetVec[(*it1)->id] += s12; 290 | resetVec[(*it2)->id] += s12; 291 | } 292 | printf("\r\tremain: %7d", --transProcess); 293 | } 294 | } else if (strcmp("cosine", argKernel) == 0) { 295 | printf("\tusing Cosine similarity kernel\n"); 296 | std::valarray unitSum(0.0, attriCount); 297 | for (Node *v: nodeList) { 298 | v->attriVector /= std::sqrt(std::pow(v->attriVector, 2.0).sum()); 299 | unitSum += v->attriVector; 300 | printf("\r\tremain(1/2): %7d", --transProcess); 301 | } 302 | transProcess = static_cast(nodes.size()); 303 | for (Node *v: nodeList) { 304 | resetVec[v->id] = ((v->attriVector * unitSum).sum() + nodeCount) / 2.0; 305 | printf("\r\tremain(2/2): %7d", --transProcess); 306 | } 307 | } else { 308 | printf("\tusing RBF kernel (approximation)\n"); 309 | std::valarray scalarW(nodes.size()); 310 | for (Node *v: nodeList) 311 | scalarW[v->id] = std::exp(-gamma * std::pow(v->attriVector, 2.0).sum()); 312 | std::valarray vectorB(0.0, attriCount); 313 | std::valarray *matrixC = new std::valarray[attriCount]; 314 | for (int i = 0; i < attriCount; ++i) 315 | matrixC[i] = std::valarray(0.0, attriCount); 316 | for (Node *v: nodeList) { 317 | std::valarray&& wx = scalarW[v->id] * v->attriVector; 318 | vectorB += wx; 319 | for (int i = 0; i < attriCount; ++i) 320 | matrixC[i] += wx * v->attriVector[i]; 321 | printf("\r\tremain(1/2): %7d", --transProcess); 322 | } 323 | vectorB *= 2.0 * gamma; 324 | for (int i = 0; i < attriCount; ++i) 325 | matrixC[i] *= 2.0 * gamma * gamma; 326 | double&& scalarA = scalarW.sum(); 327 | transProcess = static_cast(nodes.size()); 328 | for (Node *v: nodeList) { 329 | std::valarray Cx(attriCount); 330 | for (int i = 0; i < attriCount; ++i) 331 | Cx[i] = (matrixC[i] * v->attriVector).sum(); 332 | resetVec[v->id] = scalarW[v->id] * (scalarA + (v->attriVector * (vectorB + Cx)).sum()); 333 | printf("\r\tremain(2/2): %7d", --transProcess); 334 | } 335 | delete[] matrixC; 336 | } 337 | putchar('\n'); resetVec /= resetVec.sum(); 338 | /* AttriRank */ 339 | printf("Run AttriRank Model\n"); 340 | for (double df = damp[0]; df <= damp[2]; df += damp[1]) { 341 | runAttriRank(resetVec, df, maxiter, nodeCount * converg); 342 | char fileName[40]; 343 | sprintf(fileName, "attrirank_%.3f.txt", df); 344 | outputFile(fileName, nodeList); 345 | } 346 | runTotalRank(resetVec, param[0], param[1], maxiter, nodeCount * converg); 347 | outputFile("attrirank_total.txt", nodeList); 348 | return 0; 349 | } 350 | --------------------------------------------------------------------------------