├── attrirank.pdf
├── .travis.yml
├── requirements.txt
├── README.md
├── src
├── main.py
└── AttriRank.py
├── tests
└── test_attrirank.py
├── AttriRank_inC
├── edge.txt
└── AttriRank.cpp
└── sample
└── graph.edgelist
/attrirank.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ntumslab/AttriRank/HEAD/attrirank.pdf
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | language: python
2 | cache: pip
3 | sudo: required
4 |
5 |
6 | python:
7 | - "3.5"
8 |
9 |
10 | before_install:
11 | - pip install -U pip
12 | - pip install wheel
13 | - pip install coveralls
14 | - sudo apt-get update
15 |
16 |
17 | env:
18 | global:
19 | - PIP_WHEEL_DIR=$HOME/.cache/pip/wheels
20 | - PIP_FIND_LINKS=file://$HOME/.cache/pip/wheels
21 |
22 |
23 | install:
24 | - pip wheel -r requirements.txt
25 | - pip install -r requirements.txt
26 |
27 |
28 | script:
29 | - py.test . --cov=./
30 | - flake8 ./
31 |
32 |
33 | after_success:
34 | - coveralls
35 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | # the modular source code checker: pep8, pyflakes and co
2 | # https://pypi.python.org/pypi/flake8/2.5.4
3 | flake8==2.5.4
4 |
5 | # pytest: simple powerful testing with Python
6 | # https://pypi.python.org/pypi/pytest/2.8.3
7 | pytest==2.8.3
8 |
9 | # Code coverage measurement for Python
10 | # https://pypi.python.org/pypi/coverage
11 | coverage==4.2
12 |
13 | # Pytest plugin for measuring coverage.
14 | # https://pypi.python.org/pypi/pytest-cov/2.2.0
15 | pytest-cov==2.2.0
16 |
17 | # Powerful extensions to the datetime module available in the Python standard library.
18 | # https://pypi.python.org/pypi/python-dateutil/2.5.3
19 | python-dateutil==2.5.3
20 |
21 | # NumPy: array processing for numbers, strings, records, and objects.
22 | # https://pypi.python.org/pypi/numpy
23 | numpy==1.11.1
24 |
25 | # World timezone definitions, modern and historical
26 | # https://pypi.python.org/pypi/pytz
27 | pytz==2016.7
28 |
29 | # Powerful data structures for data analysis, time series, and statistics
30 | # https://pypi.python.org/pypi/pandas/0.18.1
31 | pandas==0.18.1
32 |
33 | # SciPy: an ecosystem of open-source software for mathematics, science, and engineering.
34 | # https://pypi.python.org/pypi/scipy/0.18.0
35 | scipy==0.18.0
36 |
37 | # Powerful Python module for machine learning
38 | # https://pypi.python.org/pypi/scikit-learn/0.17.1
39 | scikit-learn==0.17.1
40 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # AttriRank
2 | [![Build Status](https://travis-ci.org/ntumslab/AttriRank.svg?branch=master)](https://travis-ci.org/ntumslab/AttriRank)
3 |
4 | AttriRank is an unsupervised ranking model that considers not only graph structure but also the attributes of nodes.
5 |
6 | A reference implementation of *AttriRank* in the paper (please see the file - attrirank.pdf):
7 | > Unsupervised Ranking using Graph Structures and Node Attributes
8 | > Chin-Chi Hsu, Yi-An Lai, Wen-Hao Chen, Ming-Han Feng, and Shou-De Lin
9 | > Web Search and Data Mining (WSDM), 2017
10 |
11 | ## Usage
12 |
13 | ### Example
14 | Run AttriRank on the sample graph and its node features, using the damping factors [0.2, 0.5, 0.8]:
15 |
16 | python src/main.py --damp 0.2 0.5 0.8 --inputgraph sample/graph.edgelist --inputfeature sample/graph.feature
17 |
18 | #### Options
19 | To list the optional arguments (for example AttriRank with a prior, or a different similarity kernel), run:
20 |
21 | python src/main.py --help
22 |
23 | ### Inputs
24 | Supported graph format is the edgelist:
25 |
26 | node_from node_to
27 |
28 | Supported feature format is the table (Comma-Separated Values):
29 |
30 | node_i, feat_dim_1, feat_dim_2, ...
31 |
32 | Default settings for graph are directed and unweighted.
33 |
34 | ### Output
35 |
36 | A comma-separated table of ranking scores with columns: [node_id, damp1, damp2, ...]
37 |
38 | node_id,0.2,0.5,0.8
39 | 0,score_1,score_2,score_3
40 | ...
41 |
42 | where score_1 is the ranking score of node 0 using AttriRank with damp 0.2.
43 |
44 | ## Requirements
45 | Install all dependencies:
46 |
47 | pip install -r requirements.txt
48 |
49 | ## Citing
50 |
51 | If you find *AttriRank* useful in your research, please consider citing the paper:
52 |
53 | @inproceedings{Hsu:2017:URU:3018661.3018668,
54 | author = {Hsu, Chin-Chi and Lai, Yi-An and Chen, Wen-Hao and Feng, Ming-Han and Lin, Shou-De},
55 | title = {Unsupervised Ranking Using Graph Structures and Node Attributes},
56 | booktitle = {Proceedings of the Tenth ACM International Conference on Web Search and Data Mining},
57 | series = {WSDM '17},
58 | year = {2017},
59 | }
60 |
61 | ## Miscellaneous
62 |
63 | If you have any questions about the paper, please contact the authors.
64 | If you have any questions about the code, please contact the authors.
65 |
--------------------------------------------------------------------------------
/src/main.py:
--------------------------------------------------------------------------------
1 | """
2 | Implementation of AttriRank.
3 |
4 | Author: Yi-An Lai
5 |
6 | For more details, refer to the paper:
7 | Unsupervised Ranking using Graph Structures and Node Attributes
8 | Chin-Chi Hsu, Yi-An Lai, Wen-Hao Chen, Ming-Han Feng, and Shou-De Lin
9 | Web Search and Data Mining (WSDM), 2017
10 | """
11 |
12 | import argparse
13 | import numpy as np
14 | import pandas as pd
15 |
16 | from AttriRank import AttriRank
17 |
18 |
def parse_args():
    """Parse the AttriRank command-line arguments from ``sys.argv``.

    Returns:
        argparse.Namespace with the attributes ``inputgraph``,
        ``inputfeature``, ``output``, ``kernel``, ``damp``, ``totalrank``,
        ``alpha``, ``beta``, ``matrix``, ``print_every``, ``itermax``,
        ``weighted`` and ``directed``.

    An unsupported ``--kernel`` value is rejected here by argparse (usage
    message, exit status 2) rather than failing later inside the model.
    """
    parser = argparse.ArgumentParser(description="Run AttriRank.")

    parser.add_argument('--inputgraph', nargs='?',
                        default='sample/graph.edgelist',
                        help='Input graph path')

    parser.add_argument('--inputfeature', nargs='?',
                        default='sample/graph.feature',
                        help='Input feature path')

    parser.add_argument('--output', nargs='?', default='graph.rankscore',
                        help='Output rankscore path')

    # Restrict to the kernels that AttriRank.ResetProbVec implements.
    parser.add_argument('--kernel', default='rbf_ap',
                        choices=['rbf_ap', 'rbf', 'cos', 'euc', 'sigmoid'],
                        help='Kernel: rbf_ap, rbf, cos, euc, sigmoid')

    parser.add_argument('--damp', nargs='*', default=[0.5], type=float,
                        help='damping parameters')

    # store_true / store_false flags already default to False / True,
    # so no separate set_defaults() call is needed.
    parser.add_argument('--totalrank', dest='totalrank', action='store_true',
                        help='Use TotalRank or not. Default is False.')

    parser.add_argument('--alpha', type=float, default=1.0,
                        help='alpha of beta distribution. Default is 1.0.')

    parser.add_argument('--beta', type=float, default=1.0,
                        help='beta of beta distribution. Default is 1.0.')

    parser.add_argument('--matrix', dest='matrix', action='store_true',
                        help='Using original Q matrix. Default is False.')

    parser.add_argument('--print_every', type=int, default=1000,
                        help='Print TotalRank process. Default is 1000.')

    parser.add_argument('--itermax', type=int, default=100000,
                        help='Number of max iterations. Default is 100000.')

    parser.add_argument('--weighted', dest='weighted', action='store_true',
                        help='Specifying (un)weighted. Default is unweighted.')

    parser.add_argument('--undirected', dest='directed', action='store_false',
                        help='Graph is (un)directed. Default is directed.')

    return parser.parse_args()
71 |
72 |
def load_graph(filename):
    """Load a header-less, space-separated edge list as a NumPy array.

    Each line of the file becomes one row of the returned array.
    """
    edge_table = pd.read_csv(filename, header=None, sep=' ')
    return edge_table.values
76 |
77 |
def load_features(filename):
    """Load the node feature table as a NumPy array ordered by node id.

    The file is a header-less CSV whose first column is the node id and
    whose remaining columns are the feature dimensions. AttriRank treats
    the i-th row of the feature matrix as the features of node i, so the
    rows are sorted by node id before the id column is dropped. A stable
    sort is used so a file that is already ordered is returned unchanged.
    """
    table = pd.read_csv(filename, header=None).set_index(0)
    return table.sort_index(kind='mergesort').values
81 |
82 |
def main(args):
    """Rank the nodes of a graph from its edges and node features.

    Reads the edge list and feature table named in ``args``, runs AttriRank
    with the requested options, and writes the scores to ``args.output`` as
    a CSV whose index column is labelled ``node_id``.
    """
    edges = load_graph(args.inputgraph)
    features = load_features(args.inputfeature)

    # An undirected graph is modelled by adding the reverse of every edge.
    if not args.directed:
        reversed_edges = edges[:, [1, 0]]
        edges = np.concatenate((edges, reversed_edges))

    # The number of nodes is taken from the number of feature rows.
    ranker = AttriRank(edges, features, itermax=args.itermax,
                       weighted=args.weighted, nodeCount=len(features))

    rank_scores = ranker.runModel(factors=args.damp, kernel=args.kernel,
                                  Matrix=args.matrix,
                                  TotalRank=args.totalrank,
                                  alpha=args.alpha, beta=args.beta,
                                  print_every=args.print_every)

    score_table = pd.DataFrame(data=rank_scores)
    score_table.to_csv(args.output, float_format='%.16f',
                       index_label='node_id')
104 |
105 |
# Run the pipeline only when executed as a script, so that importing this
# module does not parse sys.argv or start a ranking run as a side effect.
if __name__ == '__main__':
    main(parse_args())
108 |
--------------------------------------------------------------------------------
/tests/test_attrirank.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | from unittest import TestCase
4 |
5 | from scipy.sparse import csr_matrix
6 | from sklearn import preprocessing
7 | from sklearn.metrics.pairwise import (rbf_kernel, cosine_similarity,
8 | sigmoid_kernel, euclidean_distances)
9 | from collections import defaultdict
10 |
11 | import numpy as np
12 | import sys
13 | import os
14 |
15 | sys.path.append(os.getcwd() + '/src') # noqa
16 |
17 | from AttriRank import AttriRank
18 |
19 |
class TestAttriRank(TestCase):
    """Compare AttriRank with straightforward reference implementations.

    The helper methods (``reset_vec``, ``reset_mat``, ``trans_mat``,
    ``PageRank``, ``totalrank``, ``run_model``) recompute each quantity
    independently; the ``test_*`` methods check that AttriRank agrees with
    them to within ``TOLERANCE`` in L2 norm.
    """

    # Maximum accepted L2 distance between reference and AttriRank results.
    TOLERANCE = 1e-10

    def setUp(self):
        """Build a random graph and a random feature matrix.

        A seeded generator is used so that every run sees the same data and
        any failure can be reproduced.
        """
        rng = np.random.RandomState(0)
        self.node = 1000
        nodefrom = rng.choice(self.node, 20 * self.node)
        nodeto = rng.choice(self.node, 20 * self.node)
        self.fake_graph = np.array(list(zip(nodefrom, nodeto)))
        self.fake_features = rng.randn(self.node, 20)

    def assert_close(self, expected, actual):
        """Fail unless the L2 distance of the two arrays is below TOLERANCE."""
        distance = np.linalg.norm(np.asarray(expected) - np.asarray(actual))
        self.assertLess(distance, self.TOLERANCE)

    def reset_vec(self, kernel='rbf_ap'):
        """Reference reset probability vector for the given kernel."""
        feat = preprocessing.scale(self.fake_features)
        count = feat.shape[1]
        if kernel == 'rbf':
            temp = rbf_kernel(feat, gamma=1.0 / count).sum(axis=0)
        elif kernel == 'cos':
            temp = ((cosine_similarity(feat) + 1) / 2.0).sum(axis=0)
        elif kernel == 'euc':
            temp = (1.0 / (euclidean_distances(feat) + 1)).sum(axis=0)
        elif kernel == 'sigmoid':
            Sig = sigmoid_kernel(feat, coef0=0, gamma=1.0 / count)
            temp = ((Sig + 1.0) / 2.0).sum(axis=0)
        elif kernel == 'rbf_ap':
            gamma = 1.0 / count
            expVec = np.exp(- gamma * np.einsum("ij, ij -> i", feat, feat))
            feaVec = np.einsum("i, ij -> j", expVec, feat) * (2.0 * gamma)
            outMat = np.einsum("i,ij,ik->jk", expVec, feat, feat)
            outMat *= (2.0 * gamma ** 2)

            first = expVec * np.sum(expVec)
            second = np.einsum("i, j, ij -> i", expVec, feaVec, feat)
            third = np.einsum("i, jk, ij, ik -> i", expVec, outMat, feat, feat)
            temp = first + second + third

        return (temp / np.sum(temp))

    def reset_mat(self):
        """Reference column-normalised RBF similarity matrix Q."""
        feat = preprocessing.scale(self.fake_features)
        RBF = rbf_kernel(feat, gamma=1.0 / feat.shape[1])
        return RBF / RBF.sum(axis=0)

    def trans_mat(self, weighted=True):
        """Reference transition matrix and indices of dangling nodes.

        Entry (to, from) holds the normalised weight of the edge
        from -> to; ``weighted`` counts repeated edges, otherwise every
        distinct edge has weight 1.
        """
        links = defaultdict(int)
        for nodefrom, nodeto in self.fake_graph:
            links[(nodefrom, nodeto)] += 1.0

        en_col_row = [[], [], []]
        for key, val in links.items():
            val = val if weighted else 1
            en_col_row[0].append(val)
            en_col_row[1].append(key[0])
            en_col_row[2].append(key[1])

        traMat = csr_matrix((en_col_row[0], (en_col_row[2], en_col_row[1])),
                            shape=(self.node, self.node))
        traMat = traMat.multiply(csr_matrix(1.0 / traMat.sum(axis=0)))
        col_sum = np.array(traMat.sum(axis=0))[0]
        dangVec = np.arange(col_sum.shape[0])[col_sum == 0]

        return traMat, dangVec

    def PageRank(self, damp, Matrix=False, kernel='rbf_ap'):
        """Reference power iteration.

        Returns the final score vector and a dict mapping ``damp`` to the
        list of score vectors recorded after each update.
        """
        traMat, dang = self.trans_mat()
        if Matrix:
            reMat = self.reset_mat()
        reVec = self.reset_vec(kernel=kernel)

        if damp == 0:
            return reVec, {}

        track = {}
        track[damp] = []
        result = np.ones(self.node) / self.node

        for i in range(1000000):
            # Score held by dangling nodes is redistributed via reVec.
            dangScore = np.sum(result[dang]) * reVec
            tele = reMat.dot(result) if Matrix else reVec
            new = (1.0 - damp) * tele + damp * (traMat.dot(result) + dangScore)
            if np.linalg.norm(new - result) < 1e-10:
                break

            result = new
            track[damp].append(result)

        return result, track

    def totalrank(self, alpha=1, beta=1):
        """Reference TotalRank with a beta(alpha, beta) prior."""
        traMat, dang = self.trans_mat()
        reVec = self.reset_vec()

        rho_t = reVec * beta / (alpha + beta)
        pi_t = reVec * beta / (alpha + beta)

        for iterat in range(100000):
            P_rho = (traMat.dot(rho_t) + np.sum(rho_t[dang]) * reVec)
            rho_next = P_rho * (iterat+alpha) / (iterat+1+alpha+beta)
            pi_t += rho_next
            if np.linalg.norm(rho_next) < 1e-10:
                break

            rho_t = rho_next

        return pi_t

    def run_model(self, damps, TotalRank=False, alpha=1, beta=1,
                  Matrix=False, kernel='rbf_ap'):
        """Reference counterpart of AttriRank.runModel (same dict layout)."""
        scores = {}
        if TotalRank:
            scores['total'] = list(self.totalrank(alpha=alpha, beta=beta))
        else:
            for damp in damps:
                score, _ = self.PageRank(damp, kernel=kernel, Matrix=Matrix)
                scores[str(damp)] = list(score)

        return scores

    def test_ResetProbVec(self):
        for kernel in ['rbf', 'cos', 'euc', 'sigmoid', 'rbf_ap']:
            AR = AttriRank(self.fake_graph, self.fake_features,
                           nodeCount=self.node)
            AR.ResetProbVec(kernel=kernel)
            scores = AR.resetProbVec.ravel()
            answers = self.reset_vec(kernel)
            self.assert_close(answers, scores)

    def test_ResetProbMat(self):
        AR = AttriRank(self.fake_graph, self.fake_features,
                       nodeCount=self.node)
        AR.ResetProbMat()
        scores = AR.resetProbMat.ravel()
        answers = self.reset_mat().ravel()
        self.assert_close(answers, scores)

    def test_TransMat(self):
        # Default (weighted) transition matrix.
        AR = AttriRank(self.fake_graph, self.fake_features,
                       nodeCount=self.node)
        AR.TransMat()
        scores = AR.transMat.toarray().ravel()
        answers_mat, answers_dang = self.trans_mat()
        self.assert_close(answers_mat.toarray().ravel(), scores)
        self.assert_close(answers_dang, AR.dangVec)

        # Unweighted transition matrix.
        AR = AttriRank(self.fake_graph, self.fake_features,
                       nodeCount=self.node, weighted=False)
        AR.TransMat()
        scores = AR.transMat.toarray().ravel()
        answers_mat, answers_dang = self.trans_mat(weighted=False)
        self.assert_close(answers_mat.toarray().ravel(), scores)
        self.assert_close(answers_dang, AR.dangVec)

    def test_runPageRank(self):
        # Approximated reset vector r.
        AR = AttriRank(self.fake_graph, self.fake_features,
                       nodeCount=self.node)
        AR.track = True
        scores = AR.runPageRank(damp=0.85)
        track = np.array(AR.track_scores[0.85])
        answers, ans_track = self.PageRank(damp=0.85)
        ans_track = np.array(ans_track[0.85])
        self.assert_close(answers, scores)
        self.assert_close(ans_track, track)

        # Exact matrix Q.
        AR = AttriRank(self.fake_graph, self.fake_features,
                       nodeCount=self.node)
        AR.track = True
        AR.Matrix = True
        scores = AR.runPageRank(damp=0.85)
        track = np.array(AR.track_scores[0.85])
        answers, ans_track = self.PageRank(damp=0.85, Matrix=True)
        ans_track = np.array(ans_track[0.85])
        self.assert_close(answers, scores)
        self.assert_close(ans_track, track)

    def test_TotalRank(self):
        AR = AttriRank(self.fake_graph, self.fake_features,
                       nodeCount=self.node)
        for alpha, beta in [(1, 1), (2, 4), (0.9, 0.8)]:
            TR_scores = AR.TotalRank(alpha=alpha, beta=beta)
            answers = self.totalrank(alpha=alpha, beta=beta)
            self.assert_close(answers, TR_scores)

    def test_runModel(self):
        AR = AttriRank(self.fake_graph, self.fake_features,
                       nodeCount=self.node)
        damps = [i/10.0 for i in range(10)]
        scores = AR.runModel(damps, kernel='cos')
        scores = np.array([scores[str(d)] for d in damps])
        answers = self.run_model(damps, kernel='cos')
        answers = np.array([answers[str(d)] for d in damps])
        self.assert_close(answers, scores)

        scores = AR.runModel(damps, TotalRank=True, alpha=3, beta=4)
        scores = np.array(scores['total'])
        answers = self.run_model(damps, TotalRank=True, alpha=3, beta=4)
        answers = np.array(answers['total'])
        self.assert_close(answers, scores)

        scores = AR.runModel(damps, Matrix=True)
        scores = np.array([scores[str(d)] for d in damps])
        answers = self.run_model(damps, Matrix=True)
        answers = np.array([answers[str(d)] for d in damps])
        self.assert_close(answers, scores)
227 |
--------------------------------------------------------------------------------
/src/AttriRank.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | import numpy as np
4 | from scipy.sparse import csr_matrix
5 | from sklearn import preprocessing
6 | from sklearn.metrics.pairwise import (rbf_kernel, cosine_similarity,
7 | sigmoid_kernel, euclidean_distances)
8 | from collections import defaultdict
9 |
10 |
class AttriRank(object):
    """Unsupervised node ranking from graph structure and node attributes.

    A PageRank-style iteration whose reset (teleport) distribution is
    derived from the similarity of node features, either as a vector
    (``ResetProbVec``) or as a full matrix Q (``ResetProbMat``).
    """

    # Iteration stops once the L2 norm of the update drops below this value.
    convergenceThreshold = 1e-10
    # True: teleport with the exact matrix Q; False: use the reset vector.
    Matrix = False
    # True: record the score vector after every update in track_scores.
    track = False
    # TotalRank prints its progress every `print_every` iterations.
    print_every = 1000
    # Class-level fallbacks kept for backward compatibility; __init__ gives
    # every instance its own dictionaries so instances do not share results.
    scores = {}
    track_scores = {}

    def __init__(self, graph, featureMatrix, itermax=100000,
                 weighted=True, nodeCount=None):
        """
        Standardize input features and set the basic parameters
        graph: [[node_from, node_to], ...]
        featureMatrix: N * d matrix; i-th node's feature is the i-th row
        itermax: maximum iterations
        weighted: transition Matrix weighted by number of links
        nodeCount: number of nodes; defaults to (largest node id + 1)
        """
        self.graph = np.array(graph)
        # NOTE(review): the features are divided by 100 before being
        # standardized; the reason is not visible here - confirm before
        # removing it.
        self.featMat = preprocessing.scale(np.array(featureMatrix) / 100.0)
        self.featCount = self.featMat.shape[1]

        if nodeCount is None:
            # Use the converted array so that plain lists are accepted too.
            self.nodeCount = self.graph.max() + 1
        else:
            self.nodeCount = nodeCount

        self.iterationMax = itermax
        self.weighted = weighted

        # Per-instance result containers (not shared through the class).
        self.scores = {}
        self.track_scores = {}

    def ResetProbVec(self, kernel='rbf_ap'):
        """
        Calculate the reset probability vector with assigned kernel
        and store it in self.resetProbVec (normalized to sum to 1)
        rbf: Radial basis function
        cos: (cosine similarity + 1) / 2.0
        euc: 1.0 / (1 + euclidean distances)
        sigmoid: (tanh(gamma * <x, y>) + 1) / 2.0
        rbf_ap: Taylor-expansion approximated Radial basis function
        Raises ValueError for any other kernel name.
        """
        gamma = 1.0 / self.featCount

        if kernel == 'rbf':
            similaritySums = rbf_kernel(self.featMat, gamma=gamma).sum(axis=0)

        elif kernel == 'cos':
            Cos = (cosine_similarity(self.featMat) + 1) / 2.0
            similaritySums = Cos.sum(axis=0)

        elif kernel == 'euc':
            Euc = 1.0 / (euclidean_distances(self.featMat) + 1)
            similaritySums = Euc.sum(axis=0)

        elif kernel == 'sigmoid':
            Sig = sigmoid_kernel(self.featMat, coef0=0, gamma=gamma)
            similaritySums = ((Sig + 1.0) / 2.0).sum(axis=0)

        elif kernel == 'rbf_ap':
            # exp(-g|x-y|^2) = exp(-g|x|^2) * exp(-g|y|^2) * exp(2g<x,y>);
            # the last factor is expanded to second order, which lets the
            # column sums be computed without the full N * N kernel matrix.
            # w
            lengths = np.einsum("ij, ij -> i", self.featMat, self.featMat)
            expNormVector = np.exp(- gamma * lengths)
            # y
            f_normVec = np.einsum("i, ij -> j", expNormVector, self.featMat)
            featureNormVector = f_normVec * (2.0 * gamma)
            # Z
            outerMat = np.einsum("i, ij, ik -> jk", expNormVector,
                                 self.featMat, self.featMat)
            featureOuterNorm = outerMat * (2.0 * gamma ** 2)
            # r' : zeroth-, first- and second-order terms
            first = expNormVector * np.sum(expNormVector)
            second = np.einsum("i, j, ij -> i", expNormVector,
                               featureNormVector, self.featMat)
            third = np.einsum("i, jk, ij, ik -> i", expNormVector,
                              featureOuterNorm, self.featMat, self.featMat)
            similaritySums = first + second + third

        else:
            raise ValueError(
                "Unknown kernel %r; expected one of "
                "'rbf_ap', 'rbf', 'cos', 'euc', 'sigmoid'" % (kernel,))

        # r
        self.resetProbVec = similaritySums / np.sum(similaritySums)

    def ResetProbMat(self):
        """Calculate the Q transition Matrix with RBF kernel"""
        parameter = 1.0 / self.featCount
        RBF = rbf_kernel(self.featMat, gamma=parameter)
        # Normalize every column so that it sums to 1.
        self.resetProbMat = RBF / RBF.sum(axis=0)

    def TransMat(self):
        """
        Construct transition matrix
        Sets self.transMat (entry [to, from] is the normalized weight of
        the edge from -> to) and self.dangVec (indices of nodes without
        outgoing edges).
        """
        links = defaultdict(int)

        for nodefrom, nodeto in self.graph:
            if self.weighted:
                # Repeated edges accumulate weight.
                links[(nodefrom, nodeto)] += 1.0
            else:
                links[(nodefrom, nodeto)] = 1.0

        entryList = list()
        rowList = list()
        columnList = list()

        for key, val in links.items():
            entryList.append(val)
            columnList.append(key[0])
            rowList.append(key[1])

        traMat = csr_matrix((entryList, (rowList, columnList)),
                            shape=(self.nodeCount, self.nodeCount))

        # Out-weight of every node (column sums).
        col_sum = np.asarray(traMat.sum(axis=0)).ravel()
        hasOutLinks = col_sum != 0

        # Invert only the non-zero sums: dangling columns hold no entries,
        # so their factor is irrelevant and dividing by zero is avoided.
        invColSum = np.zeros(col_sum.shape[0])
        invColSum[hasOutLinks] = 1.0 / col_sum[hasOutLinks]

        # transition matrix
        self.transMat = traMat.multiply(csr_matrix(invColSum))

        # find dangling nodes
        self.dangVec = np.arange(col_sum.shape[0])[~hasOutLinks]

    def runPageRank(self, damp=0.85, do=True, doTrans=True, kernel='rbf_ap'):
        """
        Run the power iteration for one damping factor and return the
        score vector.
        do: whether to compute the reset probability vector
        doTrans: whether to compute the transition matrix
        kernel: kernel passed to ResetProbVec
        """
        if doTrans:
            self.TransMat()
            print("\tGenerate transition matrix")

        if do:
            if self.Matrix:
                self.ResetProbMat()
                print("\tGenerate matrix Q")
            # The reset vector is needed in both modes: it redistributes
            # the score of dangling nodes and is the result for damp == 0.
            print("\tGenerate reset probability vector")
            self.ResetProbVec(kernel=kernel)

        if damp == 0:
            scoreVector = self.resetProbVec
            return scoreVector

        # record the scores of each update
        self.track_scores[damp] = []
        scoreVector = np.ones(self.nodeCount) / self.nodeCount

        for iteration in range(self.iterationMax):
            # Score sitting on dangling nodes is spread with the reset
            # vector instead of being lost.
            leak_scores = np.sum(scoreVector[self.dangVec])
            dangScore = leak_scores * self.resetProbVec

            if self.Matrix:
                teleport_prob = self.resetProbMat.dot(scoreVector)
            else:
                teleport_prob = self.resetProbVec

            newScoreVector = (1.0 - damp) * teleport_prob + \
                damp * (self.transMat.dot(scoreVector) + dangScore)
            error = np.linalg.norm(newScoreVector - scoreVector)

            if error < self.convergenceThreshold:
                break

            scoreVector = newScoreVector
            if self.track:
                self.track_scores[damp].append(scoreVector)

        return scoreVector

    def TotalRank(self, alpha=1, beta=1, kernel='rbf_ap'):
        """
        Implementation of TotalRank with beta distribution as the prior
        (alpha, beta): parameters for the beta distribution
        kernel: kernel passed to ResetProbVec
        Returns the accumulated score vector.
        """
        print("\tGenerate transition matrix and reset probability vector")
        self.TransMat()
        self.ResetProbVec(kernel=kernel)

        if self.track:
            # Start a fresh record for this run.
            self.track_scores['total'] = []

        rho_t = self.resetProbVec * beta / (alpha + beta)
        pi_t = self.resetProbVec * beta / (alpha + beta)

        for iteration in range(self.iterationMax):
            dangScore = np.sum(rho_t[self.dangVec]) * self.resetProbVec
            P_rho = (self.transMat.dot(rho_t) + dangScore)
            rho_next = P_rho * (iteration + alpha) / (iteration+1+alpha+beta)
            pi_t += rho_next
            error = np.linalg.norm(rho_next)

            # print_every <= 0 disables the progress output.
            if self.print_every > 0 and \
                    iteration % self.print_every == (self.print_every - 1):
                print("\tIteration %d:\t%.10f" % (iteration + 1, error))

            if error < self.convergenceThreshold:
                break

            rho_t = rho_next
            if self.track:
                # pi_t is updated in place, so store a snapshot.
                self.track_scores['total'].append(pi_t.copy())

        return pi_t

    def runModel(self, factors=(0.85,), Matrix=False, track=False,
                 TotalRank=False, alpha=1, beta=1, print_every=1000,
                 kernel='rbf_ap'):
        """
        Give a list of damping factors to work with
        return a dict: key=(damp factor); value=(scores of each node)
        (the single key is 'total' when TotalRank is used)
        Matrix: use the exact Q or approximated r (True for Q)
        track: record the score vector at each iteration during updating
        TotalRank: use TotalRank with a beta(alpha, beta) prior instead of
                   the given damping factors
        print_every: progress interval for TotalRank
        kernel: kernel used for the reset probability vector
        """
        self.Matrix = Matrix
        self.track = track
        self.print_every = print_every
        scores = {}

        if TotalRank:
            print("Run AttriRank with prior...")
            scores['total'] = list(self.TotalRank(alpha=alpha, beta=beta,
                                                  kernel=kernel))
        else:
            do = True
            doTrans = True
            for dampFac in factors:
                print("Run AttriRank, damp:", dampFac)
                score_vec = self.runPageRank(dampFac, do=do, doTrans=doTrans,
                                             kernel=kernel)

                # already have reset vector and transition matrix
                do = False
                doTrans = False
                scores[str(dampFac)] = list(score_vec)
        print("\tDone.")

        self.scores = scores

        return scores
245 |
--------------------------------------------------------------------------------
/AttriRank_inC/edge.txt:
--------------------------------------------------------------------------------
1 | 5 95
2 | 5 147
3 | 5 771
4 | 5 878
5 | 8 154
6 | 8 175
7 | 12 47
8 | 12 195
9 | 18 874
10 | 28 929
11 | 32 216
12 | 47 879
13 | 51 94
14 | 58 131
15 | 59 51
16 | 59 94
17 | 59 164
18 | 62 1
19 | 62 3
20 | 62 4
21 | 62 5
22 | 62 6
23 | 62 7
24 | 62 8
25 | 62 9
26 | 62 10
27 | 62 11
28 | 62 12
29 | 62 17
30 | 62 18
31 | 62 19
32 | 62 21
33 | 62 23
34 | 62 24
35 | 62 26
36 | 62 28
37 | 62 32
38 | 62 34
39 | 62 35
40 | 62 36
41 | 62 37
42 | 62 38
43 | 62 39
44 | 62 40
45 | 62 41
46 | 62 42
47 | 62 43
48 | 62 44
49 | 62 45
50 | 62 46
51 | 62 47
52 | 62 48
53 | 62 49
54 | 62 50
55 | 62 51
56 | 62 52
57 | 62 53
58 | 62 54
59 | 62 55
60 | 62 56
61 | 62 57
62 | 62 58
63 | 62 59
64 | 62 60
65 | 62 61
66 | 62 63
67 | 62 64
68 | 62 65
69 | 62 66
70 | 62 67
71 | 62 68
72 | 62 69
73 | 62 70
74 | 62 71
75 | 62 72
76 | 62 74
77 | 62 75
78 | 62 77
79 | 62 78
80 | 62 79
81 | 62 82
82 | 62 88
83 | 62 90
84 | 62 91
85 | 62 92
86 | 62 93
87 | 62 94
88 | 62 95
89 | 62 96
90 | 62 99
91 | 62 100
92 | 62 101
93 | 62 103
94 | 62 104
95 | 62 105
96 | 62 106
97 | 62 107
98 | 62 109
99 | 62 111
100 | 62 112
101 | 62 113
102 | 62 114
103 | 62 115
104 | 62 116
105 | 62 117
106 | 62 118
107 | 62 119
108 | 62 121
109 | 62 123
110 | 62 124
111 | 62 126
112 | 62 127
113 | 62 128
114 | 62 130
115 | 62 131
116 | 62 132
117 | 62 133
118 | 62 135
119 | 62 136
120 | 62 138
121 | 62 142
122 | 62 144
123 | 62 147
124 | 62 148
125 | 62 149
126 | 62 150
127 | 62 151
128 | 62 152
129 | 62 153
130 | 62 154
131 | 62 155
132 | 62 156
133 | 62 157
134 | 62 159
135 | 62 160
136 | 62 161
137 | 62 162
138 | 62 164
139 | 62 165
140 | 62 166
141 | 62 167
142 | 62 169
143 | 62 171
144 | 62 172
145 | 62 173
146 | 62 175
147 | 62 176
148 | 62 178
149 | 62 183
150 | 62 186
151 | 62 187
152 | 62 188
153 | 62 189
154 | 62 190
155 | 62 191
156 | 62 192
157 | 62 193
158 | 62 194
159 | 62 195
160 | 62 196
161 | 62 197
162 | 62 198
163 | 62 199
164 | 62 201
165 | 62 202
166 | 62 203
167 | 62 205
168 | 62 208
169 | 62 209
170 | 62 210
171 | 62 211
172 | 62 212
173 | 62 213
174 | 62 214
175 | 62 215
176 | 62 216
177 | 62 217
178 | 62 218
179 | 62 219
180 | 62 220
181 | 62 222
182 | 62 252
183 | 62 629
184 | 62 771
185 | 62 805
186 | 62 874
187 | 62 879
188 | 62 886
189 | 62 949
190 | 63 21
191 | 63 138
192 | 63 164
193 | 63 213
194 | 71 49
195 | 71 151
196 | 75 169
197 | 75 252
198 | 78 194
199 | 78 212
200 | 82 214
201 | 104 929
202 | 107 18
203 | 112 39
204 | 112 44
205 | 112 57
206 | 112 213
207 | 113 805
208 | 113 874
209 | 113 886
210 | 131 58
211 | 131 128
212 | 138 63
213 | 138 213
214 | 138 886
215 | 140 143
216 | 147 5
217 | 147 95
218 | 147 771
219 | 153 175
220 | 164 214
221 | 166 949
222 | 169 61
223 | 169 75
224 | 169 252
225 | 171 114
226 | 175 153
227 | 189 926
228 | 194 78
229 | 194 212
230 | 196 119
231 | 196 162
232 | 196 805
233 | 196 874
234 | 196 879
235 | 196 886
236 | 199 67
237 | 199 88
238 | 203 150
239 | 215 203
240 | 219 135
241 | 219 201
242 | 224 626
243 | 249 308
244 | 249 349
245 | 249 562
246 | 249 622
247 | 249 669
248 | 249 754
249 | 249 934
250 | 249 990
251 | 251 311
252 | 251 407
253 | 251 510
254 | 251 540
255 | 251 541
256 | 251 593
257 | 251 686
258 | 251 687
259 | 251 780
260 | 251 819
261 | 251 855
262 | 251 860
263 | 251 901
264 | 251 996
265 | 252 169
266 | 283 976
267 | 290 158
268 | 294 557
269 | 303 366
270 | 308 378
271 | 311 251
272 | 311 407
273 | 311 510
274 | 311 540
275 | 311 541
276 | 311 593
277 | 311 686
278 | 311 687
279 | 311 780
280 | 311 819
281 | 311 855
282 | 311 860
283 | 311 901
284 | 311 996
285 | 313 396
286 | 316 676
287 | 316 688
288 | 316 808
289 | 316 913
290 | 318 364
291 | 318 476
292 | 318 519
293 | 318 696
294 | 321 378
295 | 332 378
296 | 352 378
297 | 356 713
298 | 378 308
299 | 378 634
300 | 378 669
301 | 378 707
302 | 378 976
303 | 386 434
304 | 396 313
305 | 400 345
306 | 402 383
307 | 407 251
308 | 407 311
309 | 407 510
310 | 407 540
311 | 407 541
312 | 407 593
313 | 407 686
314 | 407 687
315 | 407 780
316 | 407 819
317 | 407 855
318 | 407 860
319 | 407 901
320 | 407 996
321 | 411 873
322 | 432 433
323 | 433 432
324 | 434 386
325 | 439 441
326 | 439 442
327 | 440 439
328 | 440 441
329 | 440 442
330 | 441 439
331 | 441 442
332 | 442 439
333 | 442 441
334 | 447 720
335 | 448 479
336 | 455 454
337 | 476 318
338 | 479 713
339 | 484 818
340 | 489 928
341 | 510 251
342 | 510 311
343 | 510 407
344 | 510 540
345 | 510 541
346 | 510 593
347 | 510 686
348 | 510 687
349 | 510 780
350 | 510 819
351 | 510 855
352 | 510 860
353 | 510 901
354 | 510 996
355 | 513 404
356 | 519 318
357 | 519 364
358 | 534 535
359 | 540 251
360 | 540 311
361 | 540 407
362 | 540 510
363 | 540 541
364 | 540 593
365 | 540 686
366 | 540 687
367 | 540 780
368 | 540 819
369 | 540 855
370 | 540 860
371 | 540 901
372 | 540 996
373 | 541 251
374 | 541 311
375 | 541 407
376 | 541 510
377 | 541 540
378 | 541 593
379 | 541 686
380 | 541 687
381 | 541 780
382 | 541 819
383 | 541 855
384 | 541 860
385 | 541 901
386 | 541 996
387 | 593 251
388 | 593 311
389 | 593 407
390 | 593 510
391 | 593 540
392 | 593 541
393 | 593 686
394 | 593 687
395 | 593 780
396 | 593 819
397 | 593 855
398 | 593 860
399 | 593 901
400 | 593 996
401 | 601 856
402 | 605 576
403 | 607 308
404 | 607 378
405 | 607 578
406 | 607 611
407 | 608 502
408 | 610 607
409 | 610 611
410 | 611 607
411 | 611 610
412 | 611 714
413 | 620 623
414 | 665 319
415 | 665 479
416 | 667 532
417 | 676 316
418 | 676 688
419 | 676 808
420 | 676 913
421 | 681 621
422 | 686 251
423 | 686 311
424 | 686 407
425 | 686 510
426 | 686 540
427 | 686 541
428 | 686 593
429 | 686 687
430 | 686 780
431 | 686 819
432 | 686 855
433 | 686 860
434 | 686 901
435 | 686 996
436 | 687 251
437 | 687 311
438 | 687 407
439 | 687 510
440 | 687 540
441 | 687 541
442 | 687 593
443 | 687 686
444 | 687 780
445 | 687 819
446 | 687 855
447 | 687 860
448 | 687 901
449 | 687 996
450 | 707 308
451 | 707 378
452 | 707 708
453 | 707 734
454 | 707 735
455 | 720 356
456 | 720 713
457 | 721 725
458 | 722 605
459 | 722 725
460 | 723 622
461 | 725 721
462 | 771 5
463 | 771 95
464 | 771 147
465 | 771 164
466 | 780 251
467 | 780 311
468 | 780 407
469 | 780 510
470 | 780 540
471 | 780 541
472 | 780 593
473 | 780 686
474 | 780 687
475 | 780 819
476 | 780 855
477 | 780 860
478 | 780 901
479 | 780 996
480 | 798 146
481 | 798 873
482 | 802 651
483 | 805 18
484 | 805 874
485 | 813 844
486 | 818 484
487 | 819 251
488 | 819 311
489 | 819 407
490 | 819 510
491 | 819 540
492 | 819 541
493 | 819 593
494 | 819 686
495 | 819 687
496 | 819 780
497 | 819 855
498 | 819 860
499 | 819 901
500 | 819 996
501 | 832 831
502 | 855 251
503 | 855 311
504 | 855 407
505 | 855 510
506 | 855 540
507 | 855 541
508 | 855 593
509 | 855 686
510 | 855 687
511 | 855 780
512 | 855 819
513 | 855 860
514 | 855 901
515 | 855 996
516 | 856 601
517 | 860 251
518 | 860 311
519 | 860 407
520 | 860 510
521 | 860 540
522 | 860 541
523 | 860 593
524 | 860 686
525 | 860 687
526 | 860 780
527 | 860 819
528 | 860 855
529 | 860 901
530 | 860 996
531 | 861 862
532 | 862 861
533 | 874 18
534 | 874 805
535 | 874 886
536 | 878 21
537 | 878 23
538 | 878 28
539 | 878 35
540 | 878 45
541 | 878 59
542 | 878 62
543 | 878 68
544 | 878 82
545 | 878 92
546 | 878 93
547 | 878 114
548 | 878 153
549 | 878 164
550 | 878 166
551 | 878 187
552 | 878 189
553 | 878 194
554 | 878 219
555 | 878 771
556 | 878 886
557 | 886 805
558 | 886 874
559 | 899 321
560 | 899 474
561 | 901 251
562 | 901 311
563 | 901 407
564 | 901 510
565 | 901 540
566 | 901 541
567 | 901 593
568 | 901 686
569 | 901 687
570 | 901 780
571 | 901 819
572 | 901 855
573 | 901 860
574 | 901 996
575 | 911 912
576 | 912 665
577 | 912 911
578 | 921 922
579 | 921 923
580 | 921 924
581 | 921 925
582 | 921 926
583 | 921 927
584 | 921 928
585 | 921 929
586 | 921 930
587 | 921 931
588 | 921 932
589 | 921 933
590 | 921 934
591 | 921 935
592 | 921 936
593 | 921 937
594 | 921 938
595 | 921 939
596 | 921 940
597 | 921 941
598 | 922 921
599 | 922 923
600 | 922 924
601 | 922 925
602 | 922 926
603 | 922 927
604 | 922 928
605 | 922 929
606 | 922 931
607 | 922 932
608 | 922 933
609 | 922 934
610 | 922 935
611 | 922 936
612 | 922 937
613 | 922 938
614 | 922 939
615 | 922 940
616 | 922 941
617 | 923 489
618 | 923 921
619 | 923 922
620 | 923 924
621 | 923 925
622 | 923 926
623 | 923 927
624 | 923 928
625 | 923 929
626 | 923 931
627 | 923 932
628 | 923 933
629 | 923 934
630 | 923 935
631 | 923 936
632 | 923 937
633 | 923 938
634 | 923 939
635 | 923 940
636 | 923 941
637 | 924 921
638 | 924 922
639 | 924 923
640 | 924 925
641 | 924 926
642 | 924 927
643 | 924 928
644 | 924 929
645 | 924 931
646 | 924 932
647 | 924 933
648 | 924 934
649 | 924 935
650 | 924 936
651 | 924 937
652 | 924 938
653 | 924 939
654 | 924 940
655 | 924 941
656 | 924 942
657 | 925 921
658 | 925 922
659 | 925 923
660 | 925 924
661 | 925 926
662 | 925 927
663 | 925 928
664 | 925 929
665 | 925 931
666 | 925 932
667 | 925 933
668 | 925 934
669 | 925 935
670 | 925 936
671 | 925 937
672 | 925 938
673 | 925 939
674 | 925 940
675 | 925 941
676 | 926 921
677 | 926 922
678 | 926 923
679 | 926 924
680 | 926 925
681 | 926 927
682 | 926 928
683 | 926 929
684 | 926 931
685 | 926 932
686 | 926 933
687 | 926 934
688 | 926 935
689 | 926 936
690 | 926 937
691 | 926 938
692 | 926 939
693 | 926 940
694 | 926 941
695 | 927 921
696 | 927 922
697 | 927 923
698 | 927 924
699 | 927 925
700 | 927 926
701 | 927 928
702 | 927 929
703 | 927 931
704 | 927 932
705 | 927 933
706 | 927 934
707 | 927 935
708 | 927 936
709 | 927 937
710 | 927 938
711 | 927 939
712 | 927 940
713 | 927 941
714 | 928 921
715 | 928 922
716 | 928 923
717 | 928 924
718 | 928 925
719 | 928 926
720 | 928 927
721 | 928 929
722 | 928 931
723 | 928 932
724 | 928 933
725 | 928 934
726 | 928 935
727 | 928 936
728 | 928 937
729 | 928 938
730 | 928 939
731 | 928 940
732 | 928 941
733 | 929 28
734 | 929 69
735 | 929 102
736 | 929 104
737 | 929 157
738 | 929 896
739 | 929 921
740 | 929 922
741 | 929 923
742 | 929 924
743 | 929 925
744 | 929 926
745 | 929 927
746 | 929 928
747 | 929 931
748 | 929 932
749 | 929 933
750 | 929 934
751 | 929 935
752 | 929 936
753 | 929 937
754 | 929 938
755 | 929 939
756 | 929 940
757 | 929 941
758 | 930 921
759 | 930 922
760 | 930 923
761 | 930 924
762 | 930 925
763 | 930 926
764 | 930 927
765 | 930 928
766 | 930 929
767 | 930 931
768 | 930 932
769 | 930 933
770 | 930 934
771 | 930 935
772 | 930 936
773 | 930 937
774 | 930 938
775 | 930 939
776 | 930 940
777 | 930 941
778 | 931 921
779 | 931 922
780 | 931 923
781 | 931 924
782 | 931 925
783 | 931 926
784 | 931 927
785 | 931 928
786 | 931 929
787 | 931 932
788 | 931 933
789 | 931 934
790 | 931 935
791 | 931 936
792 | 931 937
793 | 931 938
794 | 931 939
795 | 931 940
796 | 931 941
797 | 932 921
798 | 932 922
799 | 932 923
800 | 932 924
801 | 932 925
802 | 932 926
803 | 932 927
804 | 932 928
805 | 932 929
806 | 932 931
807 | 932 933
808 | 932 934
809 | 932 935
810 | 932 936
811 | 932 937
812 | 932 938
813 | 932 939
814 | 932 940
815 | 932 941
816 | 933 921
817 | 933 922
818 | 933 923
819 | 933 924
820 | 933 925
821 | 933 926
822 | 933 927
823 | 933 928
824 | 933 929
825 | 933 931
826 | 933 932
827 | 933 934
828 | 933 935
829 | 933 936
830 | 933 937
831 | 933 938
832 | 933 939
833 | 933 940
834 | 933 941
835 | 934 169
836 | 934 252
837 | 934 921
838 | 934 922
839 | 934 923
840 | 934 924
841 | 934 925
842 | 934 926
843 | 934 927
844 | 934 928
845 | 934 929
846 | 934 931
847 | 934 932
848 | 934 933
849 | 934 935
850 | 934 936
851 | 934 937
852 | 934 938
853 | 934 939
854 | 934 940
855 | 934 941
856 | 935 809
857 | 935 921
858 | 935 922
859 | 935 923
860 | 935 924
861 | 935 925
862 | 935 926
863 | 935 927
864 | 935 928
865 | 935 929
866 | 935 931
867 | 935 932
868 | 935 933
869 | 935 934
870 | 935 936
871 | 935 937
872 | 935 938
873 | 935 939
874 | 935 940
875 | 935 941
876 | 936 921
877 | 936 922
878 | 936 923
879 | 936 924
880 | 936 925
881 | 936 926
882 | 936 927
883 | 936 928
884 | 936 929
885 | 936 931
886 | 936 932
887 | 936 933
888 | 936 934
889 | 936 935
890 | 936 937
891 | 936 938
892 | 936 939
893 | 936 940
894 | 936 941
895 | 937 921
896 | 937 922
897 | 937 923
898 | 937 924
899 | 937 925
900 | 937 926
901 | 937 927
902 | 937 928
903 | 937 929
904 | 937 931
905 | 937 932
906 | 937 933
907 | 937 934
908 | 937 935
909 | 937 936
910 | 937 938
911 | 937 939
912 | 937 940
913 | 937 941
914 | 938 79
915 | 938 921
916 | 938 922
917 | 938 923
918 | 938 924
919 | 938 925
920 | 938 926
921 | 938 927
922 | 938 928
923 | 938 929
924 | 938 931
925 | 938 932
926 | 938 933
927 | 938 934
928 | 938 935
929 | 938 936
930 | 938 937
931 | 938 939
932 | 938 940
933 | 938 941
934 | 939 921
935 | 939 922
936 | 939 923
937 | 939 924
938 | 939 925
939 | 939 926
940 | 939 927
941 | 939 928
942 | 939 929
943 | 939 931
944 | 939 932
945 | 939 933
946 | 939 934
947 | 939 935
948 | 939 936
949 | 939 937
950 | 939 938
951 | 939 940
952 | 939 941
953 | 940 921
954 | 940 922
955 | 940 923
956 | 940 924
957 | 940 925
958 | 940 926
959 | 940 927
960 | 940 928
961 | 940 929
962 | 940 931
963 | 940 932
964 | 940 933
965 | 940 934
966 | 940 935
967 | 940 936
968 | 940 937
969 | 940 938
970 | 940 939
971 | 940 941
972 | 941 921
973 | 941 922
974 | 941 923
975 | 941 924
976 | 941 925
977 | 941 926
978 | 941 927
979 | 941 928
980 | 941 929
981 | 941 931
982 | 941 932
983 | 941 933
984 | 941 934
985 | 941 935
986 | 941 936
987 | 941 937
988 | 941 938
989 | 941 939
990 | 941 940
991 | 953 954
992 | 954 953
993 | 964 146
994 | 964 479
995 | 990 140
996 | 990 249
997 | 990 308
998 | 990 319
999 | 990 326
1000 | 990 543
1001 | 990 579
1002 | 990 669
1003 | 990 754
1004 | 990 934
1005 | 992 274
1006 | 996 251
1007 | 996 311
1008 | 996 407
1009 | 996 510
1010 | 996 540
1011 | 996 541
1012 | 996 593
1013 | 996 686
1014 | 996 687
1015 | 996 780
1016 | 996 819
1017 | 996 855
1018 | 996 860
1019 | 996 901
1020 |
--------------------------------------------------------------------------------
/sample/graph.edgelist:
--------------------------------------------------------------------------------
1 | 5 95
2 | 5 147
3 | 5 771
4 | 5 878
5 | 8 154
6 | 8 175
7 | 12 47
8 | 12 195
9 | 18 874
10 | 28 929
11 | 32 216
12 | 47 879
13 | 51 94
14 | 58 131
15 | 59 51
16 | 59 94
17 | 59 164
18 | 62 1
19 | 62 3
20 | 62 4
21 | 62 5
22 | 62 6
23 | 62 7
24 | 62 8
25 | 62 9
26 | 62 10
27 | 62 11
28 | 62 12
29 | 62 17
30 | 62 18
31 | 62 19
32 | 62 21
33 | 62 23
34 | 62 24
35 | 62 26
36 | 62 28
37 | 62 32
38 | 62 34
39 | 62 35
40 | 62 36
41 | 62 37
42 | 62 38
43 | 62 39
44 | 62 40
45 | 62 41
46 | 62 42
47 | 62 43
48 | 62 44
49 | 62 45
50 | 62 46
51 | 62 47
52 | 62 48
53 | 62 49
54 | 62 50
55 | 62 51
56 | 62 52
57 | 62 53
58 | 62 54
59 | 62 55
60 | 62 56
61 | 62 57
62 | 62 58
63 | 62 59
64 | 62 60
65 | 62 61
66 | 62 63
67 | 62 64
68 | 62 65
69 | 62 66
70 | 62 67
71 | 62 68
72 | 62 69
73 | 62 70
74 | 62 71
75 | 62 72
76 | 62 74
77 | 62 75
78 | 62 77
79 | 62 78
80 | 62 79
81 | 62 82
82 | 62 88
83 | 62 90
84 | 62 91
85 | 62 92
86 | 62 93
87 | 62 94
88 | 62 95
89 | 62 96
90 | 62 99
91 | 62 100
92 | 62 101
93 | 62 103
94 | 62 104
95 | 62 105
96 | 62 106
97 | 62 107
98 | 62 109
99 | 62 111
100 | 62 112
101 | 62 113
102 | 62 114
103 | 62 115
104 | 62 116
105 | 62 117
106 | 62 118
107 | 62 119
108 | 62 121
109 | 62 123
110 | 62 124
111 | 62 126
112 | 62 127
113 | 62 128
114 | 62 130
115 | 62 131
116 | 62 132
117 | 62 133
118 | 62 135
119 | 62 136
120 | 62 138
121 | 62 142
122 | 62 144
123 | 62 147
124 | 62 148
125 | 62 149
126 | 62 150
127 | 62 151
128 | 62 152
129 | 62 153
130 | 62 154
131 | 62 155
132 | 62 156
133 | 62 157
134 | 62 159
135 | 62 160
136 | 62 161
137 | 62 162
138 | 62 164
139 | 62 165
140 | 62 166
141 | 62 167
142 | 62 169
143 | 62 171
144 | 62 172
145 | 62 173
146 | 62 175
147 | 62 176
148 | 62 178
149 | 62 183
150 | 62 186
151 | 62 187
152 | 62 188
153 | 62 189
154 | 62 190
155 | 62 191
156 | 62 192
157 | 62 193
158 | 62 194
159 | 62 195
160 | 62 196
161 | 62 197
162 | 62 198
163 | 62 199
164 | 62 201
165 | 62 202
166 | 62 203
167 | 62 205
168 | 62 208
169 | 62 209
170 | 62 210
171 | 62 211
172 | 62 212
173 | 62 213
174 | 62 214
175 | 62 215
176 | 62 216
177 | 62 217
178 | 62 218
179 | 62 219
180 | 62 220
181 | 62 222
182 | 62 252
183 | 62 629
184 | 62 771
185 | 62 805
186 | 62 874
187 | 62 879
188 | 62 886
189 | 62 949
190 | 63 21
191 | 63 138
192 | 63 164
193 | 63 213
194 | 71 49
195 | 71 151
196 | 75 169
197 | 75 252
198 | 78 194
199 | 78 212
200 | 82 214
201 | 104 929
202 | 107 18
203 | 112 39
204 | 112 44
205 | 112 57
206 | 112 213
207 | 113 805
208 | 113 874
209 | 113 886
210 | 131 58
211 | 131 128
212 | 138 63
213 | 138 213
214 | 138 886
215 | 140 143
216 | 147 5
217 | 147 95
218 | 147 771
219 | 153 175
220 | 164 214
221 | 166 949
222 | 169 61
223 | 169 75
224 | 169 252
225 | 171 114
226 | 175 153
227 | 189 926
228 | 194 78
229 | 194 212
230 | 196 119
231 | 196 162
232 | 196 805
233 | 196 874
234 | 196 879
235 | 196 886
236 | 199 67
237 | 199 88
238 | 203 150
239 | 215 203
240 | 219 135
241 | 219 201
242 | 224 626
243 | 249 308
244 | 249 349
245 | 249 562
246 | 249 622
247 | 249 669
248 | 249 754
249 | 249 934
250 | 249 990
251 | 251 311
252 | 251 407
253 | 251 510
254 | 251 540
255 | 251 541
256 | 251 593
257 | 251 686
258 | 251 687
259 | 251 780
260 | 251 819
261 | 251 855
262 | 251 860
263 | 251 901
264 | 251 996
265 | 252 169
266 | 283 976
267 | 290 158
268 | 294 557
269 | 303 366
270 | 308 378
271 | 311 251
272 | 311 407
273 | 311 510
274 | 311 540
275 | 311 541
276 | 311 593
277 | 311 686
278 | 311 687
279 | 311 780
280 | 311 819
281 | 311 855
282 | 311 860
283 | 311 901
284 | 311 996
285 | 313 396
286 | 316 676
287 | 316 688
288 | 316 808
289 | 316 913
290 | 318 364
291 | 318 476
292 | 318 519
293 | 318 696
294 | 321 378
295 | 332 378
296 | 352 378
297 | 356 713
298 | 378 308
299 | 378 634
300 | 378 669
301 | 378 707
302 | 378 976
303 | 386 434
304 | 396 313
305 | 400 345
306 | 402 383
307 | 407 251
308 | 407 311
309 | 407 510
310 | 407 540
311 | 407 541
312 | 407 593
313 | 407 686
314 | 407 687
315 | 407 780
316 | 407 819
317 | 407 855
318 | 407 860
319 | 407 901
320 | 407 996
321 | 411 873
322 | 432 433
323 | 433 432
324 | 434 386
325 | 439 441
326 | 439 442
327 | 440 439
328 | 440 441
329 | 440 442
330 | 441 439
331 | 441 442
332 | 442 439
333 | 442 441
334 | 447 720
335 | 448 479
336 | 455 454
337 | 476 318
338 | 479 713
339 | 484 818
340 | 489 928
341 | 510 251
342 | 510 311
343 | 510 407
344 | 510 540
345 | 510 541
346 | 510 593
347 | 510 686
348 | 510 687
349 | 510 780
350 | 510 819
351 | 510 855
352 | 510 860
353 | 510 901
354 | 510 996
355 | 513 404
356 | 519 318
357 | 519 364
358 | 534 535
359 | 540 251
360 | 540 311
361 | 540 407
362 | 540 510
363 | 540 541
364 | 540 593
365 | 540 686
366 | 540 687
367 | 540 780
368 | 540 819
369 | 540 855
370 | 540 860
371 | 540 901
372 | 540 996
373 | 541 251
374 | 541 311
375 | 541 407
376 | 541 510
377 | 541 540
378 | 541 593
379 | 541 686
380 | 541 687
381 | 541 780
382 | 541 819
383 | 541 855
384 | 541 860
385 | 541 901
386 | 541 996
387 | 593 251
388 | 593 311
389 | 593 407
390 | 593 510
391 | 593 540
392 | 593 541
393 | 593 686
394 | 593 687
395 | 593 780
396 | 593 819
397 | 593 855
398 | 593 860
399 | 593 901
400 | 593 996
401 | 601 856
402 | 605 576
403 | 607 308
404 | 607 378
405 | 607 578
406 | 607 611
407 | 608 502
408 | 610 607
409 | 610 611
410 | 611 607
411 | 611 610
412 | 611 714
413 | 620 623
414 | 665 319
415 | 665 479
416 | 667 532
417 | 676 316
418 | 676 688
419 | 676 808
420 | 676 913
421 | 681 621
422 | 686 251
423 | 686 311
424 | 686 407
425 | 686 510
426 | 686 540
427 | 686 541
428 | 686 593
429 | 686 687
430 | 686 780
431 | 686 819
432 | 686 855
433 | 686 860
434 | 686 901
435 | 686 996
436 | 687 251
437 | 687 311
438 | 687 407
439 | 687 510
440 | 687 540
441 | 687 541
442 | 687 593
443 | 687 686
444 | 687 780
445 | 687 819
446 | 687 855
447 | 687 860
448 | 687 901
449 | 687 996
450 | 707 308
451 | 707 378
452 | 707 708
453 | 707 734
454 | 707 735
455 | 720 356
456 | 720 713
457 | 721 725
458 | 722 605
459 | 722 725
460 | 723 622
461 | 725 721
462 | 771 5
463 | 771 95
464 | 771 147
465 | 771 164
466 | 780 251
467 | 780 311
468 | 780 407
469 | 780 510
470 | 780 540
471 | 780 541
472 | 780 593
473 | 780 686
474 | 780 687
475 | 780 819
476 | 780 855
477 | 780 860
478 | 780 901
479 | 780 996
480 | 798 146
481 | 798 873
482 | 802 651
483 | 805 18
484 | 805 874
485 | 813 844
486 | 818 484
487 | 819 251
488 | 819 311
489 | 819 407
490 | 819 510
491 | 819 540
492 | 819 541
493 | 819 593
494 | 819 686
495 | 819 687
496 | 819 780
497 | 819 855
498 | 819 860
499 | 819 901
500 | 819 996
501 | 832 831
502 | 855 251
503 | 855 311
504 | 855 407
505 | 855 510
506 | 855 540
507 | 855 541
508 | 855 593
509 | 855 686
510 | 855 687
511 | 855 780
512 | 855 819
513 | 855 860
514 | 855 901
515 | 855 996
516 | 856 601
517 | 860 251
518 | 860 311
519 | 860 407
520 | 860 510
521 | 860 540
522 | 860 541
523 | 860 593
524 | 860 686
525 | 860 687
526 | 860 780
527 | 860 819
528 | 860 855
529 | 860 901
530 | 860 996
531 | 861 862
532 | 862 861
533 | 874 18
534 | 874 805
535 | 874 886
536 | 878 21
537 | 878 23
538 | 878 28
539 | 878 35
540 | 878 45
541 | 878 59
542 | 878 62
543 | 878 68
544 | 878 82
545 | 878 92
546 | 878 93
547 | 878 114
548 | 878 153
549 | 878 164
550 | 878 166
551 | 878 187
552 | 878 189
553 | 878 194
554 | 878 219
555 | 878 771
556 | 878 886
557 | 886 805
558 | 886 874
559 | 899 321
560 | 899 474
561 | 901 251
562 | 901 311
563 | 901 407
564 | 901 510
565 | 901 540
566 | 901 541
567 | 901 593
568 | 901 686
569 | 901 687
570 | 901 780
571 | 901 819
572 | 901 855
573 | 901 860
574 | 901 996
575 | 911 912
576 | 912 665
577 | 912 911
578 | 921 922
579 | 921 923
580 | 921 924
581 | 921 925
582 | 921 926
583 | 921 927
584 | 921 928
585 | 921 929
586 | 921 930
587 | 921 931
588 | 921 932
589 | 921 933
590 | 921 934
591 | 921 935
592 | 921 936
593 | 921 937
594 | 921 938
595 | 921 939
596 | 921 940
597 | 921 941
598 | 922 921
599 | 922 923
600 | 922 924
601 | 922 925
602 | 922 926
603 | 922 927
604 | 922 928
605 | 922 929
606 | 922 931
607 | 922 932
608 | 922 933
609 | 922 934
610 | 922 935
611 | 922 936
612 | 922 937
613 | 922 938
614 | 922 939
615 | 922 940
616 | 922 941
617 | 923 489
618 | 923 921
619 | 923 922
620 | 923 924
621 | 923 925
622 | 923 926
623 | 923 927
624 | 923 928
625 | 923 929
626 | 923 931
627 | 923 932
628 | 923 933
629 | 923 934
630 | 923 935
631 | 923 936
632 | 923 937
633 | 923 938
634 | 923 939
635 | 923 940
636 | 923 941
637 | 924 921
638 | 924 922
639 | 924 923
640 | 924 925
641 | 924 926
642 | 924 927
643 | 924 928
644 | 924 929
645 | 924 931
646 | 924 932
647 | 924 933
648 | 924 934
649 | 924 935
650 | 924 936
651 | 924 937
652 | 924 938
653 | 924 939
654 | 924 940
655 | 924 941
656 | 924 942
657 | 925 921
658 | 925 922
659 | 925 923
660 | 925 924
661 | 925 926
662 | 925 927
663 | 925 928
664 | 925 929
665 | 925 931
666 | 925 932
667 | 925 933
668 | 925 934
669 | 925 935
670 | 925 936
671 | 925 937
672 | 925 938
673 | 925 939
674 | 925 940
675 | 925 941
676 | 926 921
677 | 926 922
678 | 926 923
679 | 926 924
680 | 926 925
681 | 926 927
682 | 926 928
683 | 926 929
684 | 926 931
685 | 926 932
686 | 926 933
687 | 926 934
688 | 926 935
689 | 926 936
690 | 926 937
691 | 926 938
692 | 926 939
693 | 926 940
694 | 926 941
695 | 927 921
696 | 927 922
697 | 927 923
698 | 927 924
699 | 927 925
700 | 927 926
701 | 927 928
702 | 927 929
703 | 927 931
704 | 927 932
705 | 927 933
706 | 927 934
707 | 927 935
708 | 927 936
709 | 927 937
710 | 927 938
711 | 927 939
712 | 927 940
713 | 927 941
714 | 928 921
715 | 928 922
716 | 928 923
717 | 928 924
718 | 928 925
719 | 928 926
720 | 928 927
721 | 928 929
722 | 928 931
723 | 928 932
724 | 928 933
725 | 928 934
726 | 928 935
727 | 928 936
728 | 928 937
729 | 928 938
730 | 928 939
731 | 928 940
732 | 928 941
733 | 929 28
734 | 929 69
735 | 929 102
736 | 929 104
737 | 929 157
738 | 929 896
739 | 929 921
740 | 929 922
741 | 929 923
742 | 929 924
743 | 929 925
744 | 929 926
745 | 929 927
746 | 929 928
747 | 929 931
748 | 929 932
749 | 929 933
750 | 929 934
751 | 929 935
752 | 929 936
753 | 929 937
754 | 929 938
755 | 929 939
756 | 929 940
757 | 929 941
758 | 930 921
759 | 930 922
760 | 930 923
761 | 930 924
762 | 930 925
763 | 930 926
764 | 930 927
765 | 930 928
766 | 930 929
767 | 930 931
768 | 930 932
769 | 930 933
770 | 930 934
771 | 930 935
772 | 930 936
773 | 930 937
774 | 930 938
775 | 930 939
776 | 930 940
777 | 930 941
778 | 931 921
779 | 931 922
780 | 931 923
781 | 931 924
782 | 931 925
783 | 931 926
784 | 931 927
785 | 931 928
786 | 931 929
787 | 931 932
788 | 931 933
789 | 931 934
790 | 931 935
791 | 931 936
792 | 931 937
793 | 931 938
794 | 931 939
795 | 931 940
796 | 931 941
797 | 932 921
798 | 932 922
799 | 932 923
800 | 932 924
801 | 932 925
802 | 932 926
803 | 932 927
804 | 932 928
805 | 932 929
806 | 932 931
807 | 932 933
808 | 932 934
809 | 932 935
810 | 932 936
811 | 932 937
812 | 932 938
813 | 932 939
814 | 932 940
815 | 932 941
816 | 933 921
817 | 933 922
818 | 933 923
819 | 933 924
820 | 933 925
821 | 933 926
822 | 933 927
823 | 933 928
824 | 933 929
825 | 933 931
826 | 933 932
827 | 933 934
828 | 933 935
829 | 933 936
830 | 933 937
831 | 933 938
832 | 933 939
833 | 933 940
834 | 933 941
835 | 934 169
836 | 934 252
837 | 934 921
838 | 934 922
839 | 934 923
840 | 934 924
841 | 934 925
842 | 934 926
843 | 934 927
844 | 934 928
845 | 934 929
846 | 934 931
847 | 934 932
848 | 934 933
849 | 934 935
850 | 934 936
851 | 934 937
852 | 934 938
853 | 934 939
854 | 934 940
855 | 934 941
856 | 935 809
857 | 935 921
858 | 935 922
859 | 935 923
860 | 935 924
861 | 935 925
862 | 935 926
863 | 935 927
864 | 935 928
865 | 935 929
866 | 935 931
867 | 935 932
868 | 935 933
869 | 935 934
870 | 935 936
871 | 935 937
872 | 935 938
873 | 935 939
874 | 935 940
875 | 935 941
876 | 936 921
877 | 936 922
878 | 936 923
879 | 936 924
880 | 936 925
881 | 936 926
882 | 936 927
883 | 936 928
884 | 936 929
885 | 936 931
886 | 936 932
887 | 936 933
888 | 936 934
889 | 936 935
890 | 936 937
891 | 936 938
892 | 936 939
893 | 936 940
894 | 936 941
895 | 937 921
896 | 937 922
897 | 937 923
898 | 937 924
899 | 937 925
900 | 937 926
901 | 937 927
902 | 937 928
903 | 937 929
904 | 937 931
905 | 937 932
906 | 937 933
907 | 937 934
908 | 937 935
909 | 937 936
910 | 937 938
911 | 937 939
912 | 937 940
913 | 937 941
914 | 938 79
915 | 938 921
916 | 938 922
917 | 938 923
918 | 938 924
919 | 938 925
920 | 938 926
921 | 938 927
922 | 938 928
923 | 938 929
924 | 938 931
925 | 938 932
926 | 938 933
927 | 938 934
928 | 938 935
929 | 938 936
930 | 938 937
931 | 938 939
932 | 938 940
933 | 938 941
934 | 939 921
935 | 939 922
936 | 939 923
937 | 939 924
938 | 939 925
939 | 939 926
940 | 939 927
941 | 939 928
942 | 939 929
943 | 939 931
944 | 939 932
945 | 939 933
946 | 939 934
947 | 939 935
948 | 939 936
949 | 939 937
950 | 939 938
951 | 939 940
952 | 939 941
953 | 940 921
954 | 940 922
955 | 940 923
956 | 940 924
957 | 940 925
958 | 940 926
959 | 940 927
960 | 940 928
961 | 940 929
962 | 940 931
963 | 940 932
964 | 940 933
965 | 940 934
966 | 940 935
967 | 940 936
968 | 940 937
969 | 940 938
970 | 940 939
971 | 940 941
972 | 941 921
973 | 941 922
974 | 941 923
975 | 941 924
976 | 941 925
977 | 941 926
978 | 941 927
979 | 941 928
980 | 941 929
981 | 941 931
982 | 941 932
983 | 941 933
984 | 941 934
985 | 941 935
986 | 941 936
987 | 941 937
988 | 941 938
989 | 941 939
990 | 941 940
991 | 953 954
992 | 954 953
993 | 964 146
994 | 964 479
995 | 990 140
996 | 990 249
997 | 990 308
998 | 990 319
999 | 990 326
1000 | 990 543
1001 | 990 579
1002 | 990 669
1003 | 990 754
1004 | 990 934
1005 | 992 274
1006 | 996 251
1007 | 996 311
1008 | 996 407
1009 | 996 510
1010 | 996 540
1011 | 996 541
1012 | 996 593
1013 | 996 686
1014 | 996 687
1015 | 996 780
1016 | 996 819
1017 | 996 855
1018 | 996 860
1019 | 996 901
1020 |
--------------------------------------------------------------------------------
/AttriRank_inC/AttriRank.cpp:
--------------------------------------------------------------------------------
1 | /* =========================================================================================================
2 |
3 | C++ Implementation of AttriRank
4 | Author: Ming-Han Feng
5 |
6 | for more details, please refer to the paper:
7 | Unsupervised Ranking using Graph Structures and Node Attributes
8 | Chin-Chi Hsu, Yi-An Lai, Wen-Hao Chen, Ming-Han Feng, and Shou-De Lin
9 | Web Search and Data Mining (WSDM), 2017
10 |
11 | === Requirements: g++ ===
12 | compile: g++ -std=c++11 -O2 AttriRank.cpp -o AttriRank
13 | usage: AttriRank EdgeFile AttriFile [options]
14 |
15 | << options >>
16 | --unweighted (none) graph is unweighted (default: weighted)
17 | --undirected (none) graph is undirected (default: directed)
18 | -k, --kernel [rbf_ap|rbf|cosine] kernel used in AttriRank (default: rbf_ap)
19 | -i, --iter [MaximumIterations] maximum number of iterations in power method (default: 100)
20 | -c, --conv [ConvergenceThreshold] the convergence threshold in power method (default: 1.0e-6)
21 | -d, --damp [start,step,end] damping factor (default: 0.0,0.2,1.0)
22 | -t, --total [alpha,beta] TotalRank parameters (default: 1,1)
23 |
24 | e.g. AttriRank graph.edge graph.attri -d 0.7,0.02,0.9 -c 1e-9 --unweighted
25 | e.g. AttriRank edge.txt attri.txt --undirected -i 200 -k rbf
26 |
27 | === EdgeFile format ===
28 | EachLine: NodeFromID NodeToID (weight)
29 | Note: in the weighted version, the weight defaults to 1.0 when a line provides no third value
30 |
31 | e.g. 0 1
32 | 2 3
33 | 2 4
34 | e.g. 1 2 0.1
35 | 3 0 0.5
36 | 3 1 3
37 |
38 | === AttriFile format ===
39 | FirstLine: AttributesCount
40 | Remaining: NodeID AttriIndex:AttriValue ...
41 | Note: unspecified entries will be set to 0.0
42 |
43 | e.g. 1606
44 | 41407 34:1 33:1 32:1 31:1 27:1 28:8 29:1 30:1
45 | 41380 17:240 16:1 114:2 8:2250 7:1 14:1 60:1 0:1 121:1 120:3 15:35 61:2 9:1 12:12 13:1
46 | e.g. 234
47 | 2 5:0.85 6:-1.43 7:1.84 8:5.64 10:9.27 11:9.18
48 | 1 0:1.79 1:1.79 2:0.00 3:0.00 4:1.00 5:1.00 6:-2.83
49 |
50 | === Output format ===
51 | FileName: attrirank_(DampingFactor).txt / attrirank_total.txt
52 | EachLine: NodeID AttriRankScore
53 |
54 | === Miscellaneous ===
55 | This implementation checks convergence with the L1 norm, using NodeCount * ConvergenceThreshold as the threshold.
56 |
57 | ========================================================================================================= */
58 |
59 | #include
60 | #include
61 | #include
62 | #include
63 | #include
64 | #include
65 | #include
66 | #include
67 | #define MAXLINELEN 8000
68 | #define MAXPATHLEN 160
69 |
70 | struct Node {  // Graph node: owns its out-edge weights and attribute vector, and points into the shared score arrays.
71 | static std::forward_list dangle;  // nodes that setTransition() found to have no out-edges
72 | static std::forward_list normal;  // nodes that setTransition() found to have at least one out-edge
73 | static std::valarray piNew;  // shared score array; each node's xNew points at its own slot
74 | static std::valarray piOld;  // shared score array; each node's xOld points at its own slot
75 | int id;  // node id; also used directly as the index into piOld / piNew
76 | std::unordered_map outedge;  // out-edge weights; setTransition() rescales them to sum to 1
77 | std::valarray attriVector;  // attribute values; created zero-filled with `count` entries
78 | double *xOld;  // not set by the constructor; assigned in setTransition()
79 | double *xNew;  // not set by the constructor; assigned in setTransition()
80 |
81 | Node(int &i, int &count, std::forward_list &nodeList): id(i) {  // i: node id, count: attribute vector length, nodeList: list this node adds itself to
82 | attriVector = std::valarray(0.0, count);  // `count` entries, all 0.0
83 | nodeList.push_front(this);  // self-registration; the list stores the raw pointer
84 | }
85 | ~Node() {}
86 | void setTransition(void) {  // binds the score slots, classifies the node as dangling/normal, and normalizes out-edge weights
87 | xOld = &piOld[id];  // NOTE(review): assumes piOld/piNew are already sized and that id is a valid index - confirm against caller
88 | xNew = &piNew[id];
89 | if (outedge.empty()) {
90 | dangle.push_front(this);  // no out-edges: handled separately by the ranking loops
91 | } else {
92 | normal.push_front(this);
93 | double sum = 0.0;
94 | for (auto &x: outedge)
95 | sum += x.second;  // total outgoing weight
96 | for (auto &x: outedge)
97 | x.second /= sum;  // NOTE(review): no guard for a weight total of zero
98 | }
99 | return;
100 | }
101 | };
102 | std::forward_list Node::dangle;  // out-of-class definitions of the static members declared above
103 | std::forward_list Node::normal;
104 | std::valarray Node::piNew;
105 | std::valarray Node::piOld;
106 |
107 | void runAttriRank(const std::valarray &provec, const double &damp, const int &maxiter, const double threshold) {  // Iterates scores into Node::piNew for one damping factor; provec is the vector that non-propagated mass is spread by.
108 | printf("\tDampingFactor: %.2f\n", damp);
109 | if (damp == 0.0) {  // nothing propagates along edges, so the result is provec itself
110 | Node::piNew = provec;
111 | return;
112 | }
113 | Node::piOld = 1.0 / static_cast(provec.size());  // scalar assignment: every slot starts at the same value
114 | for (int iteration = 1; iteration <= maxiter; ++iteration) {
115 | const double&& dangleSum = [] () { double s = 0.0; for (Node *v: Node::dangle) s += *v->xOld; return s; } ();  // previous-iteration score held by nodes without out-edges
116 | Node::piNew = (dangleSum * damp + (1.0 - damp)) * provec;  // dangling mass and the (1 - damp) share are both distributed according to provec
117 | for (Node *v: Node::normal) {
118 | const double&& dampScore = damp * *v->xOld;  // portion of this node's previous score that follows its out-edges
119 | for (auto &x: v->outedge)
120 | *x.first->xNew += x.second * dampScore;  // add to the target's new score, scaled by the edge weight
121 | }
122 | const double&& err = std::abs(Node::piNew - Node::piOld).sum();  // sum of absolute changes between iterations
123 | if (err < threshold) return;  // converged; result is left in Node::piNew
124 | Node::piOld = Node::piNew;
125 | }
126 | printf("\t\tfailed to converge in %d iterations.\n", maxiter);  // reached only when the loop ran out of iterations
127 | return;
128 | }
129 |
130 | void runTotalRank(const std::valarray &provec, const int &alpha, const int &beta, const int &maxiter, const double threshold) {  // Accumulates successive propagated terms into Node::piNew until the newest term sums to less than threshold.
131 | printf("\tTotalRank: (alpha=%d, beta=%d)\n", alpha, beta);
132 | // Node::piOld is used as pho_current in this approach.
133 | Node::piNew = Node::piOld = (static_cast(beta) / static_cast(alpha + beta)) * provec;  // first term: beta / (alpha + beta) times provec; NOTE(review): no guard for alpha + beta == 0
134 | for (int iteration = 1; iteration <= maxiter; ++iteration) {
135 | const double&& dangleSum = [] () { double s = 0.0; for (Node *v: Node::dangle) s += *v->xOld; return s; } ();  // current-term mass held by nodes without out-edges
136 | std::valarray&& pho = dangleSum * provec;  // next term starts with the dangling mass spread according to provec
137 | for (Node *v: Node::normal) {
138 | for (auto &x: v->outedge)
139 | pho[x.first->id] += x.second * *v->xOld;  // plus mass moved along each out-edge by its weight
140 | }
141 | pho *= static_cast(iteration + alpha - 1) / static_cast(iteration + alpha + beta);  // per-iteration coefficient applied to the propagated term
142 | Node::piNew += pho;  // running total of all terms so far
143 | const double&& err = pho.sum();  // size of the term just added
144 | if (err < threshold) return;  // newest term is below threshold; result is left in Node::piNew
145 | Node::piOld = pho;  // the term just added becomes the source for the next iteration (xOld points into piOld)
146 | }
147 | printf("\t\tfailed to converge in %d iterations.\n", maxiter);  // reached only when the loop ran out of iterations
148 | return;
149 | }
150 |
151 | void outputFile(const char *fileName, std::forward_list &nodeList) {  // Writes one "id score" line per node to fileName, highest score first; nodeList is reordered in place.
152 | nodeList.sort([] (Node *a, Node *b) { return (*a->xNew > *b->xNew); });  // descending by the value xNew points at
153 | FILE *fp = fopen(fileName, "w");  // NOTE(review): the result is not checked for NULL before it is used below
154 | for (Node *v: nodeList)
155 | fprintf(fp, "%d %e\n", v->id, *v->xNew);  // node id, then score in exponent notation
156 | fclose(fp);
157 | return;
158 | }
159 |
160 | inline int wrongFormat(char *opt) {  // Prints a message that option `opt` is missing its parameter(s); always returns 0.
161 | printf(">>> option '%s' needs parameter(s)\n", opt);
162 | return 0;  // callers in main() return this value directly as the process exit code
163 | }
164 |
165 | int main(int argc, char **argv) {
166 | if (argc < 3) {
167 | printf(">>> The program needs at least 2 arguments: EdgeFile & AttriFile\n");
168 | return 0;
169 | }
170 | char argKernel[MAXPATHLEN] = "rbf_ap";
171 | bool unweighted = false;
172 | bool undirected = false;
173 | double converg = 1.0e-6;
174 | double damp[3] = { 0.0, 0.2, 1.0 };
175 | int param[2] = { 1, 1 };
176 | int maxiter = 100;
177 | for (int i = 3; i < argc; ++i) {
178 | if ((strcmp("-d", argv[i]) == 0) or (strcmp("--damp", argv[i]) == 0)) {
179 | if (++i >= argc) return wrongFormat(argv[i - 1]);
180 | if (3 != sscanf(argv[i], "%lf,%lf,%lf", &damp[0], &damp[1], &damp[2])) {
181 | damp[0] = 0.0; damp[1] = 0.2; damp[2] = 1.0;
182 | }
183 | if (damp[0] < 0.0) damp[0] = 0.0;
184 | if (damp[2] > 1.0) damp[2] = 1.0;
185 | } else if (strcmp("--unweighted", argv[i]) == 0) {
186 | unweighted = true;
187 | } else if (strcmp("--undirected", argv[i]) == 0) {
188 | undirected = true;
189 | } else if ((strcmp("-k", argv[i]) == 0) or (strcmp("--kernel", argv[i]) == 0)) {
190 | if (++i >= argc) return wrongFormat(argv[i - 1]);
191 | strncpy(argKernel, argv[i], MAXPATHLEN - 1);
192 | } else if ((strcmp("-i", argv[i]) == 0) or (strcmp("--iter", argv[i]) == 0)) {
193 | if (++i >= argc) return wrongFormat(argv[i - 1]);
194 | maxiter = atoi(argv[i]);
195 | if (maxiter < 0) maxiter = 100;
196 | } else if ((strcmp("-c", argv[i]) == 0) or (strcmp("--conv", argv[i]) == 0)) {
197 | if (++i >= argc) return wrongFormat(argv[i - 1]);
198 | converg = atof(argv[i]);
199 | if (converg < 0) converg = 1.0e-6;
200 | } else if ((strcmp("-t", argv[i]) == 0) or (strcmp("--total", argv[i]) == 0)) {
201 | if (++i >= argc) return wrongFormat(argv[i - 1]);
202 | if (2 != sscanf(argv[i], "%d,%d", ¶m[0], ¶m[1])) {
203 | param[0] = 1; param[1] = 1;
204 | }
205 | if (param[0] < 0) param[0] = 1;
206 | if (param[1] < 0) param[1] = 1;
207 | } else {
208 | printf("\tunknown argument: %s\n", argv[i]);
209 | }
210 | }
211 | printf("[GraphType] %s + %s\n", unweighted ? "unweighted" : "weighted", undirected ? "undirected" : "directed");
212 | printf("[MaxIterations] %d\n", maxiter);
213 | printf("[ConvThreshold] %.2e\n", converg);
214 | // args parse end
215 | std::unordered_map nodes;
216 | std::forward_list nodeList;
217 | /* AttriFile */
218 | int attriCount;
219 | {
220 | int u, a, s, i;
221 | double f; char buff[MAXLINELEN];
222 | FILE *fp = fopen(argv[2], "r");
223 | fgets(buff, MAXLINELEN - 1, fp);
224 | sscanf(buff, "%d", &attriCount);
225 | printf("AttriCount: %d\n", attriCount);
226 | while (fgets(buff, MAXLINELEN - 1, fp) != NULL) {
227 | sscanf(buff, "%d%n", &u, &i);
228 | if (nodes.count(u) == 0) nodes[u] = new Node(u, attriCount, nodeList);
229 | while (sscanf(buff + i, "%d:%lf%n", &a, &f, &s) == 2) {
230 | nodes[u]->attriVector[a] = f;
231 | i += s;
232 | }
233 | }
234 | fclose(fp);
235 | }
236 | /* EdgeFile */
237 | {
238 | int u, v, arg;
239 | double w; char buff[MAXLINELEN];
240 | FILE *fp = fopen(argv[1], "r");
241 | while (fgets(buff, MAXLINELEN - 1, fp) != NULL) {
242 | arg = sscanf(buff, "%d %d %lf", &u, &v, &w);
243 | if (nodes.count(u) == 0) nodes[u] = new Node(u, attriCount, nodeList);
244 | if (nodes.count(v) == 0) nodes[v] = new Node(v, attriCount, nodeList);
245 | if (unweighted) {
246 | nodes[u]->outedge[nodes[v]] = 1.0;
247 | if (undirected)
248 | nodes[v]->outedge[nodes[u]] = 1.0;
249 | } else {
250 | if (arg == 2) w = 1.0;
251 | nodes[u]->outedge[nodes[v]] += w;
252 | if (undirected)
253 | nodes[v]->outedge[nodes[u]] += w;
254 | }
255 | }
256 | fclose(fp);
257 | }
258 | const double&& nodeCount = static_cast(nodes.size());
259 | /* Standardization */
260 | for (int i = 0; i < attriCount; ++i) {
261 | double e1x = 0.0;
262 | double e2x = 0.0;
263 | for (Node *v: nodeList) {
264 | e1x += v->attriVector[i];
265 | e2x += v->attriVector[i] * v->attriVector[i];
266 | }
267 | const double&& mean = e1x / nodeCount;
268 | const double&& std = std::sqrt(e2x / nodeCount - mean * mean);
269 | for (Node *v: nodeList)
270 | v->attriVector[i] = (std > 0.0) ? ((v->attriVector[i] - mean) / std) : 0.0;
271 | }
272 | /* TransitionMatrix */
273 | printf("Generate Transition Matrix\n");
274 | Node::piNew = std::valarray(nodes.size());
275 | Node::piOld = std::valarray(nodes.size());
276 | for (Node *v: nodeList)
277 | v->setTransition();
278 | const double&& gamma = 1.0 / attriCount;
279 | std::valarray resetVec(0.0, nodes.size());
280 | int transProcess = static_cast(nodes.size());
281 | if (strcmp("rbf", argKernel) == 0) {
282 | printf("\tusing RBF kernel\n");
283 | for (auto it1 = nodeList.begin(); it1 != nodeList.end(); ++it1) {
284 | resetVec[(*it1)->id] += 1.0;
285 | for (auto it2 = std::next(it1); it2 != nodeList.end(); ++it2) {
286 | double&& s12 = [&gamma] (const Node *a, const Node *b) {
287 | return std::exp(-gamma * std::pow(a->attriVector - b->attriVector, 2.0).sum());
288 | } (*it1, *it2);
289 | resetVec[(*it1)->id] += s12;
290 | resetVec[(*it2)->id] += s12;
291 | }
292 | printf("\r\tremain: %7d", --transProcess);
293 | }
294 | } else if (strcmp("cosine", argKernel) == 0) {
295 | printf("\tusing Cosine similarity kernel\n");
296 | std::valarray unitSum(0.0, attriCount);
297 | for (Node *v: nodeList) {
298 | v->attriVector /= std::sqrt(std::pow(v->attriVector, 2.0).sum());
299 | unitSum += v->attriVector;
300 | printf("\r\tremain(1/2): %7d", --transProcess);
301 | }
302 | transProcess = static_cast(nodes.size());
303 | for (Node *v: nodeList) {
304 | resetVec[v->id] = ((v->attriVector * unitSum).sum() + nodeCount) / 2.0;
305 | printf("\r\tremain(2/2): %7d", --transProcess);
306 | }
307 | } else {
308 | printf("\tusing RBF kernel (approximation)\n");
309 | std::valarray scalarW(nodes.size());
310 | for (Node *v: nodeList)
311 | scalarW[v->id] = std::exp(-gamma * std::pow(v->attriVector, 2.0).sum());
312 | std::valarray vectorB(0.0, attriCount);
313 | std::valarray *matrixC = new std::valarray[attriCount];
314 | for (int i = 0; i < attriCount; ++i)
315 | matrixC[i] = std::valarray(0.0, attriCount);
316 | for (Node *v: nodeList) {
317 | std::valarray&& wx = scalarW[v->id] * v->attriVector;
318 | vectorB += wx;
319 | for (int i = 0; i < attriCount; ++i)
320 | matrixC[i] += wx * v->attriVector[i];
321 | printf("\r\tremain(1/2): %7d", --transProcess);
322 | }
323 | vectorB *= 2.0 * gamma;
324 | for (int i = 0; i < attriCount; ++i)
325 | matrixC[i] *= 2.0 * gamma * gamma;
326 | double&& scalarA = scalarW.sum();
327 | transProcess = static_cast(nodes.size());
328 | for (Node *v: nodeList) {
329 | std::valarray Cx(attriCount);
330 | for (int i = 0; i < attriCount; ++i)
331 | Cx[i] = (matrixC[i] * v->attriVector).sum();
332 | resetVec[v->id] = scalarW[v->id] * (scalarA + (v->attriVector * (vectorB + Cx)).sum());
333 | printf("\r\tremain(2/2): %7d", --transProcess);
334 | }
335 | delete[] matrixC;
336 | }
337 | putchar('\n'); resetVec /= resetVec.sum();
338 | /* AttriRank */
339 | printf("Run AttriRank Model\n");
340 | for (double df = damp[0]; df <= damp[2]; df += damp[1]) {
341 | runAttriRank(resetVec, df, maxiter, nodeCount * converg);
342 | char fileName[40];
343 | sprintf(fileName, "attrirank_%.3f.txt", df);
344 | outputFile(fileName, nodeList);
345 | }
346 | runTotalRank(resetVec, param[0], param[1], maxiter, nodeCount * converg);
347 | outputFile("attrirank_total.txt", nodeList);
348 | return 0;
349 | }
350 |
--------------------------------------------------------------------------------