import numpy as np


class GraphUCB:
    """LinUCB-style contextual bandit for interactive anomaly detection on an
    attributed network (WSDM'19 "Interactive Anomaly Detection on Attributed
    Networks").

    Per arm (node cluster) it keeps two ridge-regression estimators:

      * ``(A1, b1, theta1)`` over a node's own attribute vector, and
      * ``(A2, b2, theta2)`` over a feature vector derived from the node's
        graph neighborhood.

    The acquisition score in :meth:`getProb` mixes both estimators' means
    and UCB confidence widths.
    """

    def __init__(self, context_dimension, arm_num, W, all_nodes, ALPHA, BETA, LAMBDA, RHO):
        """
        :param context_dimension: length of each node's attribute vector.
        :param arm_num: number of arms (one per node cluster/class).
        :param W: adjacency matrix; a non-zero ``W[i][j]`` marks an edge i-j.
        :param all_nodes: list of nodes, assumed ordered so that
            ``all_nodes[i].id == i`` (neighbor lookup relies on this).
        :param ALPHA: exploration weight on the own-context confidence term.
        :param BETA: exploration weight on the neighborhood confidence term.
        :param LAMBDA: ridge-regularization strength (must be > 0).
        :param RHO: weight of the neighborhood term; must be non-zero because
            :meth:`updateParameters` divides by it.
        """
        self.context_dimension = context_dimension
        self.W = W
        self.arm_num = arm_num
        self.ALPHA = ALPHA
        self.BETA = BETA
        self.LAMBDA = LAMBDA
        self.RHO = RHO
        self.selected_nodes = []
        self.all_nodes = all_nodes

        # Build every arm's state independently. The original used
        # `[A] * arm_num`, which aliases one shared array into every slot;
        # comprehensions give each arm its own matrix/vector.
        self.A1s = [LAMBDA * np.identity(context_dimension) for _ in range(arm_num)]
        self.b1s = [np.zeros(context_dimension) for _ in range(arm_num)]
        # inv(LAMBDA * I) is simply I / LAMBDA -- no linalg.inv call needed.
        self.A1Invs = [np.identity(context_dimension) / LAMBDA for _ in range(arm_num)]

        self.A2s = [LAMBDA * np.identity(context_dimension) for _ in range(arm_num)]
        self.b2s = [np.zeros(context_dimension) for _ in range(arm_num)]
        self.A2Invs = [np.identity(context_dimension) / LAMBDA for _ in range(arm_num)]

        self.thetas1 = [np.zeros(context_dimension) for _ in range(arm_num)]
        self.thetas2 = [np.zeros(context_dimension) for _ in range(arm_num)]

        print("Finish Initialization")

    def getProb(self, node):
        """Return the UCB acquisition score of ``node`` under its cluster's arm.

        score = (mean1 + ALPHA * width1) + RHO * (mean2 + BETA * width2)

        where each ``width`` is the usual ``sqrt(x^T A^-1 x)`` confidence term.

        :param node: object exposing ``cluster``, ``id`` and
            ``contextFeatureVector``.
        :return: scalar score.
        """
        arm_index = node.cluster
        x = node.contextFeatureVector
        mean1 = np.dot(self.thetas1[arm_index], x)
        var1 = np.sqrt(np.dot(np.dot(x, self.A1Invs[arm_index]), x))

        # Neighborhood context for scoring: |own vector - mean of neighbors|.
        neighborsFeatureVector = self.subtract(node, self.getNeighborsFeatureVectorList(node))
        mean2 = np.dot(self.thetas2[arm_index], neighborsFeatureVector)
        var2 = np.sqrt(np.dot(np.dot(neighborsFeatureVector, self.A2Invs[arm_index]),
                              neighborsFeatureVector))

        pta = (mean1 + self.ALPHA * var1) + (self.BETA * var2 + mean2) * self.RHO
        return pta

    def updateParameters(self, picked_node, reward):
        """Ridge-update both of the picked arm's estimators with ``reward``.

        Each estimator is fit against the residual left after subtracting the
        other estimator's current prediction; theta1 is refreshed first and
        its *new* value feeds the theta2 residual (order preserved from the
        original).

        :param picked_node: the node returned by :meth:`decide`.
        :param reward: observed reward (1 for a true anomaly, else 0).
        """
        arm = picked_node.cluster
        x = picked_node.contextFeatureVector
        # NOTE: scoring uses subtract(); the update uses the plain average
        # of the neighborhood -- kept as in the original implementation.
        z = self.average(self.getNeighborsFeatureVectorList(picked_node))

        self.A1s[arm] = np.add(self.A1s[arm], np.outer(x, x), casting="unsafe")
        residual1 = reward - np.dot(self.thetas2[arm], z) * self.RHO
        self.b1s[arm] = np.add(self.b1s[arm], residual1 * x, casting="unsafe")
        self.A1Invs[arm] = np.linalg.inv(self.A1s[arm])
        self.thetas1[arm] = np.dot(self.A1Invs[arm], self.b1s[arm])

        self.A2s[arm] = np.add(self.A2s[arm], np.outer(z, z), casting="unsafe")
        # Divides by RHO: RHO == 0 is a caller error (see __init__ docstring).
        residual2 = (reward - np.dot(self.thetas1[arm], x)) / self.RHO
        self.b2s[arm] = np.add(self.b2s[arm], residual2 * z, casting="unsafe")
        self.A2Invs[arm] = np.linalg.inv(self.A2s[arm])
        self.thetas2[arm] = np.dot(self.A2Invs[arm], self.b2s[arm])

    def decide(self, nodes):
        """Pick the not-yet-selected node with the highest acquisition score.

        ``nodes`` is assumed ordered so that list position equals node id
        (the already-selected check compares positions against stored ids,
        as in the original).

        :return: the chosen node, or None when every node has already been
            selected (the original raised AttributeError in that case).
        """
        maxPTA = float("-inf")
        picked_node = None
        for position, node in enumerate(nodes):
            if position in self.selected_nodes:
                continue
            arm_pta = self.getProb(node)
            if arm_pta > maxPTA:
                picked_node = node
                maxPTA = arm_pta
        if picked_node is not None:
            self.selected_nodes.append(picked_node.id)
        return picked_node

    def getNeighborsFeatureVectorList(self, node):
        """Return the attribute vectors of ``node``'s graph neighbors.

        Falls back to the node's own vector when the node has at most one
        incident edge: the original did this for exactly one edge; covering
        the zero-edge (isolated node) case as well prevents a later
        ``np.mean([])`` from producing NaN.
        """
        indexList = np.nonzero(self.W[node.id])[0]
        if len(indexList) <= 1:
            return [node.contextFeatureVector]
        return [self.all_nodes[i].contextFeatureVector
                for i in indexList
                if self.all_nodes[i].id != node.id]

    @staticmethod
    def average(neighborsFeatureVectorList):
        """Element-wise mean of the neighbor vectors."""
        return np.mean(neighborsFeatureVectorList, axis=0)

    @staticmethod
    def max(neighborsFeatureVectorList):
        """Element-wise maximum of the neighbor vectors."""
        return np.amax(neighborsFeatureVectorList, axis=0)

    @staticmethod
    def sum(neighborsFeatureVectorList):
        """Element-wise sum of the neighbor vectors."""
        return np.sum(neighborsFeatureVectorList, axis=0)

    @staticmethod
    def subtract(node, neighborsFeatureVectorList):
        """Absolute difference between a node's vector and its neighbors' mean."""
        mean_vector = np.mean(neighborsFeatureVectorList, axis=0)
        return np.abs(node.contextFeatureVector - mean_vector)
import datetime
import os
import random

import scipy.io

from GraphUCB import GraphUCB


class Node:
    """A node of the attributed network.

    Attributes:
        id: node index into the adjacency matrix.
        label: True for a true anomaly, False otherwise.
        cluster: arm (class) index the node belongs to, or None when unknown.
        contextFeatureVector: the node's attribute vector.
    """

    def __init__(self, id, label, cluster, contextFeatureVector=None):
        self.id = id
        self.contextFeatureVector = contextFeatureVector
        self.label = label
        self.cluster = cluster


class Simulation(object):
    """Drives warm-up and testing rounds of the bandit algorithms and logs
    batched cumulative regret to ``./Out/AccRegret<timestamp>.csv``."""

    def __init__(self, iterations, algorithms, training_iters, nodes, arm_num, graph):
        """
        :param iterations: number of testing rounds.
        :param algorithms: dict mapping algorithm name -> algorithm object
            exposing ``decide(nodes)`` and ``updateParameters(node, reward)``.
        :param training_iters: number of warm-up rounds before testing.
        :param nodes: candidate nodes presented each round.
        :param arm_num: number of arms (stored for reference).
        :param graph: adjacency matrix (stored for reference).
        """
        self.iterations = iterations
        self.algorithms = algorithms
        self.start_time = datetime.datetime.now()
        self.training_iters = training_iters
        self.arm_num = arm_num
        self.graph = graph
        self.nodes = nodes
        self.selected_nodes = []

    def run(self):
        """Run training then testing rounds.

        Every 5 testing rounds, the cumulative regret of each algorithm is
        appended to the CSV log.
        """
        timeRun = self.start_time.strftime('_%m_%d_%H_%M')
        out_dir = "./Out"
        # The original crashed with FileNotFoundError when ./Out was absent.
        os.makedirs(out_dir, exist_ok=True)
        regretFilePath = os.path.join(out_dir, "AccRegret" + timeRun + ".csv")

        # BUG FIX: the original referenced the module-global `algorithms`
        # here and when logging below; it must use the instance's own dict.
        alg_names = list(self.algorithms.keys())

        with open(regretFilePath, 'w') as f:
            f.write('Time(Iteration)')
            f.write(',' + ','.join(str(name) for name in alg_names))
            f.write('\n')
            f.write('0,' + ','.join('0' for _ in alg_names))
            f.write('\n')

        optimalReward = 1  # best achievable per-round reward: a true anomaly
        algRegret = {}
        batchRegret = {}

        # ---- training (warm-up) rounds ----
        counter = {name: 0 for name in alg_names}  # anomalies found per algorithm
        for iteration in range(1, self.training_iters + 1):
            print(iteration)
            for alg_name, alg in self.algorithms.items():
                pickedNode = alg.decide(self.nodes)
                reward = self.getReward(pickedNode)  # + noise
                alg.updateParameters(pickedNode, reward)
                if reward == 1:
                    counter[alg_name] += 1

        for alg_name, cnt in counter.items():
            print(alg_name + ": " + str(cnt))

        # Reset per-algorithm selection state before the evaluated rounds.
        for alg_name, alg in self.algorithms.items():
            alg.selected_nodes = []
            algRegret[alg_name] = []
            batchRegret[alg_name] = []

        # ---- testing rounds ----
        for iteration in range(1, self.iterations + 1):
            print(iteration)
            for alg_name, alg in self.algorithms.items():
                pickedNode = alg.decide(self.nodes)
                reward = self.getReward(pickedNode)  # + noise
                alg.updateParameters(pickedNode, reward)
                algRegret[alg_name].append(optimalReward - reward)

            if iteration % 5 == 0:
                for alg_name in alg_names:
                    batchRegret[alg_name].append(sum(algRegret[alg_name]))
                with open(regretFilePath, 'a+') as f:
                    f.write(str(iteration))
                    f.write(',' + ','.join(str(batchRegret[name][-1]) for name in alg_names))
                    f.write('\n')

    @staticmethod
    def getReward(arm):
        """Return 1 if the picked node is a true anomaly, else 0."""
        return 1 if arm.label is True else 0


def getTrainAndAll(labels, attributes, training_true_num):
    """Build all nodes and sample a training subset.

    The subset contains ``training_true_num`` anomalies plus 10x as many
    normal nodes, sampled uniformly at random.

    BUG FIX: the original called ``Node(id, label, attribute)``, which put
    the attribute vector into the ``cluster`` slot and left
    ``contextFeatureVector`` unset. Cluster information is not available
    here, so it is set to None.

    :return: (training_nodes, all nodes)
    """
    nodes = []
    anomaly_ids = []
    normal_ids = []
    for node_id, label in enumerate(labels):
        attribute = attributes[node_id].T
        feature = attribute.reshape(len(attribute))
        if label[0] == 1:
            nodes.append(Node(node_id, True, None, feature))
            anomaly_ids.append(node_id)
        else:
            nodes.append(Node(node_id, False, None, feature))
            normal_ids.append(node_id)

    training_ids = random.sample(anomaly_ids, training_true_num)
    training_ids.extend(random.sample(normal_ids, training_true_num * 10))
    training_nodes = [nodes[i] for i in training_ids]
    return training_nodes, nodes


def getClasses(labels, attributes, classes):
    """Group nodes by class id.

    BUG FIX: as in ``getTrainAndAll``, the original passed the attribute
    vector into ``Node``'s ``cluster`` slot; cluster is set to None here.

    :return: (list of per-class node lists, number of classes)
    """
    class_list = []
    for cla in classes:
        if cla[0] not in class_list:
            class_list.append(cla[0])
    class_num = len(class_list)

    class_node_list = []
    for class_id in class_list:
        node_list = []
        for node_id, node_class in enumerate(classes):
            if node_class != class_id:
                continue
            if labels[node_id] == 1:
                node_list.append(Node(node_id, True, None, attributes[node_id]))
            else:
                node_list.append(Node(node_id, False, None, attributes[node_id]))
        class_node_list.append(node_list)

    return class_node_list, class_num


def getNodes(labels, attributes, classes):
    """Build one Node per row; cluster is the class id shifted to 0-based."""
    node_list = []
    for node_id, label in enumerate(labels):
        cluster = classes[node_id][0] - 1
        if labels[node_id] == 1:
            node = Node(node_id, True, cluster, attributes[node_id])
        else:
            node = Node(node_id, False, cluster, attributes[node_id])
        node_list.append(node)
    return node_list


if __name__ == '__main__':

    iterations = 250
    training_iters = 0
    algorithms = {}

    # Load the BlogCatalog attributed network.
    data = scipy.io.loadmat("BlogCatalog.mat")
    labels = data["Label"]
    attributes = data["Attributes"]
    graph = data["Network"].toarray()
    classes = data["Class"]

    class_set = set(row[0] for row in classes)
    arm_num = len(class_set)
    print("arm_num = " + str(arm_num))

    context_dimension = len(attributes[0])
    print("context_dimension = " + str(context_dimension))

    all_nodes = getNodes(labels, attributes, classes)

    algorithms['GraphUCB'] = GraphUCB(context_dimension, arm_num, graph, all_nodes,
                                      ALPHA=10, LAMBDA=0.1, BETA=0.1, RHO=10)

    simExperiment = Simulation(iterations, algorithms, training_iters,
                               all_nodes, arm_num, graph)
    simExperiment.run()