# -*- coding: utf-8 -*-
"""Multidimensional_root_cause_analysis (Node.py).

Root-cause analysis of anomalies in multi-dimensional monitoring data, based
on Monte Carlo tree search (MCTS).

Notes carried over from the project README:

- The project is based on "HotSpot: Anomaly Localization for Additive KPIs
  With Multi-Dimensional Attributes"
  (https://www.researchgate.net/publication/323087892_HotSpot_Anomaly_Localization_for_Additive_KPIs_with_Multi-Dimensional_Attributes).
- One of the paper's authors was an undergraduate classmate of the project
  author; with his help and guidance the two-dimensional root-cause analysis
  was implemented.  The details of this MCTS implementation differ slightly
  from the paper.
- Example use case: a playback-quality KPI is associated with several
  dimensions (app version, carrier, province, CDN, ...).  When the KPI drops
  suddenly, the combination of dimension values that caused the drop has to
  be located quickly.

Data layout used throughout this module: ``forecast`` and ``real`` are 2-D
tables whose last row and last column hold the totals of the preceding
rows / columns.
"""
import copy
import json
import math
import random

import numpy as np


class Node(object):
    """One node of the search tree."""

    def __init__(self):
        # Parent node; None for the root.
        self.parents = None
        # Child nodes, created lazily by init_children().
        self.children = []
        # Elements (entries of `choise`) selected on the path to this node.
        self.state = []

        # Best score seen on any path through this node.
        self.Q = 0
        # Number of times this node has been visited by backup().
        self.N = 0


def selection(node, choise):
    """Walk down the tree to the node that should be expanded next.

    Args:
        node: Node to start from (the root, when called from MCTS).
        choise: List of all selectable elements.

    Returns:
        A ``(node, all_selected)`` tuple.  ``all_selected`` is True when no
        further expansion is possible from the returned node: either its
        state already holds every element of ``choise``, or it has neither
        an unvisited nor a visited child to continue with.
    """
    # Keep descending while the current node does not contain every element.
    while len(node.state) < len(choise):
        # First visit of this node: create its children.
        if not node.children:
            init_children(node, choise)

        q_max = 0
        has_unvisited = False
        for child in node.children:
            if child.Q > q_max:
                q_max = child.Q
            if child.N == 0:
                has_unvisited = True

        # With unvisited children, decide at random (weighted by the best
        # child score) whether to expand here or to go deeper.
        if has_unvisited and random.random() > q_max:
            return node, False

        # Otherwise follow the child with the best UCB score.
        successor = best_child(node)
        if successor is None:
            # No visited child to descend into: expand here if something is
            # still unvisited, otherwise the search space is exhausted.
            return node, not has_unvisited
        node = successor

    # The node contains every element; nothing left to search below it.
    return node, True


def init_children(node, choise):
    """Create one child of ``node`` per element of ``choise`` not in its state.

    Children are appended to ``node.children`` in the order of ``choise``;
    each child's state is the parent's state plus the new element.
    """
    for element in choise:
        if element in node.state:
            continue
        child = Node()
        child.state = list(node.state) + [element]
        child.parents = node
        node.children.append(child)


def best_child(node):
    """Return the child of ``node`` with the highest UCB score.

    Children with ``Q > 0`` are preferred.  If there is none, visited
    children (``N > 0``) are considered instead so that a fully visited node
    whose children all scored 0 can still be descended.

    Returns:
        The chosen child, or None when ``node`` has no visited child.
    """
    exploration = math.sqrt(2.0)
    # log(0) is undefined; a node with a visited child normally has N >= 1.
    parent_visits = max(node.N, 1)

    def ucb(child):
        return child.Q + exploration * math.sqrt(
            math.log(parent_visits) / child.N)

    # N > 0 is required by the formula (division by child.N).
    candidates = [c for c in node.children if c.Q > 0 and c.N > 0]
    if not candidates:
        candidates = [c for c in node.children if c.N > 0]
    if not candidates:
        return None
    # max() keeps the first of several equally scored children.
    return max(candidates, key=ucb)


def expansion(selection_node, score_single_e, choise=None):
    """Pick the unvisited child element with the best single-element score.

    Args:
        selection_node: Node whose unvisited children are candidates.
        score_single_e: Single-element scores.  When ``choise`` is given it
            is indexed like ``choise``; otherwise it must already be aligned
            position-by-position with the unvisited children.
        choise: Optional list of all selectable elements, used to look up
            the score that belongs to each candidate element.

    Returns:
        The chosen element, or -1 when there is no unvisited child.
    """
    # New elements offered by the children that have not been visited yet.
    e_field = [child.state[-1]
               for child in selection_node.children if child.N == 0]

    if choise is None:
        scores = score_single_e
    else:
        # Elements are lists (unhashable), so key the lookup by tuple.
        position = dict((tuple(e), idx) for idx, e in enumerate(choise))
        scores = [score_single_e[position[tuple(e)]] for e in e_field]

    return get_max_e(e_field, scores)


def get_max_e(e_field, score_single_e):
    """Return the element of ``e_field`` with the highest score.

    ``score_single_e[k]`` is taken as the score of ``e_field[k]``; the scores
    are precomputed by the caller to avoid recomputation.  On ties the first
    element wins.  Returns -1 when ``e_field`` is empty.
    """
    max_e = -1
    max_score = -1
    for element, score in zip(e_field, score_single_e):
        if score > max_score:
            max_score = score
            max_e = element
    return max_e


def evalation(selection_node, max_e, forecast, real, v, f):
    """Score the state obtained by adding ``max_e`` to ``selection_node``.

    Returns:
        The Q value (see get_scores) of ``selection_node.state + [max_e]``.
    """
    new_set = copy.deepcopy(selection_node.state)
    new_set.append(max_e)
    return get_scores(new_set, forecast, real, v, f)


def get_scores(set, forecast, real, v, f):
    """Compute the Q value of a set of elements.

    Args:
        set: Non-empty list of elements.  One-item elements ``[col]`` select
            a column, two-item elements ``[row, col]`` select a single cell;
            the kind is decided from the first element.
        forecast: Forecast table (last row / column are totals).
        real: Real table with the same layout.
        v: Flattened real values without the total row / column.
        f: Flattened forecast values without the total row / column.

    Returns:
        ``max(1 - d(v, a) / d(v, f), 0)`` where ``a`` is the forecast with
        the selected entries replaced, or 0 when ``d(v, f)`` is 0 (forecast
        and real agree, so there is no deviation to explain).
    """
    # Work on a float copy of the forecast without its total row; a float
    # dtype keeps the computed values from being truncated to integers.
    cp = np.array(forecast[:-1], dtype=float)

    # 1-D elements: replace the whole column by the value derived from the
    # column totals.
    if len(set[0]) == 1:
        for element in set:
            col = element[0]
            for row in range(len(real) - 1):
                cp[row, col] = getValueA(
                    forecast[row][col], forecast[-1][col], real[-1][col])

    # 2-D elements: replace the cell by its real value.
    if len(set[0]) == 2:
        for element in set:
            cp[element[0], element[1]] = real[element[0]][element[1]]

    # Drop the per-row total and flatten to one vector.
    a = []
    for values in cp:
        a.extend(values[:-1])

    baseline = getDistance(v, f)
    if baseline == 0:
        return 0
    return max(1 - getDistance(v, a) / baseline, 0)


def getValueA(a, b, c):
    """Return ``a - (b - c) * a / b`` using float division.

    ``b`` must be non-zero.
    """
    return a - (b - c) * float(a) / b


def getDistance(u, w):
    """Return the Euclidean distance between vectors ``u`` and ``w``.

    Components are paired with zip(), i.e. up to the shorter vector.
    """
    return math.sqrt(sum((x - y) ** 2 for x, y in zip(u, w)))


def backup(selection_node, max_e, new_q):
    """Propagate a new score from the evaluated child up to the root.

    Every node on the path gets ``N += 1`` and its ``Q`` raised to ``new_q``
    if that is larger.

    Raises:
        ValueError: If no child of ``selection_node`` ends with ``max_e``.
    """
    # Locate the evaluated child (the last match wins, as before).
    target = None
    for child in selection_node.children:
        if child.state[-1] == max_e:
            target = child
    if target is None:
        raise ValueError(
            "no child of the selected node ends with element %r" % (max_e,))

    node = target
    while node is not None:
        node.N += 1
        if new_q > node.Q:
            node.Q = new_q
        node = node.parents


def get_best_node(node):
    """Return the deepest node that carries the same Q value as ``node``.

    At each level the first child whose Q equals ``node.Q`` is followed; the
    walk stops at a node without children or without such a child.
    """
    best_score = node.Q
    while node.children:
        for child in node.children:
            if child.Q == best_score:
                node = child
                break
        else:
            # No child carries the best score: this node is the deepest one.
            break
    return node


def MCTS(forecast, real, choise, M, PT):
    """Search for the element set that best explains real vs. forecast.

    Args:
        forecast: Forecast table (last row / column are totals).
        real: Real table with the same layout.
        choise: List of selectable elements.
        M: Maximum number of search iterations.
        PT: Q threshold; the search stops once a new state reaches it.

    Returns:
        The best Node found, or None if no state scored above 0.
    """
    # Real vector v and forecast vector f used by the Q formula.
    v = []
    f = []
    for i in range(len(forecast) - 1):
        v.extend(real[i][:-1])
        f.extend(forecast[i][:-1])

    # Q value of every single element, indexed like `choise`.
    score_single_e = [get_scores([e], forecast, real, v, f) for e in choise]

    node = Node()
    max_q = 0
    best_node = None

    for _ in range(M):
        # 1. Selection; stop when nothing is left to expand.
        selection_node, all_selected = selection(node, choise)
        if all_selected:
            break

        # 2. Expansion: best-scoring element among the unvisited children.
        max_e = expansion(selection_node, score_single_e, choise)

        # 3. Evaluation of the state extended by that element.
        new_q = evalation(selection_node, max_e, forecast, real, v, f)

        # 4. Backup along the path from the new node to the root.
        backup(selection_node, max_e, new_q)

        # Track the best node whenever the root's Q improves.
        if node.Q > max_q:
            best_node = get_best_node(node)
            max_q = node.Q
        # Stop as soon as the new state reaches the threshold.
        if new_q >= PT:
            break
    return best_node


def get_choise(forecast):
    """Return the 1-D elements ``[[0], [1], ...]``, one per non-total column."""
    return [[i] for i in range(len(forecast[0]) - 1)]


def get_result(row_name, column_name, forecast, real, M, PT):
    """Run the 2-D root-cause analysis.

    Columns and rows are searched separately first; the cells formed by the
    selected rows and columns are then searched as 2-D elements.

    Args:
        row_name: Names of the non-total rows.
        column_name: Names of the non-total columns.
        forecast: Forecast table (last row / column are totals).
        real: Real table with the same layout.
        M: Maximum number of iterations per search.
        PT: Q threshold that stops a search.

    Returns:
        ``(result_name, result_Q)``: a list of ``[row name, column name]``
        pairs and the Q value of that combination, or ``([], 0)`` when a
        search finds no state with a positive score.
    """
    forecast = np.array(forecast)
    real = np.array(real)

    column_node = MCTS(forecast, real, get_choise(forecast), M, PT)
    row_node = MCTS(np.transpose(forecast), np.transpose(real),
                    get_choise(np.transpose(forecast)), M, PT)
    if row_node is None or column_node is None:
        return [], 0

    mix_choise = [[row[0], column[0]]
                  for row in row_node.state
                  for column in column_node.state]
    mix_node = MCTS(forecast, real, mix_choise, M, PT)
    if mix_node is None:
        return [], 0

    # Only the 2-D result is reported.  (An earlier variant returned
    # whichever of the row, column and mixed nodes had the highest Q.)
    result_name = [[row_name[i[0]], column_name[i[1]]]
                   for i in mix_node.state]
    return result_name, mix_node.Q


if __name__ == '__main__':
    # M is the maximum number of search iterations.
    M = 1000
    # PT is the Q threshold.
    PT = 0.75

    # Test data 1
    row_name = ['Mobile', 'Unicom']
    column_name = ['Beijing', 'Shanghai', 'Guangzhou']
    forecast = [[20, 15, 10, 45],
                [10, 25, 20, 55],
                [30, 40, 30, 100]]
    real = [[14, 9, 10, 33],
            [7, 15, 20, 42],
            [21, 24, 30, 75]]

    # Test data 2
    # row_name = ['Mobile', 'Unicom']
    # column_name = ['Fujian', 'Jiangsu', 'Zhejiang']
    # forecast = [[20, 15, 10, 45],
    #             [10, 25, 20, 55],
    #             [30, 40, 30, 100]]
    # real = [[5, 15, 10, 30],
    #         [10, 13, 20, 43],
    #         [15, 28, 30, 73]]

    # Test data 3 (4 carriers x 12 provinces; labels translated)
    # row_name = ['Unicom', 'Telecom', 'Mobile', 'Great Wall Broadband']
    # column_name = ['Inner Mongolia', 'Shandong', 'Guangdong', 'Xinjiang',
    #                'Jiangxi', 'Hebei', 'Zhejiang', 'Hainan', 'Hubei',
    #                'Hunan', 'Liaoning', 'Heilongjiang']
    # forecast = [[53, 0, 111, 0, 0, 203, 0, 0, 0, 0, 141, 87, 595],
    #             [0, 113, 0, 34, 0, 173, 0, 41, 0, 0, 0, 0, 361],
    #             [0, 236, 213, 0, 74, 94, 221, 0, 55, 49, 51, 0, 993],
    #             [0, 0, 73, 0, 0, 0, 0, 0, 0, 0, 0, 0, 73],
    #             [53, 349, 397, 34, 74, 470, 221, 41, 55, 49, 192, 87, 2022]]
    # real = [[32, 0, 70, 0, 0, 124, 0, 0, 0, 0, 75, 63, 364],
    #         [0, 61, 0, 9, 0, 78, 0, 15, 0, 0, 0, 0, 163],
    #         [0, 141, 112, 0, 44, 56, 127, 0, 29, 39, 15, 0, 563],
    #         [0, 0, 41, 0, 0, 0, 0, 0, 0, 0, 0, 0, 41],
    #         [32, 202, 223, 9, 44, 258, 127, 15, 29, 39, 90, 63, 1131]]

    name, Q = get_result(row_name, column_name, forecast, real, M, PT)

    print("根因组合: ")
    print(json.dumps(name, ensure_ascii=False))
    print("组合得分: ")
    print(Q)