├── .DS_Store ├── README.md ├── code ├── config.py ├── detect.py ├── domi.py ├── evaluate.py ├── model.py ├── pot.py └── util.py └── requirements.txt /.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NetManAIOps/DOMI_code/94b5a415c5b6c38dfd7169a07a40b08df3821cad/.DS_Store -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # DOMI 2 | 3 | 4 | ### Detecting Outlier Machine Instances through One Dimensional CNN Gaussian Mixture Variational AutoEncoder 5 | 6 | DOMI is a VAE-based model that combines a one-dimensional convolutional neural network with a Gaussian mixture variational auto-encoder. 7 | It aims to detect outlier machine instances; its core idea is to learn the normal patterns of multivariate time series 8 | and use the reconstruction probability to judge outliers. 9 | Moreover, for a detected outlier machine instance, DOMI provides an interpretation based on the reconstruction probability changes of its univariate time series. 10 | 11 | 12 | 13 | ## Getting Started 14 | 15 | #### Clone the repo 16 | 17 | ``` 18 | git clone https://github.com/Tsinghuasuya/DOMI_code 19 | ``` 20 | 21 | #### Get data from GitHub and unzip 22 | 23 | ``` 24 | git lfs clone https://github.com/Tsinghuasuya/DOMI_dataset && cd DOMI_dataset && unzip publicDataset.zip && cd ../DOMI_code 25 | ``` 26 | 27 | 28 | #### Install dependencies (with Python 3.6) 29 | 30 | (virtualenv is recommended) 31 | 32 | ```shell 33 | pip install -r requirements.txt 34 | ``` 35 | 36 | 37 | #### Run the code 38 | 39 | ``` 40 | cd code && python domi.py 41 | ``` 42 | 43 | If you want to change the default configuration, you can edit `ExpConfig` in `config.py` or 44 | override the config in `domi.py` using command-line args. For example: 45 | 46 | ``` 47 | python domi.py --noExp=2 --max_epoch=100 --initial_lr=0.0001 48 | ``` 49 | 50 | 51 | ## Result 52 | 53 | After running the program, the output is written to the result directory that you set in the config. For each instance, you get the total outlier score as well as the score of each univariate time series. 54 | All the results are in the folder `{config.result_dir}/`, with the trained model in `{config.result_dir}/DOMI_{noExp}.model`, the output and config of DOMI in the folder `{config.result_dir}/DOMI_{noExp}/`, 55 | and the detailed detection results in the folder `DOMI_{noExp}/result_summary/`, which consists of the following parts: 56 | * `OutlierScores_metric.txt`: score of each univariate time series for each instance in the test dataset. 57 | * `OutlierScores.txt`: total outlier score for each instance in the test dataset. 58 | * `MetricResult.txt`: interpretation result, i.e., the univariate time series of each machine instance ranked by their contribution to the outlier score. 59 | * `PRF.txt`: summary of the overall statistics, including the average score of each univariate time series and the threshold, precision, recall, and F1-score. 
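A minimal sketch (not part of the repo) for inspecting the detection output is shown below; the result path assumes the default `result_dir` and `noExp=1`, so adjust it to wherever your run wrote `OutlierScores.txt`:

```python
# Hypothetical helper: list the most anomalous instances from OutlierScores.txt.
# Each line has the form "<labelFileName>+<lineNo>,<score>", where a lower score
# (reconstruction log-probability) means the instance is more likely an outlier.
path = "results/domi_1/result_summary/OutlierScores.txt"  # assumed location, adjust to your config

scores = []
with open(path) as f:
    for line in f:
        name, value = line.strip().rsplit(",", 1)
        scores.append((float(value), name))

# print the 10 lowest-scoring (most anomalous) machine instances
for value, name in sorted(scores)[:10]:
    print("{:<40s} {:.2f}".format(name, value))
```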
60 | -------------------------------------------------------------------------------- /code/config.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import tfsnippet as spt 3 | 4 | 5 | class ExpConfig(spt.Config): 6 | # Data options 7 | noExp = 1 8 | GPU_number = '0' 9 | channels_last = True 10 | datapathForTrain = "../../DOMI_dataset/publicDataset/train_data" 11 | datapathForTest = "../../DOMI_dataset/publicDataset/test_data" 12 | dataReadformat = "each" # or all 13 | labelpath = "../../DOMI_dataset/publicDataset/test_label/" 14 | interpret_filepath = "../../DOMI_dataset/publicDataset/interpretation_label.txt" 15 | result_dir = "results" 16 | 17 | # model parameters 18 | n_c = 4 19 | strides1 = 4 20 | strides2 = 3 21 | kernel_size1 = 12 22 | kernel_size2 = 6 23 | timeLength = 288 24 | metricNumber = 19 25 | x_dim = timeLength*metricNumber 26 | z_dim = 10 27 | norm = False 28 | VALID_PORTION = 0.1 29 | act_norm = True 30 | l2_reg = 0.0001 31 | shortcut_kernel_size= 1 32 | 33 | # Training parameters 34 | batch_size = 32 # 32 35 | initial_lr = 0.001 # 0.0005, 0.001 36 | lr_anneal_factor = 0.5 # 0.5, 0.75 37 | lr_anneal_epoch_freq= 5 # 20 38 | max_epoch = 10 # 50, 100, 200 39 | lr_anneal_step_freq = None 40 | max_step = None 41 | write_summary = False 42 | grad_clip_norm = 1.0 43 | check_numerics = True 44 | std_epsilon = 1e-10 45 | 46 | # Evaluation parameters 47 | test_batch_size = 32 # 64, 128, 256 48 | batchTest = True 49 | test_n_z = 500 # 5000, 1000 50 | train_n_samples = None 51 | savetrainDS = True 52 | savetestDS = True 53 | savetestTS = False 54 | evaluation = True 55 | saveMetricInfo = True 56 | 57 | # Test 58 | q = 1e-4 59 | level = 0.2 60 | 61 | @property 62 | def x_shape(self): 63 | return (self.timeLength, self.metricNumber, 1) if self.channels_last else (1, self.timeLength, self.metricNumber) 64 | -------------------------------------------------------------------------------- /code/detect.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import numpy as np 3 | from pot import POT 4 | 5 | 6 | def pot_eval(init_score, q, level): 7 | """ 8 | Run POT method on given score. 9 | init_score : The data to get init threshold. the outlier score of train set. 10 | q (float): Detection level (risk) 11 | level (float): Probability associated with the initial threshold t 12 | return the threshold under POT estimation algorithm. 13 | """ 14 | s = POT(q) # SPOT object 15 | pot_th = s.initialize(init_score, level=level) # initialization step 16 | return pot_th 17 | 18 | 19 | def cal_scoreChanges(outlierScore_list, ave_twoMetricScore = None, twoMetricScore = None): 20 | """ 21 | get the change score of each metric 22 | return the list of outlier score change. 23 | """ 24 | TwoMetricScoreList = [] 25 | for i in range(0, len(outlierScore_list)): 26 | TwoMetricScoreList.append(-1*(np.array(twoMetricScore[i]) - np.array(ave_twoMetricScore))) 27 | return TwoMetricScoreList 28 | 29 | 30 | def cal_binaryResult(outlierScore_list, threshold, timeIndex, saveMetricInfo = False, 31 | labelFileNameLineCntList = None): 32 | """ 33 | output result according the threshold 34 | return the binary result whether it's an outlier. 
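An instance is flagged as an outlier when its outlier score (reconstruction log-probability) falls below the threshold; the function returns the time indexes of the flagged instances and, when saveMetricInfo is set, their fileName+lineCnt identifiers as well.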
35 | """ 36 | result_dict = dict() 37 | fileNameLineCntList = [] 38 | for i in range(0, len(outlierScore_list)): 39 | if outlierScore_list[i] < threshold: 40 | result_dict[i] = outlierScore_list[i] 41 | if saveMetricInfo: 42 | fileNameLineCntList.append(labelFileNameLineCntList[i]) 43 | resultArray = [timeIndex[index] for index, value in result_dict.items()] 44 | if saveMetricInfo: 45 | return resultArray, fileNameLineCntList 46 | else: 47 | return resultArray 48 | 49 | -------------------------------------------------------------------------------- /code/domi.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import functools 3 | import sys 4 | import os 5 | import time 6 | import numpy as np 7 | np.set_printoptions(precision=2) 8 | from argparse import ArgumentParser 9 | import tensorflow as tf 10 | from pprint import pformat 11 | from tensorflow.contrib.framework import arg_scope 12 | 13 | import tfsnippet as spt 14 | from tfsnippet.dataflows import DataFlow 15 | from tfsnippet.scaffold import CheckpointSaver 16 | from tfsnippet.utils import split_numpy_array, get_batch_size 17 | from tfsnippet.examples.utils import MLResults, print_with_title, MultiGPU 18 | 19 | from util import save_file, read_file, load_matrix_allData, get_machineID, cat_List 20 | from evaluate import evaluate, interpretation_hit_ratio 21 | from detect import pot_eval, cal_binaryResult, cal_scoreChanges 22 | from model import q_net, p_net 23 | 24 | from config import ExpConfig 25 | config = ExpConfig() 26 | 27 | 28 | def main(): 29 | # parse the arguments 30 | arg_parser = ArgumentParser() 31 | spt.register_config_arguments(config, arg_parser, title='Model options') 32 | spt.register_config_arguments(spt.settings, arg_parser, prefix='tfsnippet', title='TFSnippet options') 33 | arg_parser.parse_args(sys.argv[1:]) 34 | 35 | # print the config 36 | print_with_title('Configurations', pformat(config.to_dict()), after='\n') 37 | 38 | # open the result object and prepare for result directories 39 | model_file = config.result_dir + "/" + os.path.basename(__file__).split(".py")[0] + "_" + \ 40 | str(config.noExp) + ".model" 41 | dirName = os.path.basename(__file__).split(".py")[0] + "_" + str(config.noExp) 42 | results = MLResults(os.path.join(config.result_dir, dirName)) 43 | results.save_config(config) # save experiment settings 44 | results.make_dirs('train_summary', exist_ok=True) 45 | results.make_dirs('result_summary', exist_ok=True) 46 | results.make_dirs('mid_summary', exist_ok=True) 47 | 48 | # os.environ["CUDA_VISIBLE_DEVICES"] = config.GPU_number 49 | 50 | # input placeholders 51 | input_x = tf.placeholder(dtype=tf.float32, shape=(None,) + config.x_shape, name='input_x') 52 | learning_rate = spt.AnnealingVariable('learning_rate', config.initial_lr, config.lr_anneal_factor, min_value=1e-6) 53 | multi_gpu = MultiGPU(disable_prebuild=True) 54 | # multi_gpu = MultiGPU() 55 | 56 | # derive the training operation 57 | gradses = [] 58 | grad_vars = [] 59 | train_losses = [] 60 | BATCH_SIZE = get_batch_size(input_x) 61 | 62 | for dev, pre_build, [dev_input_x] in multi_gpu.data_parallel(BATCH_SIZE, [input_x]): 63 | with tf.device(dev), multi_gpu.maybe_name_scope(dev): 64 | # derive the loss for initializing 65 | with tf.name_scope('initialization'), \ 66 | arg_scope([p_net, q_net], is_initializing=True), \ 67 | spt.utils.scoped_set_config(spt.settings, auto_histogram=False): 68 | init_q_net = q_net(dev_input_x, n_z=config.train_n_samples) 69 | init_chain = 
init_q_net.chain(p_net, latent_axis=0, observed={'x': dev_input_x}) 70 | init_loss = tf.reduce_mean(init_chain.vi.training.vimco()) 71 | 72 | # derive the loss and lower-bound for training 73 | with tf.name_scope('training'), \ 74 | arg_scope([p_net, q_net], is_training=True): 75 | train_q_net = q_net(dev_input_x, n_z=config.train_n_samples) 76 | train_chain = train_q_net.chain(p_net, latent_axis=0, observed={'x': dev_input_x}) 77 | train_loss = ( 78 | tf.reduce_mean(train_chain.vi.training.vimco()) + 79 | tf.losses.get_regularization_loss() 80 | ) 81 | train_losses.append(train_loss) 82 | 83 | # derive the logits output for testing 84 | with tf.name_scope('testing'): 85 | test_q_net = q_net(dev_input_x, n_z=config.test_n_z) 86 | test_chain = test_q_net.chain(p_net, latent_axis=0, observed={'x': dev_input_x}) 87 | # log_prob of X and each univariate time series of X 88 | log_prob = tf.reduce_mean(test_chain.model['x'].distribution.log_prob(dev_input_x), 0) 89 | log_prob_per_element = tf.reduce_sum(log_prob) 90 | log_prob_per_element_univariate_TS = tf.reduce_sum(log_prob, [0, 1, 3]) 91 | log_prob_per_element_univariate_TS_All = tf.reduce_sum(log_prob, [1, 3]) 92 | 93 | # derive the optimizer 94 | with tf.name_scope('optimizing'): 95 | params = tf.trainable_variables() 96 | optimizer = tf.train.AdamOptimizer(learning_rate) 97 | grads = optimizer.compute_gradients(train_loss, params) 98 | for grad, var in grads: 99 | if grad is not None and var is not None: 100 | if config.grad_clip_norm: 101 | grad = tf.clip_by_norm(grad, config.grad_clip_norm) 102 | if config.check_numerics: 103 | grad = tf.check_numerics(grad, 'gradient for {} has numeric issue'.format(var.name)) 104 | grad_vars.append((grad, var)) 105 | gradses.append(grad_vars) 106 | 107 | # merge multi-gpu outputs and operations 108 | [train_loss] = multi_gpu.average([train_losses], BATCH_SIZE) 109 | train_op = multi_gpu.apply_grads( 110 | grads=multi_gpu.average_grads(gradses), 111 | optimizer=optimizer, 112 | control_inputs=tf.get_collection(tf.GraphKeys.UPDATE_OPS) 113 | ) 114 | 115 | # sort the contribution of each univariate_TS of input 116 | SORT_UNIVARIATE_TS_INPUT = tf.placeholder(dtype=tf.float32, shape=(None, None), name='SORT_UNIVARIATE_TS_INPUT') 117 | SORT_UNIVARIATE_TS = tf.nn.top_k(SORT_UNIVARIATE_TS_INPUT, k=config.metricNumber).indices + 1 118 | 119 | # load the training and testing data 120 | print("="*10+"Shape of Input data"+"="*10) 121 | x, time_indexs, x_test, time_indexs2 = load_matrix_allData( 122 | config.dataReadformat, config.datapathForTrain, config.datapathForTest, config.timeLength, config.metricNumber, 123 | "TrainFileNameList.txt", "TestFileNameList.txt", results, config.norm 124 | ) 125 | 126 | x_test = x_test.reshape([-1, config.timeLength, config.metricNumber, 1]) 127 | print("Test:", x_test.shape) 128 | if config.batchTest: 129 | test_flow = DataFlow.arrays([x_test], config.test_batch_size) # DataFlow is iterator 130 | del x_test 131 | x_train, x_val = split_numpy_array(x, portion=config.VALID_PORTION) 132 | x_train = x_train.reshape([-1, config.timeLength, config.metricNumber, 1]) 133 | x_val = x_val.reshape([-1, config.timeLength, config.metricNumber, 1]) 134 | train_flow = DataFlow.arrays([x_train], config.batch_size, shuffle=False, skip_incomplete=True) 135 | val_flow = DataFlow.arrays([x_val], config.test_batch_size) 136 | print("Note:", config.x_dim, ", x_dim = size of datapoint = timeLength * metricNumber") 137 | print("Input data shape:", x.shape, "Train data shape:", x_train.shape, 
"Validation data shape:", x_val.shape) 138 | del x_train, x_val, x 139 | 140 | # training part 141 | with spt.utils.create_session().as_default() as session: 142 | spt.utils.ensure_variables_initialized() 143 | saver = CheckpointSaver(tf.trainable_variables(), model_file) 144 | if os.path.exists(model_file): 145 | # load the parameters of trained model 146 | saver.restore_latest() 147 | else: 148 | # initialize the network 149 | while True: 150 | breakFlag = 0 151 | for [x] in train_flow: 152 | INITLOSS = session.run(init_loss, feed_dict={input_x: x}) 153 | print('Network initialized, first-batch loss is {:.6g}.'.format(INITLOSS)) 154 | if np.isnan(INITLOSS) or np.isinf(INITLOSS) or INITLOSS > 10 ** 5: 155 | pass 156 | else: 157 | breakFlag = 1 158 | break 159 | if breakFlag: 160 | break 161 | 162 | # train the network 163 | with train_flow.threaded(10) as train_flow: 164 | with spt.TrainLoop(params, 165 | var_groups=['q_net', 'p_net'], 166 | max_epoch=config.max_epoch, 167 | max_step=config.max_step, 168 | summary_dir=(results.system_path('train_summary') if config.write_summary else None), 169 | summary_graph=tf.get_default_graph(), 170 | early_stopping=True) as loop: 171 | trainer = spt.Trainer( 172 | loop, train_op, [input_x], train_flow, 173 | metrics={'loss': train_loss}, 174 | summaries=tf.summary.merge_all(spt.GraphKeys.AUTO_HISTOGRAM) 175 | ) 176 | # anneal the learning rate 177 | trainer.anneal_after( 178 | learning_rate, 179 | epochs=config.lr_anneal_epoch_freq, 180 | steps=config.lr_anneal_step_freq 181 | ) 182 | validator = spt.Validator( 183 | loop, train_loss, [input_x], val_flow, 184 | ) 185 | trainer.evaluate_after_epochs(validator, freq=10) 186 | trainer.log_after_epochs(freq=1) 187 | trainer.run() 188 | saver.save() 189 | 190 | # save the training infomation 191 | firWrite = True 192 | num = 0 193 | time0 = time.time() 194 | for [x_train] in train_flow: 195 | if config.savetrainDS: 196 | # log prob of each metric of each instance 197 | log_prob_per_element_univariate_TS_list_item_Train = (session.run( 198 | log_prob_per_element_univariate_TS_All, 199 | feed_dict={input_x: x_train} 200 | )) 201 | log_prob_per_element_univariate_TS_list_Train = log_prob_per_element_univariate_TS_list_item_Train 202 | log_prob_per_element_list_Train = np.sum( 203 | np.array(log_prob_per_element_univariate_TS_list_item_Train), axis=1 204 | ).tolist() 205 | if firWrite: 206 | save_file( 207 | results.system_path("train_summary"), "OutlierScores_metric.txt", 208 | log_prob_per_element_univariate_TS_list_Train 209 | ) 210 | save_file( 211 | results.system_path("train_summary"), "OutlierScores.txt", log_prob_per_element_list_Train) 212 | else: 213 | save_file( 214 | results.system_path("train_summary"), "OutlierScores_metric.txt", 215 | log_prob_per_element_univariate_TS_list_Train, "\n", "a" 216 | ) 217 | save_file( 218 | results.system_path("train_summary"), "OutlierScores.txt", 219 | log_prob_per_element_list_Train, "\n", "a" 220 | ) 221 | 222 | firWrite = False 223 | num += 1 224 | if num % 1000 == 0: 225 | print( 226 | "-----Train %s >>>>>:Sum time of batch instances:%s" % 227 | (num, float(time.time()-time0)/float(num)) 228 | ) 229 | del train_flow, val_flow 230 | 231 | # online test 232 | time2 = time.time() 233 | log_prob_per_element_list, log_prob_per_element_univariate_TS_list = [], [] 234 | if config.batchTest: 235 | num = 0 236 | for [x_test] in test_flow: 237 | if config.savetestDS: 238 | # log prob of each metric of each instance 239 | log_prob_per_element_univariate_TS_list_item = 
(session.run( 240 | log_prob_per_element_univariate_TS_All, 241 | feed_dict={input_x: x_test} 242 | )) 243 | log_prob_per_element_univariate_TS_list += log_prob_per_element_univariate_TS_list_item.tolist() 244 | log_prob_per_element_list += np.sum(np.array( 245 | log_prob_per_element_univariate_TS_list_item), axis=1 246 | ).tolist() 247 | 248 | num += 1 249 | if num % 200 == 0: 250 | print( 251 | "-----Test %s >>>>>:Sum time of batch instances:%s" % 252 | (num, float(time.time()-time2)/float(num)) 253 | ) 254 | else: 255 | num = 1 256 | for batch_x in x_test: 257 | if config.savetestTS: 258 | log_prob_per_element_list_item = (session.run(log_prob_per_element, feed_dict={input_x: [batch_x]})) 259 | log_prob_per_element_list.append(log_prob_per_element_list_item) 260 | 261 | if config.savetestDS: 262 | log_prob_per_element_univariate_TS_list_item = (session.run( 263 | log_prob_per_element_univariate_TS, 264 | feed_dict={input_x: [batch_x]} 265 | )) 266 | log_prob_per_element_univariate_TS_list.append(log_prob_per_element_univariate_TS_list_item) 267 | log_prob_per_element_list.append(sum(log_prob_per_element_univariate_TS_list_item)) 268 | 269 | if num % 200 == 0: 270 | print( 271 | "-----Test>>>>>:%d, average time of each instance:%s" % 272 | (num, float(time.time()-time2)/float(num)) 273 | ) 274 | num += 1 275 | 276 | # get the lable file name and its line cnt number 277 | allLabelFileNameLineCntList = get_machineID(results, config.labelpath) 278 | 279 | print("No of OutlierScores for all dataPoint:(%s):" % len(log_prob_per_element_list)) 280 | if config.savetestDS: 281 | save_file( 282 | results.system_path("result_summary"), "OutlierScores_metric.txt", 283 | cat_List(allLabelFileNameLineCntList, log_prob_per_element_univariate_TS_list) 284 | ) 285 | save_file( 286 | results.system_path("result_summary"), "OutlierScores.txt", 287 | cat_List(allLabelFileNameLineCntList, log_prob_per_element_list) 288 | ) 289 | 290 | if config.evaluation: 291 | # Prepraration for the hitory two-metric results 292 | twoMetricScore = read_file(results.system_path("train_summary"), "OutlierScores_metric.txt") 293 | ave_twoMetricScore = np.mean(np.array(twoMetricScore), axis=0).tolist() 294 | save_file(results.system_path("result_summary"), "PRF.txt", 295 | ["Average score of each univariate time series", "\n"], ",") 296 | save_file(results.system_path("result_summary"), "PRF.txt", 297 | ave_twoMetricScore+["\n"], ",", "a") 298 | save_file(results.system_path("result_summary"), "PRF.txt", 299 | ["Threshold", "F", "Precision", "Recall", "TP", "FP", "FN", "\n"], ",", "a") 300 | 301 | # get the sorted item each metric by change score 302 | twoMetricScoreList = cal_scoreChanges(log_prob_per_element_list, 303 | ave_twoMetricScore, log_prob_per_element_univariate_TS_list) 304 | MetricResult = session.run(SORT_UNIVARIATE_TS, 305 | feed_dict={SORT_UNIVARIATE_TS_INPUT: twoMetricScoreList}) 306 | save_file(results.system_path("result_summary"), "MetricResult.txt", 307 | cat_List(allLabelFileNameLineCntList, MetricResult)) 308 | 309 | # POT evalution 310 | POT_TH = pot_eval( 311 | read_file(results.system_path("train_summary"), "OutlierScores.txt", "float"), config.q, config.level 312 | ) 313 | resultArray, outlierLabelfileNameLineCntList = cal_binaryResult( 314 | log_prob_per_element_list, POT_TH, time_indexs2, config.saveMetricInfo, allLabelFileNameLineCntList 315 | ) 316 | evaluate(results, config.labelpath, resultArray, time_indexs2, POT_TH) 317 | 318 | # print the final metrics and close the results object 319 | 
print_with_title('Results', results.format_metrics(), before='\n') 320 | results.close() 321 | 322 | interpretation_hit_ratio( 323 | truth_filepath=config.interpret_filepath, 324 | prediction_filepath=os.path.join(config.result_dir, dirName, "result_summary", "MetricResult.txt") 325 | ) 326 | 327 | 328 | if __name__ == '__main__': 329 | main() 330 | -------------------------------------------------------------------------------- /code/evaluate.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from util import read_file, save_file 3 | import numpy as np 4 | import os 5 | 6 | 7 | def evaluate(MLResult, labelpath, resultArray, timeIndex, threshold): 8 | """ 9 | evalute the results 10 | return F score of prediction and truth. 11 | """ 12 | groundTruthArray = [] 13 | TPArray = [] 14 | num = 0 15 | for fileName in read_file(MLResult.system_path("mid_summary"), "TestFileNameList.txt"): 16 | with open(labelpath + fileName,"r") as f: 17 | fline = f.readlines() 18 | for line in fline: 19 | count = line.strip("\n") 20 | if int(count) == 1 and num in timeIndex: 21 | groundTruthArray.append(num) 22 | num += 1 23 | 24 | TP = 0 25 | for i in resultArray: 26 | if i in groundTruthArray: 27 | TP += 1 28 | TPArray.append(i) 29 | 30 | FP = len(resultArray) - TP 31 | FN = len(groundTruthArray) - TP 32 | Precision = TP / (float(TP + FP)) if TP + FP != 0 else 1 33 | Recall = TP/(float(TP + FN)) if TP+FN != 0 else 1 34 | F = 0 if Recall + Precision == 0 else (2 * Recall * Precision)/(Recall + Precision) 35 | save_file( 36 | MLResult.system_path("result_summary"), "PRF.txt", 37 | [threshold, F, Precision, Recall, TP, FP, FN, "\n"], ",", "a" 38 | ) 39 | return F 40 | 41 | 42 | def interpretation_hit_ratio(truth_filepath, prediction_filepath): 43 | """ 44 | compute top 100%/120% interpretation hit ratio given truth lists of univariate time series 45 | that contribute to outlier judgement and predicted lists of univariate time series. 46 | return top 100%/120% interpretation hit ratio 47 | """ 48 | with open(truth_filepath, 'r') as f: 49 | gt = f.readlines() 50 | 51 | with open(prediction_filepath, 'r') as f: 52 | result = f.readlines() 53 | 54 | gtDict = {} 55 | for i in gt: 56 | iList = i.strip("\n").strip("\r").split(",") 57 | gtDict[iList[0]] = iList[1:] 58 | 59 | resultDict = {} 60 | for i in result: 61 | iList = i.strip("\n").strip("\r").replace(".txt", "").split(",") 62 | resultDict[iList[0]] = iList[1:] 63 | 64 | for rate in [1.0, 1.2]: 65 | accurate_list = [] 66 | for k in gtDict.keys(): 67 | t1 = resultDict[k] 68 | t2 = gtDict[k] 69 | t3 = list(set(t2).intersection(t1[0:int(len(t2) * rate)])) 70 | accurate_list.append(float(len(t3)) / float(len(t2))) 71 | print("top {}% interpretation hit ratio: ".format(rate * 100), sum(accurate_list) / len(accurate_list)) 72 | 73 | -------------------------------------------------------------------------------- /code/model.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import tfsnippet as spt 3 | import functools 4 | import tensorflow as tf 5 | from tensorflow.contrib.framework import arg_scope, add_arg_scope 6 | 7 | from config import ExpConfig 8 | config = ExpConfig() 9 | 10 | 11 | @spt.global_reuse 12 | @add_arg_scope 13 | def q_net(x, observed=None, n_z=None, is_training=False, is_initializing=False): 14 | """ 15 | Inference net 16 | param x: input X, multivariate time series data. 17 | return q net structure. 
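The input is reshaped to (timeLength, metricNumber, 1) and passed through four ResNet convolution blocks that convolve and downsample along the time axis only; the flattened features then produce the categorical logits of q(c|x) and the mean/log-std of the Gaussian q(z|x).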
18 | """ 19 | net = spt.BayesianNet(observed=observed) 20 | 21 | normalizer_fn = None if not config.act_norm else functools.partial( 22 | spt.layers.act_norm, 23 | axis=-1 if config.channels_last else -3, 24 | initializing=is_initializing, 25 | value_ndims=3, 26 | ) 27 | print("="*10+"qnet"+"="*10) 28 | 29 | # compute the hidden features 30 | with arg_scope([spt.layers.resnet_conv2d_block], 31 | kernel_size=config.kernel_size2, 32 | shortcut_kernel_size=config.shortcut_kernel_size, 33 | activation_fn=tf.nn.elu, 34 | normalizer_fn=normalizer_fn, 35 | kernel_regularizer=spt.layers.l2_regularizer(config.l2_reg), 36 | channels_last=config.channels_last): 37 | print("qx:%s"%x.get_shape()) 38 | h_x = tf.reshape( 39 | tf.to_float(x), 40 | [-1, config.timeLength, config.metricNumber, 1] 41 | if config.channels_last 42 | else [-1, 1, config.timeLength, config.metricNumber] 43 | ) 44 | print("q1:%s"%h_x.get_shape()) 45 | h_x = spt.layers.resnet_conv2d_block( 46 | h_x, 1, kernel_size=(config.kernel_size1, 1), strides=(config.strides1, 1) 47 | ) 48 | print("q2:%s"%h_x.get_shape()) 49 | h_x = spt.layers.resnet_conv2d_block( 50 | h_x, 1, kernel_size=(config.kernel_size1, 1), strides=(config.strides1, 1) 51 | ) 52 | print("q3:%s"%h_x.get_shape()) 53 | h_x = spt.layers.resnet_conv2d_block( 54 | h_x, 1, kernel_size=(config.kernel_size2, 1), strides=(config.strides2, 1) 55 | ) 56 | print("q4:%s"%h_x.get_shape()) 57 | h_x = spt.layers.resnet_conv2d_block( 58 | h_x, 1, kernel_size=(config.kernel_size2, 1), strides=(config.strides2, 1) 59 | ) 60 | print("q5:%s"%h_x.get_shape()) 61 | 62 | h_x = spt.ops.reshape_tail(h_x, ndims=3, shape=[-1]) 63 | print("q6:%s" % h_x.get_shape()) 64 | 65 | # sample y ~ q(y|x) 66 | c_logits = spt.layers.dense(h_x, config.n_c, name='c_logits') 67 | c = net.add('c', spt.Categorical(c_logits)) 68 | c_one_hot = tf.one_hot(c, config.n_c, dtype=tf.float32) 69 | print("qc:%s, %s, %s" % (c_logits.shape, c.shape, c_one_hot.shape)) 70 | h_z = h_x 71 | 72 | # sample z ~ q(z|x) 73 | z_mean = spt.layers.dense(h_z, config.z_dim, name='z_mean') 74 | z_logstd = spt.layers.dense(h_z, config.z_dim, name='z_logstd', activation_fn=tf.nn.elu) + config.std_epsilon 75 | z = net.add('z', spt.Normal(mean=z_mean, logstd=z_logstd), n_samples=n_z, group_ndims=1) 76 | print("q7:%s, %s, %s" % (z_mean.get_shape(), z_logstd.get_shape(), z.get_shape())) 77 | 78 | return net 79 | 80 | 81 | @spt.global_reuse 82 | @add_arg_scope 83 | def p_net(observed=None, n_z=None, is_training=False, is_initializing=False): 84 | """ 85 | Generative net 86 | return p net structure. 87 | """ 88 | net = spt.BayesianNet(observed=observed) 89 | 90 | normalizer_fn = None if not config.act_norm else functools.partial( 91 | spt.layers.act_norm, 92 | axis=-1 if config.channels_last else -3, 93 | initializing=is_initializing, 94 | value_ndims=3, 95 | ) 96 | 97 | def make_component(i): 98 | normal = spt.Normal( 99 | mean=tf.get_variable('mean_{}'.format(i), shape=[1, config.z_dim], 100 | dtype=tf.float32, trainable=True), 101 | logstd=tf.maximum( 102 | tf.get_variable('logstd_{}'.format(i), shape=[1, config.z_dim], 103 | dtype=tf.float32, trainable=True), 104 | -1. 
105 | ) 106 | ) 107 | return normal.expand_value_ndims(1) 108 | 109 | components = [make_component(i) for i in range(config.n_c)] 110 | mixture = spt.Mixture( 111 | categorical=spt.Categorical(logits=tf.zeros([1, config.n_c])), 112 | components=components, 113 | is_reparameterized=True 114 | ) 115 | z = net.add('z', mixture, n_samples=n_z) 116 | 117 | print("="*10+"pnet"+"="*10) 118 | # compute the hidden features 119 | with arg_scope([spt.layers.resnet_deconv2d_block], 120 | kernel_size=config.kernel_size2, 121 | shortcut_kernel_size=config.shortcut_kernel_size, 122 | activation_fn=tf.nn.elu, 123 | normalizer_fn=normalizer_fn, 124 | kernel_regularizer=spt.layers.l2_regularizer(config.l2_reg), 125 | channels_last=config.channels_last): 126 | print("px:%s"%z.get_shape()) 127 | h_z = spt.layers.dense( 128 | z, int(config.timeLength / (config.strides1 ** 2) / (config.strides2 ** 2) * int(config.metricNumber)) 129 | ) 130 | h_z = spt.ops.reshape_tail( 131 | h_z, 132 | ndims=1, 133 | shape=( 134 | int(config.timeLength / (config.strides1 ** 2) / (config.strides2 ** 2)), 135 | int(config.metricNumber), 1 136 | ) 137 | if config.channels_last else ( 138 | 1, int(config.timeLength / (config.strides1 ** 2) / (config.strides2 ** 2)), 139 | int(config.metricNumber) 140 | ) 141 | ) 142 | print("p1:%s"%h_z.get_shape()) 143 | h_z = spt.layers.resnet_deconv2d_block( 144 | h_z, 1, kernel_size=(config.kernel_size2, 1), strides=(config.strides2, 1) 145 | ) 146 | print("p2:%s"%h_z.get_shape()) 147 | h_z = spt.layers.resnet_deconv2d_block( 148 | h_z, 1, kernel_size=(config.kernel_size2, 1), strides=(config.strides2, 1) 149 | ) 150 | print("p3:%s"%h_z.get_shape()) 151 | h_z = spt.layers.resnet_deconv2d_block( 152 | h_z, 1, kernel_size=(config.kernel_size1, 1), strides=(config.strides1, 1) 153 | ) 154 | print("p4:%s"%h_z.get_shape()) 155 | h_z = spt.layers.resnet_deconv2d_block( 156 | h_z, 1, kernel_size=(config.kernel_size1, 1), strides=(config.strides1, 1) 157 | ) 158 | print("p5:%s"%h_z.get_shape()) 159 | 160 | # sample x ~ p(x|z) 161 | x_mean = spt.layers.conv2d( 162 | h_z, 1, (1, 1), padding='same', name='x_mean', 163 | channels_last=config.channels_last 164 | ) 165 | x_logstd = spt.layers.conv2d( 166 | h_z, 1, (1, 1), padding='same', name='x_logstd', 167 | channels_last=config.channels_last, activation_fn=tf.nn.elu, 168 | ) + config.std_epsilon 169 | x = net.add('x', spt.Normal(mean=x_mean, logstd=x_logstd), n_samples=n_z, group_ndims=3) 170 | print("p6:%s, %s, %s" % (x_mean.get_shape(), x_logstd.get_shape(), x.get_shape())) 171 | 172 | return net 173 | -------------------------------------------------------------------------------- /code/pot.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from math import log 3 | import numpy as np 4 | from scipy.optimize import minimize 5 | 6 | 7 | def _rootsFinder(fun, jac, bounds, npoints, method): 8 | """ 9 | Find possible roots of a scalar function 10 | method : str 11 | 'regular' : regular sample of the search interval, 12 | 'random' : uniform (distribution) sample of the search interval 13 | 14 | Return possible roots of the function 15 | """ 16 | if method == 'regular': 17 | step = (bounds[1] - bounds[0]) / (npoints + 1) 18 | X0 = np.arange(bounds[0] + step, bounds[1], step) 19 | elif method == 'random': 20 | X0 = np.random.uniform(bounds[0], bounds[1], npoints) 21 | 22 | def objFun(X, f, jac): 23 | g = 0 24 | j = np.zeros(X.shape) 25 | i = 0 26 | for x in X: 27 | fx = f(x) 28 | g = g + fx ** 2 
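# the Jacobian of the squared residual f(x)**2 is 2*f(x)*f'(x), fed to L-BFGS-B below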
29 | j[i] = 2 * fx * jac(x) 30 | i = i + 1 31 | return g, j 32 | 33 | opt = minimize(lambda X: objFun(X, fun, jac), X0, 34 | method='L-BFGS-B', 35 | jac=True, bounds=[bounds] * len(X0)) 36 | 37 | X = opt.x 38 | np.round(X, decimals=5) 39 | return np.unique(X) 40 | 41 | 42 | def _log_likelihood(Y, gamma, sigma): 43 | """ 44 | Compute the log-likelihood for the Generalized Pareto Distribution (μ=0) 45 | Returns log-likelihood of the sample Y to be drawn from a GPD(γ,σ,μ=0) 46 | """ 47 | n = Y.size 48 | if gamma != 0: 49 | tau = gamma / sigma 50 | L = -n * log(sigma) - (1 + (1 / gamma)) * (np.log(1 + tau * Y)).sum() 51 | else: 52 | L = n * (1 + log(Y.mean())) 53 | return L 54 | 55 | 56 | class POT: 57 | """ 58 | This class allows to run POT algorithm on univariate dataset (upper-bound) 59 | """ 60 | 61 | def __init__(self, q=1e-4): 62 | self.proba = q 63 | self.extreme_quantile = None 64 | self.init_data = None 65 | self.init_threshold = None 66 | self.peaks = None 67 | self.n = 0 68 | self.Nt = 0 69 | 70 | def initialize(self, init_data, level=0.02, min_extrema=False): 71 | self.init_data = np.array(init_data) 72 | n_init = self.init_data.size 73 | 74 | S = np.sort(self.init_data) # we sort X to get the empirical quantile 75 | self.init_threshold = S[int(level * n_init)] # t is fixed for the whole algorithm 76 | 77 | # initial peaks 78 | self.peaks = -1*self.init_data[self.init_data < self.init_threshold] + self.init_threshold 79 | self.Nt = self.peaks.size 80 | self.n = n_init 81 | g, s, l = self._grimshaw() 82 | self.extreme_quantile = self._quantile(g, s) 83 | return self.extreme_quantile 84 | 85 | def _grimshaw(self, epsilon=1e-8, n_points=10): 86 | """ 87 | Compute the GPD parameters estimation with the Grimshaw's trick 88 | """ 89 | def u(s): 90 | return 1 + np.log(s).mean() 91 | 92 | def v(s): 93 | return np.mean(1 / s) 94 | 95 | def w(Y, t): 96 | s = 1 + t * Y 97 | us = u(s) 98 | vs = v(s) 99 | return us * vs - 1 100 | 101 | def jac_w(Y, t): 102 | s = 1 + t * Y 103 | us = u(s) 104 | vs = v(s) 105 | jac_us = (1 / t) * (1 - vs) 106 | jac_vs = (1 / t) * (-vs + np.mean(1 / s ** 2)) 107 | return us * jac_vs + vs * jac_us 108 | 109 | Ym = self.peaks.min() 110 | YM = self.peaks.max() 111 | Ymean = self.peaks.mean() 112 | 113 | a = -1 / YM 114 | if abs(a) < 2 * epsilon: 115 | epsilon = abs(a) / n_points 116 | 117 | a = a + epsilon 118 | b = 2 * (Ymean - Ym) / (Ymean * Ym) 119 | c = 2 * (Ymean - Ym) / (Ym ** 2) 120 | 121 | # We look for possible roots 122 | left_zeros = _rootsFinder(lambda t: w(self.peaks, t), 123 | lambda t: jac_w(self.peaks, t), 124 | (a + epsilon, -epsilon), 125 | n_points, 'regular') 126 | 127 | right_zeros = _rootsFinder(lambda t: w(self.peaks, t), 128 | lambda t: jac_w(self.peaks, t), 129 | (b, c), 130 | n_points, 'regular') 131 | 132 | # all the possible roots 133 | zeros = np.concatenate((left_zeros, right_zeros)) 134 | 135 | # 0 is always a solution so we initialize with it 136 | gamma_best = 0 137 | sigma_best = Ymean 138 | ll_best = _log_likelihood(self.peaks, gamma_best, sigma_best) 139 | 140 | # we look for better candidates 141 | for z in zeros: 142 | gamma = u(1 + z * self.peaks) - 1 143 | sigma = gamma / z 144 | ll = _log_likelihood(self.peaks, gamma, sigma) 145 | if ll > ll_best: 146 | gamma_best = gamma 147 | sigma_best = sigma 148 | ll_best = ll 149 | 150 | return gamma_best, sigma_best, ll_best 151 | 152 | def _quantile(self, gamma, sigma): 153 | """ 154 | Compute the quantile at level 1-q 155 | Returns quantile at level 1-q for the GPD(γ,σ,μ=0) 156 | """ 
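# r = q*n/Nt rescales the risk q by the proportion of peaks below the initial threshold;
# the GPD tail quantile is then offset from the initial threshold to obtain the final detection threshold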
157 | r = self.n * self.proba / self.Nt 158 | if gamma != 0: 159 | return self.init_threshold - (sigma / gamma) * (pow(r, -gamma) - 1) 160 | else: 161 | return self.init_threshold + sigma * (r) 162 | 163 | -------------------------------------------------------------------------------- /code/util.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import os 3 | import time 4 | import pandas as pd 5 | import csv 6 | import numpy as np 7 | from functools import partial 8 | np.seterr(divide='ignore', invalid='ignore') 9 | import multiprocessing as mul 10 | 11 | delEXTREVALUE = True 12 | 13 | 14 | def read_file(pathName, fileName, Type="string", name=False): 15 | """ 16 | read the content from txt file to a matrix 17 | return the matrix of the file 18 | """ 19 | matrix = [] 20 | with open(os.path.join(pathName, fileName), "r") as f: 21 | lines = f.readlines() 22 | for line in lines: 23 | if "," in line: 24 | vector = line.strip("\r").strip("\n").split(',') 25 | matrix.append([float(v) for v in vector[1:]] if name else [float(v) for v in vector]) 26 | else: 27 | if Type == "string": 28 | matrix.append(line.strip("\r").strip("\n")) 29 | if Type == "float": 30 | matrix.append(float(line.strip("\r").strip("\n"))) 31 | return matrix 32 | 33 | 34 | def save_file(pathName, fileName, resultList, cat="\n", writeType = "w"): 35 | """ 36 | save the 'resultList' in a 'fileName' File 37 | """ 38 | with open(os.path.join(pathName, fileName), writeType) as f: 39 | if len(np.array(resultList).shape) == 1: 40 | f.write(cat.join(str(x) for x in resultList) + '\n') 41 | if len(np.array(resultList).shape) == 2: 42 | w = csv.writer(f, delimiter=',') 43 | w.writerows(resultList) 44 | 45 | 46 | def data_norm_all(path, dirPath, timeLength, metricNumber, norm): 47 | """ 48 | use the entire metric for normalize 49 | return all normalized matrix data and filepath. 50 | """ 51 | df = pd.read_csv(os.path.join(dirPath, path)).astype(float) 52 | matrix = np.array(df.values.tolist()) 53 | matrix = np.around(matrix, decimals=2) 54 | if norm: 55 | if delEXTREVALUE: 56 | Y = np.sort(matrix, axis=0) 57 | a, _ = Y.shape 58 | Z = Y[int(0.01*a):int(0.99*a), :] 59 | m_mean = np.mean(Z, axis=0, keepdims=True).astype(float) 60 | m_std = np.std(Z, axis=0, keepdims=True).astype(float) 61 | else: 62 | m_mean = np.mean(matrix, axis=0).astype(float) 63 | m_std = np.std(matrix, axis=0).astype(float) 64 | 65 | norm_matrix = (matrix - m_mean) / m_std 66 | norm_matrix = np.where(np.isnan(norm_matrix), 0, norm_matrix) 67 | norm_matrix = np.around(norm_matrix, decimals=2) 68 | norm_matrix = norm_matrix.reshape(-1, timeLength*metricNumber) 69 | return norm_matrix.tolist(), path, norm_matrix.shape[0] 70 | else: 71 | matrix = np.around(matrix, decimals=2) 72 | matrix = np.array(matrix).reshape(-1, timeLength*metricNumber) 73 | return matrix.tolist(), path, matrix.shape[0] 74 | 75 | 76 | def get_data_eachday(path, dirPath, timeLength, metricNumber, norm): 77 | """ 78 | use the each day metric for normalize 79 | return normalized matrix data for each day and filepath. 
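Each instance (a window of timeLength points per metric) is standardized with its own per-metric mean and std; when delEXTREVALUE is True, the lowest and highest 1% of the sorted values are discarded before these statistics are computed.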
80 | """ 81 | df = pd.read_csv(os.path.join(dirPath, path)).astype(float) 82 | matrix = np.array(df.values.tolist()) 83 | matrix = np.around(matrix, decimals=2) 84 | 85 | if norm: 86 | matrix = matrix.reshape(-1, timeLength, metricNumber) 87 | if delEXTREVALUE: 88 | Y = np.sort(matrix, axis=1) 89 | a,b,c = Y.shape 90 | Z = Y[:, int(0.01*b):int(0.99*b), :] 91 | m_mean = np.mean(Z, axis=1, keepdims=True).astype(float) 92 | m_std = np.std(Z, axis=1, keepdims=True).astype(float) 93 | else: 94 | m_mean = np.mean(matrix, axis=1, keepdims=True).astype(float) 95 | m_std = np.std(matrix, axis=1, keepdims=True).astype(float) 96 | 97 | norm_matrix = (matrix - m_mean) / m_std 98 | norm_matrix = np.where(np.isnan(norm_matrix), 0, norm_matrix) 99 | norm_matrix = np.around(norm_matrix, decimals=2) 100 | norm_matrix = norm_matrix.reshape(-1, timeLength*metricNumber) 101 | return norm_matrix.tolist(), path, norm_matrix.shape[0] 102 | else: 103 | matrix = np.around(matrix, decimals=2) 104 | matrix = np.array(matrix).reshape(-1, timeLength*metricNumber) 105 | return matrix.tolist(), path, matrix.shape[0] 106 | 107 | 108 | def load_matrix_allData(dataReadformat, dirPath1, dirPath2, 109 | timeLength, metricNumber, fileInfo1, fileInfo2, MLResult, norm): 110 | """ 111 | read and normalize the data by Parallel using pool 112 | return the two matrix data and corresponding time index. 113 | """ 114 | st = time.time() 115 | 116 | matrix1, matrix2 = [], [] 117 | fileDirList1, fileDirList2 = [], [] 118 | 119 | WORKERS = mul.cpu_count() 120 | pool = mul.Pool(processes=WORKERS, maxtasksperchild=WORKERS) 121 | 122 | paras1 = [path for path in os.listdir(dirPath1) if ".txt" in path] 123 | paras2 = [path for path in os.listdir(dirPath2) if ".txt" in path] 124 | 125 | if dataReadformat == "all": 126 | get_data_partial1 = partial( 127 | data_norm_all, dirPath=dirPath1, timeLength=timeLength, metricNumber=metricNumber, norm=norm 128 | ) 129 | get_data_partial2 = partial( 130 | data_norm_all, dirPath=dirPath2, timeLength=timeLength, metricNumber=metricNumber, norm=norm 131 | ) 132 | result1 = pool.map_async(get_data_partial1, paras1) 133 | result2 = pool.map_async(get_data_partial2, paras2) 134 | else: 135 | get_data_partial1 = partial( 136 | get_data_eachday, dirPath=dirPath1, timeLength=timeLength, metricNumber=metricNumber, norm=norm 137 | ) 138 | get_data_partial2 = partial( 139 | get_data_eachday, dirPath=dirPath2, timeLength=timeLength, metricNumber=metricNumber, norm=norm 140 | ) 141 | result1 = pool.map_async(get_data_partial1, paras1) 142 | result2 = pool.map_async(get_data_partial2, paras2) 143 | 144 | pool.close() 145 | pool.join() 146 | 147 | for i in result1.get(): 148 | matrix1 += i[0] 149 | for j in range(1, i[2]+1): 150 | fileDirList1.append(i[1]+'+'+str(j)) 151 | for i in result2.get(): 152 | matrix2 += i[0] 153 | for j in range(1, i[2] + 1): 154 | fileDirList2.append(i[1]+'+'+str(j)) 155 | time_indexs1 = [i for i in range(0, len(matrix1))] 156 | time_indexs2 = [i for i in range(0, len(matrix2))] 157 | save_file(MLResult.system_path("mid_summary"), fileInfo1, fileDirList1) 158 | save_file(MLResult.system_path("mid_summary"), fileInfo2, fileDirList2) 159 | print("-----Get data>>>>>:Time:%s" % (time.time()-st)) 160 | 161 | return np.array(matrix1), np.array(time_indexs1), np.array(matrix2), np.array(time_indexs2) 162 | 163 | 164 | def cat_List(a, b): 165 | """ 166 | cat the a: fileNameLineCnt list and b: resultList 167 | return the combined list. 
168 | """ 169 | c = [] 170 | i = 0 171 | while i < len(a): 172 | if isinstance(b[i], list): 173 | c.append([a[i]] + b[i]) 174 | elif isinstance(b[i], np.ndarray): 175 | c.append([a[i]] + b[i].tolist()) 176 | else: 177 | c.append([a[i]] + [b[i]]) 178 | i += 1 179 | return c 180 | 181 | 182 | def get_machineID(MLResult, labelpath=None): 183 | """ 184 | get the list: fileName + lineNum 185 | return the result list. 186 | """ 187 | if labelpath is None: 188 | return read_file(MLResult.system_path("mid_summary"), "TestFileNameList.txt") 189 | else: 190 | labelFileNameLineCntList = [] 191 | for fileName in read_file(MLResult.system_path("mid_summary"), "TestFileNameList.txt"): 192 | with open(labelpath + fileName, "r") as f: 193 | fline = f.readlines() 194 | lineCnt = 1 195 | while lineCnt <= len(fline): 196 | labelFileNameLineCntList.append(fileName+"+"+str(lineCnt)) 197 | lineCnt += 1 198 | return labelFileNameLineCntList 199 | 200 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | matplotlib == 3.0.2 2 | numpy == 1.15.4 3 | pandas == 0.23.4 4 | scipy == 1.2.0 5 | scikit_learn == 0.20.2 6 | tensorflow == 1.14.0 7 | tensorflow_estimator == 1.14.0 8 | git+https://github.com/thu-ml/zhusuan.git 9 | git+https://github.com/haowen-xu/tfsnippet.git@v0.2.0-alpha4 10 | imageio == 2.4.1 11 | fs == 2.3.0 12 | click == 7.0 --------------------------------------------------------------------------------