├── LICENSE.txt ├── README.md ├── lstm-synthetic-wave-anomaly-detect.py └── results-plot.png /LICENSE.txt: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | Copyright (c) 2016 Adam Wentz 3 | 4 | Permission is hereby granted, free of charge, to any person obtaining a copy of 5 | this software and associated documentation files (the "Software"), to deal in the 6 | Software without restriction, including without limitation the rights to use, 7 | copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the 8 | Software, and to permit persons to whom the Software is furnished to do so, 9 | subject to the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be included in all 12 | copies or substantial portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 16 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 17 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 18 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 19 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 20 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # lstm-anomaly-detect 2 | -------------------------------------------------------------------------------- /lstm-synthetic-wave-anomaly-detect.py: -------------------------------------------------------------------------------- 1 | """ Inspired by example from 2 | https://github.com/Vict0rSch/deep_learning/tree/master/keras/recurrent 3 | Uses the TensorFlow backend 4 | The basic idea is to detect anomalies in a time-series. 5 | """ 6 | import matplotlib.pyplot as plt 7 | import numpy as np 8 | import time 9 | from keras.layers.core import Dense, Activation, Dropout 10 | from keras.layers.recurrent import LSTM 11 | from keras.models import Sequential 12 | from numpy import arange, sin, pi, random 13 | 14 | np.random.seed(1234) 15 | 16 | # Global hyper-parameters 17 | sequence_length = 100 18 | random_data_dup = 10 # each sample randomly duplicated between 0 and 9 times, see dropin function 19 | epochs = 1 20 | batch_size = 50 21 | 22 | 23 | def dropin(X, y): 24 | """ The name suggests the inverse of dropout, i.e. adding more samples. See Data Augmentation section at 25 | http://simaaron.github.io/Estimating-rainfall-from-weather-radar-readings-using-recurrent-neural-networks/ 26 | :param X: Each row is a training sequence 27 | :param y: Tne target we train and will later predict 28 | :return: new augmented X, y 29 | """ 30 | print("X shape:", X.shape) 31 | print("y shape:", y.shape) 32 | X_hat = [] 33 | y_hat = [] 34 | for i in range(0, len(X)): 35 | for j in range(0, np.random.random_integers(0, random_data_dup)): 36 | X_hat.append(X[i, :]) 37 | y_hat.append(y[i]) 38 | return np.asarray(X_hat), np.asarray(y_hat) 39 | 40 | 41 | def gen_wave(): 42 | """ Generate a synthetic wave by adding up a few sine waves and some noise 43 | :return: the final wave 44 | """ 45 | t = np.arange(0.0, 10.0, 0.01) 46 | wave1 = sin(2 * 2 * pi * t) 47 | noise = random.normal(0, 0.1, len(t)) 48 | wave1 = wave1 + noise 49 | print("wave1", len(wave1)) 50 | wave2 = sin(2 * pi * t) 51 | print("wave2", len(wave2)) 52 | t_rider = arange(0.0, 0.5, 0.01) 53 | wave3 = sin(10 * pi * t_rider) 54 | print("wave3", len(wave3)) 55 | insert = round(0.8 * len(t)) 56 | wave1[insert:insert + 50] = wave1[insert:insert + 50] + wave3 57 | return wave1 + wave2 58 | 59 | 60 | def z_norm(result): 61 | result_mean = result.mean() 62 | result_std = result.std() 63 | result -= result_mean 64 | result /= result_std 65 | return result, result_mean 66 | 67 | 68 | def get_split_prep_data(train_start, train_end, 69 | test_start, test_end): 70 | data = gen_wave() 71 | print("Length of Data", len(data)) 72 | 73 | # train data 74 | print "Creating train data..." 75 | 76 | result = [] 77 | for index in range(train_start, train_end - sequence_length): 78 | result.append(data[index: index + sequence_length]) 79 | result = np.array(result) # shape (samples, sequence_length) 80 | result, result_mean = z_norm(result) 81 | 82 | print "Mean of train data : ", result_mean 83 | print "Train data shape : ", result.shape 84 | 85 | train = result[train_start:train_end, :] 86 | np.random.shuffle(train) # shuffles in-place 87 | X_train = train[:, :-1] 88 | y_train = train[:, -1] 89 | X_train, y_train = dropin(X_train, y_train) 90 | 91 | # test data 92 | print "Creating test data..." 93 | 94 | result = [] 95 | for index in range(test_start, test_end - sequence_length): 96 | result.append(data[index: index + sequence_length]) 97 | result = np.array(result) # shape (samples, sequence_length) 98 | result, result_mean = z_norm(result) 99 | 100 | print "Mean of test data : ", result_mean 101 | print "Test data shape : ", result.shape 102 | 103 | X_test = result[:, :-1] 104 | y_test = result[:, -1] 105 | 106 | print("Shape X_train", np.shape(X_train)) 107 | print("Shape X_test", np.shape(X_test)) 108 | 109 | X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1)) 110 | X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1)) 111 | 112 | return X_train, y_train, X_test, y_test 113 | 114 | 115 | def build_model(): 116 | model = Sequential() 117 | layers = {'input': 1, 'hidden1': 64, 'hidden2': 256, 'hidden3': 100, 'output': 1} 118 | 119 | model.add(LSTM( 120 | input_length=sequence_length - 1, 121 | input_dim=layers['input'], 122 | output_dim=layers['hidden1'], 123 | return_sequences=True)) 124 | model.add(Dropout(0.2)) 125 | 126 | model.add(LSTM( 127 | layers['hidden2'], 128 | return_sequences=True)) 129 | model.add(Dropout(0.2)) 130 | 131 | model.add(LSTM( 132 | layers['hidden3'], 133 | return_sequences=False)) 134 | model.add(Dropout(0.2)) 135 | 136 | model.add(Dense( 137 | output_dim=layers['output'])) 138 | model.add(Activation("linear")) 139 | 140 | start = time.time() 141 | model.compile(loss="mse", optimizer="rmsprop") 142 | print "Compilation Time : ", time.time() - start 143 | return model 144 | 145 | 146 | def run_network(model=None, data=None): 147 | global_start_time = time.time() 148 | 149 | if data is None: 150 | print 'Loading data... ' 151 | # train on first 700 samples and test on next 300 samples (has anomaly) 152 | X_train, y_train, X_test, y_test = get_split_prep_data(0, 700, 500, 1000) 153 | else: 154 | X_train, y_train, X_test, y_test = data 155 | 156 | print '\nData Loaded. Compiling...\n' 157 | 158 | if model is None: 159 | model = build_model() 160 | 161 | try: 162 | print("Training...") 163 | model.fit( 164 | X_train, y_train, 165 | batch_size=batch_size, nb_epoch=epochs, validation_split=0.05) 166 | print("Predicting...") 167 | predicted = model.predict(X_test) 168 | print("Reshaping predicted") 169 | predicted = np.reshape(predicted, (predicted.size,)) 170 | except KeyboardInterrupt: 171 | print("prediction exception") 172 | print 'Training duration (s) : ', time.time() - global_start_time 173 | return model, y_test, 0 174 | 175 | try: 176 | plt.figure(1) 177 | plt.subplot(311) 178 | plt.title("Actual Test Signal w/Anomalies") 179 | plt.plot(y_test[:len(y_test)], 'b') 180 | plt.subplot(312) 181 | plt.title("Predicted Signal") 182 | plt.plot(predicted[:len(y_test)], 'g') 183 | plt.subplot(313) 184 | plt.title("Squared Error") 185 | mse = ((y_test - predicted) ** 2) 186 | plt.plot(mse, 'r') 187 | plt.show() 188 | except Exception as e: 189 | print("plotting exception") 190 | print str(e) 191 | print 'Training duration (s) : ', time.time() - global_start_time 192 | 193 | return model, y_test, predicted 194 | 195 | 196 | run_network() 197 | -------------------------------------------------------------------------------- /results-plot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aurotripathy/lstm-anomaly-detect/bcfb01db383698acbd5692f1a76a5f20ec3629a8/results-plot.png --------------------------------------------------------------------------------