├── .gitignore ├── .vscode └── settings.json ├── DeepOneClass ├── code.py ├── imgs │ └── 001.jpg ├── readme.md └── total.py ├── LICENSE ├── Mahalanobis ├── Mahalanobis.py ├── Pics │ ├── Mahdist_verify_result.jpg │ ├── mahal_dist.jpg │ ├── 变体参数含义.jpg │ └── 马氏距离变体.jpg ├── README.md ├── data │ ├── forest_cover │ │ └── README.md │ ├── kdd_http │ │ └── README.md │ ├── kdd_smtp │ │ └── README.md │ └── shuttle │ │ └── README.md ├── mahal_dist.py ├── mahal_dist_variant.py ├── main.py ├── modules │ ├── autoencoder.py │ └── mahalanobis.py ├── requirements.txt ├── run.sh ├── utils │ ├── dataloading.py │ ├── experiment.py │ └── tracking.py └── verify_mahal_equivalence.py ├── README.md ├── adVAE ├── imgs │ └── advae.png └── readme.md ├── anomalyLocalization ├── README.md ├── code │ ├── dataset.py │ ├── eval.py │ ├── eval.sh │ ├── network.py │ ├── train.py │ └── train.sh └── imgs │ ├── 001.png │ ├── 002.png │ ├── 003.png │ └── face - 副本.png ├── dataset ├── imgs │ └── 001.png └── readme.md ├── memae ├── imgs │ └── memae.png ├── memoryzing_normality_to_detect_anomaly.py └── readme.md ├── projects.md ├── records ├── README.md ├── difficulty.md └── imgs │ ├── 7.jpg │ ├── BP202190822100682_3_3.jpg │ ├── MVTec.png │ ├── anomaly_detection_example1.PNG │ ├── anomaly_detection_types.png │ ├── image001.png │ ├── image002.png │ ├── image003.png │ ├── image004.png │ ├── image005.jpg │ ├── image006.jpg │ ├── image007.png │ ├── image008.png │ ├── image009.jpeg │ ├── image010.png │ └── image011.png └── resources.md /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | 131 | 132 | #myself 133 | .mypy_cache 134 | *.pt 135 | *.gz 136 | memae/data/ 137 | .vscode/settings.json 138 | records/*.pdf -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "python.pythonPath": "C:\\Users\\forrest\\Anaconda3\\python.exe" 3 | } -------------------------------------------------------------------------------- /DeepOneClass/code.py: -------------------------------------------------------------------------------- 1 | from keras.datasets import fashion_mnist 2 | from keras.utils import to_categorical 3 | import numpy as np 4 | import matplotlib.pyplot as plt 5 | 6 | # dataset 7 | (x_train, y_train), (x_test, y_test) = fashion_mnist.load_data() 8 | 9 | x_train = x_train.reshape(x_train.shape[0], 28, 28, 1) 10 | x_test = x_test.reshape(x_test.shape[0], 28, 28, 1) 11 | 12 | x_train = x_train.astype('float32') / 255 13 | x_test = x_test.astype('float32') / 255 14 | 15 | #学習データ 16 | x_train_s, x_test_s, x_test_b = [], [], [] 17 | x_ref, y_ref = [], [] 18 | 19 | x_train_shape = x_train.shape 20 | 21 | 22 | for i in range(len(x_train)): 23 | if y_train[i] == 7:#スニーカーは7 24 | temp = x_train[i] 25 | x_train_s.append(temp.reshape((x_train_shape[1:]))) 26 | else: 27 | temp = x_train[i] 28 | x_ref.append(temp.reshape((x_train_shape[1:]))) 29 | y_ref.append(y_train[i]) 30 | 31 | x_ref = np.array(x_ref) 32 | 33 | #refデータからランダムに6000個抽出 34 | number = np.random.choice(np.arange(0,x_ref.shape[0]),6000,replace=False) 35 | 36 | x, y = [], [] 37 | 38 | x_ref_shape = x_ref.shape 39 | 40 | for i in number: 41 | temp = x_ref[i] 42 | x.append(temp.reshape((x_ref_shape[1:]))) 43 | y.append(y_ref[i]) 44 | 45 | x_train_s = np.array(x_train_s) 46 | x_ref = np.array(x) 47 | y_ref = to_categorical(y) 48 | 49 | #テストデータ 50 | for i in range(len(x_test)): 51 | if y_test[i] == 7:#スニーカーは7 52 | temp = x_test[i,:,:,:] 53 | x_test_s.append(temp.reshape((x_train_shape[1:]))) 54 | 55 | if y_test[i] == 9:#ブーツは9 56 | temp = x_test[i,:,:,:] 57 | x_test_b.append(temp.reshape((x_train_shape[1:]))) 58 | 59 | x_test_s = np.array(x_test_s) 60 | x_test_b = np.array(x_test_b) 61 | 62 | 63 | 64 | import cv2 65 | from PIL import Image 66 | 67 | def resize(x): 68 | x_out = [] 69 | 70 | for i in range(len(x)): 71 | img = cv2.cvtColor(x[i], cv2.COLOR_GRAY2RGB) 72 | img = cv2.resize(img,dsize=(96,96)) 73 | x_out.append(img) 74 | 75 | return np.array(x_out) 76 | 77 | X_train_s = resize(x_train_s) 78 | X_ref = resize(x_ref) 79 | X_test_s = resize(x_test_s) 80 | X_test_b = resize(x_test_b) 81 | 82 | 83 | def original_loss(y_true, y_pred): 84 | lc = 1/(classes*batchsize) * batchsize**2 * K.sum((y_pred -K.mean(y_pred,axis=0))**2,axis=[1]) / ((batchsize-1)**2) 85 | return lc 86 | 87 | #target data 88 | #学習しながら、損失を取得 89 | lc.append(model_t.train_on_batch(batch_target, np.zeros((batchsize, feature_out)))) 90 | 91 | #reference data 92 | #学習しながら、損失を取得 93 | ld.append(model_r.train_on_batch(batch_ref, batch_y)) -------------------------------------------------------------------------------- /DeepOneClass/imgs/001.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ForrestPi/AnomalyDetection/77208ff9ba86efa937d10d8aa3f20c9fbefb2d91/DeepOneClass/imgs/001.jpg -------------------------------------------------------------------------------- /DeepOneClass/readme.md: -------------------------------------------------------------------------------- 1 | ## Learning Deep Features for One-Class Classification 2 | 3 | 4 | ## Architecture 5 | ![img](./imgs/001.jpg) 6 | 7 | 8 | ## Reference 9 | [1] Dong Gong et al. (2019). Learning Deep Features for One-Class Classification. IEEE Transactions on Image Processing 28.11 (2019): 5450-5463 10 | 11 | 12 | [github](https://github.com/PINTO0309/Keras-OneClassAnomalyDetection) -------------------------------------------------------------------------------- /DeepOneClass/total.py: -------------------------------------------------------------------------------- 1 | from keras.applications import MobileNetV2, VGG16 2 | from keras.optimizers import SGD 3 | from keras.models import Model 4 | from keras.layers import GlobalAveragePooling2D, Dense 5 | from keras import backend as K 6 | from keras.engine.network import Network 7 | 8 | input_shape = (96, 96, 3) 9 | classes = 10 10 | batchsize = 128 11 | #feature_out = 512 #secondary network out for VGG16 12 | feature_out = 1280 #secondary network out for MobileNet 13 | alpha = 0.5 #for MobileNetV2 14 | lambda_ = 0.1 #for compact loss 15 | 16 | #損失関数 17 | def original_loss(y_true, y_pred): 18 | lc = 1/(classes*batchsize) * batchsize**2 * K.sum((y_pred -K.mean(y_pred,axis=0))**2,axis=[1]) / ((batchsize-1)**2) 19 | return lc 20 | 21 | #学習 22 | def train(x_target, x_ref, y_ref, epoch_num): 23 | 24 | # VGG16読み込み, S network用 25 | print("Model build...") 26 | #mobile = VGG16(include_top=False, input_shape=input_shape, weights='imagenet') 27 | 28 | # mobile net読み込み, S network用 29 | mobile = MobileNetV2(include_top=True, input_shape=input_shape, alpha=alpha, 30 | , weights='imagenet') 31 | 32 | #最終層削除 33 | mobile.layers.pop() 34 | 35 | # 重みを固定 36 | for layer in mobile.layers: 37 | if layer.name == "block_13_expand": # "block5_conv1": for VGG16 38 | break 39 | else: 40 | layer.trainable = False 41 | 42 | model_t = Model(inputs=mobile.input,outputs=mobile.layers[-1].output) 43 | 44 | # R network用 Sと重み共有 45 | model_r = Network(inputs=model_t.input, 46 | outputs=model_t.output, 47 | name="shared_layer") 48 | 49 | #Rに全結合層を付ける 50 | prediction = Dense(classes, activation='softmax')(model_t.output) 51 | model_r = Model(inputs=model_r.input,outputs=prediction) 52 | 53 | #コンパイル 54 | optimizer = SGD(lr=5e-5, decay=0.00005) 55 | model_r.compile(optimizer=optimizer, loss="categorical_crossentropy") 56 | model_t.compile(optimizer=optimizer, loss=original_loss) 57 | 58 | model_t.summary() 59 | model_r.summary() 60 | 61 | print("x_target is",x_target.shape[0],'samples') 62 | print("x_ref is",x_ref.shape[0],'samples') 63 | 64 | ref_samples = np.arange(x_ref.shape[0]) 65 | loss, loss_c = [], [] 66 | 67 | print("training...") 68 | 69 | #学習 70 | for epochnumber in range(epoch_num): 71 | x_r, y_r, lc, ld = [], [], [], [] 72 | 73 | #ターゲットデータシャッフル 74 | np.random.shuffle(x_target) 75 | 76 | #リファレンスデータシャッフル 77 | np.random.shuffle(ref_samples) 78 | for i in range(len(x_target)): 79 | x_r.append(x_ref[ref_samples[i]]) 80 | y_r.append(y_ref[ref_samples[i]]) 81 | x_r = np.array(x_r) 82 | y_r = np.array(y_r) 83 | 84 | for i in range(int(len(x_target) / batchsize)): 85 | 86 | #batchsize分のデータロード 87 | batch_target = x_target[i*batchsize:i*batchsize+batchsize] 88 | batch_ref = x_r[i*batchsize:i*batchsize+batchsize] 89 | batch_y = y_r[i*batchsize:i*batchsize+batchsize] 90 | 91 | #target data 92 | #学習しながら、損失を取得 93 | lc.append(model_t.train_on_batch(batch_target, np.zeros((batchsize, feature_out)))) 94 | 95 | #reference data 96 | #学習しながら、損失を取得 97 | ld.append(model_r.train_on_batch(batch_ref, batch_y)) 98 | 99 | loss.append(np.mean(ld)) 100 | loss_c.append(np.mean(lc)) 101 | 102 | if (epochnumber+1) % 5 == 0: 103 | print("epoch:",epochnumber+1) 104 | print("Descriptive loss:", loss[-1]) 105 | print("Compact loss", loss_c[-1]) 106 | 107 | #結果グラフ 108 | plt.plot(loss,label="Descriptive loss") 109 | plt.xlabel("epoch") 110 | plt.legend() 111 | plt.show() 112 | 113 | plt.plot(loss_c,label="Compact loss") 114 | plt.xlabel("epoch") 115 | plt.legend() 116 | plt.show() 117 | 118 | return model_t 119 | 120 | model = train(X_train_s, X_ref, y_ref, 5) 121 | 122 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Forrest-Zhu 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /Mahalanobis/Mahalanobis.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | x=np.random.random(10) 3 | y=np.random.random(10) 4 | 5 | #马氏距离要求样本数要大于维数,否则无法求协方差矩阵 6 | #此处进行转置,表示10个样本,每个样本2维 7 | X=np.vstack([x,y]) 8 | print(X) 9 | XT=X.T 10 | 11 | #方法一:根据公式求解 12 | S=np.cov(X) #两个维度之间协方差矩阵 13 | SI = np.linalg.inv(S) #协方差矩阵的逆矩阵 14 | #马氏距离计算两个样本之间的距离,此处共有10个样本,两两组合,共有45个距离。 15 | n=XT.shape[0] 16 | d1=[] 17 | for i in range(0,n): 18 | for j in range(i+1,n): 19 | delta=XT[i]-XT[j] 20 | d=np.sqrt(np.dot(np.dot(delta,SI),delta.T)) 21 | d1.append(d) 22 | print(d1) 23 | #方法二:根据scipy库求解 24 | from scipy.spatial.distance import pdist 25 | d2=pdist(XT,'mahalanobis') 26 | print(d2) -------------------------------------------------------------------------------- /Mahalanobis/Pics/Mahdist_verify_result.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ForrestPi/AnomalyDetection/77208ff9ba86efa937d10d8aa3f20c9fbefb2d91/Mahalanobis/Pics/Mahdist_verify_result.jpg -------------------------------------------------------------------------------- /Mahalanobis/Pics/mahal_dist.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ForrestPi/AnomalyDetection/77208ff9ba86efa937d10d8aa3f20c9fbefb2d91/Mahalanobis/Pics/mahal_dist.jpg -------------------------------------------------------------------------------- /Mahalanobis/Pics/变体参数含义.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ForrestPi/AnomalyDetection/77208ff9ba86efa937d10d8aa3f20c9fbefb2d91/Mahalanobis/Pics/变体参数含义.jpg -------------------------------------------------------------------------------- /Mahalanobis/Pics/马氏距离变体.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ForrestPi/AnomalyDetection/77208ff9ba86efa937d10d8aa3f20c9fbefb2d91/Mahalanobis/Pics/马氏距离变体.jpg -------------------------------------------------------------------------------- /Mahalanobis/README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | ## 1. 马氏距离 4 | 5 | #### 1.1 马氏距离等价于【规范化的主成分空间内的欧氏距离】 6 | 7 | - **规范化的主成分空间** 8 | - 对数据集进行主成分分析,即对数据集的协方差矩阵进行特征值分解,求主成分(特征向量) 9 | - 对所有主成分进行归一化处理,这些规范化的主成分即构成了规范化主成分空间的坐标轴 10 | 11 | - **将样本映射至规范化主成分空间,意味着数据从超椭圆(ellipsoidal)分布转化为超球面(spherical)分布** 12 | - 样本在规范化主成分空间各坐标轴上的投影(坐标分量),可通过计算样本向量与规范化主成分的内积求得 13 | 14 | - **两个向量的马氏距离等价于两者在规范化的主成分空间内的欧氏距离** 15 | - If each of these axes is re-scaled to have unit variance, then the Mahalanobis distance corresponds to standard Euclidean distance in the transformed space. 16 | 17 | 18 | #### 1.2 马氏距离的特点 19 | - **特点一:马氏距离是无单位化的、尺度无关的,它内生地考虑到了数据集各坐标轴之间的相关性** 20 | - The Mahalanobis distance is thus unitless and scale-invariant, and takes into account the correlations of the data set. 21 | 22 | - **特点二:马氏距离与样本在各主成分上的偏离度成正比** 23 | - This distance is zero if P is at the mean of D, and grows as P moves away from the mean along each principal component axis 24 | 25 | - The Mahalanobis distance measures the number of standard deviations from P to the mean of D. 26 | 27 | - 参考资料:[Wikipedia : Mahalanobis distance](https://en.wikipedia.org/wiki/Mahalanobis_distance) 28 | 29 | --- 30 | 31 | ## 2. 马氏距离的计算方法及其代码实现 32 | #### 2.1 Python代码实现:[mahal_dist](./mahal_dist.py) 33 | 34 | #### 2.2 计算样本点x距离样本集中心的马氏距离公式 35 | ![马氏距离](./Pics/mahal_dist.jpg) 36 | 37 | --- 38 | 39 | ## 3. 马氏距离的变体及其代码实现 40 | #### 3.1 Python代码实现: [mahal_dist_variant](./mahal_dist_variant.py) 41 | 42 | #### 3.2 论文出处: [A Novel Anomaly Detection Scheme Based on Principal Component Classifier](https://github.com/Albertsr/Anomaly-Detection/blob/master/UnSupervised-Based%20on%20PCA/Papers/A%20Novel%20Anomaly%20Detection%20Scheme%20Based%20on%20Principal%20Component%20Classifier.pdf) 43 | 44 | #### 3.3 计算方法 45 | 46 | ![马氏距离变体](./Pics/%E9%A9%AC%E6%B0%8F%E8%B7%9D%E7%A6%BB%E5%8F%98%E4%BD%93.jpg) 47 | 48 | - **参数含义** 49 | 50 | ![参数含义](./Pics/%E5%8F%98%E4%BD%93%E5%8F%82%E6%95%B0%E5%90%AB%E4%B9%89.jpg) 51 | 52 | - **异常样本的判定:** 当Score(x)大于某个阈值时,便可将样本x判定为异常样本 53 | 54 | --- 55 | 56 | ## 4. 马氏距离及其变体【对样本的异常程度评估完全一致】 57 | 58 | #### 4.1 验证方法 59 | - 根据多个不同的随机种子生成多组实验数据集 60 | - 根据两种方法返回的分数对样本集的索引进行升序或降序排列,例如数值最大的样本其对应的索引排在最前面,依次类推; 61 | - 若分别根据马氏距离及其变体返回的数值大小对样本索引降序排列,若两个索引序列完全一致,则证明这两种方法对样本集中每一个样本的异常程度评估是完全一致的 62 | - 换句话说,在数据集中随机抽取两个不同样本a与b,若马氏距离返回的数据显示样本a比样本b更偏离数据数据中心,则马氏距离变体对这种大小关系有一致的判定 63 | 64 | #### 4.2 验证代码:[verify_mahal_equivalence](./verify_mahal_equivalence.py) 65 | 66 | #### 4.3 验证结论 67 | - 马氏距离及其变体对**各样本在数据集中的异常程度大小关系是完全一致的** 68 | - 根据随机生成的多个数据集进行验证,**实验结果表明上述结论是完全正确的** 69 | - 每个数据集的行数、列数、异常样本比例均在一定区间内随机生成 70 | - 正常样本服从标准正态分布,异常样本由两组异常样本子集构成,分别服从伽玛分布、指数分布 71 | - 更多细节请查阅上述验证代码 72 | 73 | ![Mahdist_verify_result](./Pics/Mahdist_verify_result.jpg) 74 | -------------------------------------------------------------------------------- /Mahalanobis/data/forest_cover/README.md: -------------------------------------------------------------------------------- 1 | Source: http://odds.cs.stonybrook.edu/forestcovercovertype-dataset/ 2 | 3 | 286048 observations, 0.9% anomalous 4 | 5 | Description: 6 | The original ForestCover/Covertype dataset from UCI machine learning repository is a multiclass classification dataset. It is used in predicting forest cover type from cartographic variables only (no remotely sensed data). This study area includes four wilderness areas located in the Roosevelt National Forest of northern Colorado. These areas represent forests with minimal human-caused disturbances, so that existing forest cover types are more a result of ecological processes rather than forest management practices. This dataset has 54 attributes (10 quantitative variables, 4 binary wilderness areas and 40 binary soil type variables). Here, outlier detection dataset is created using only 10 quantitative attributes. Instances from class 2 are considered as normal points and instances from class 4 are anomalies. The anomalies ratio is 0.9%. Instances from the other classes are omitted. -------------------------------------------------------------------------------- /Mahalanobis/data/kdd_http/README.md: -------------------------------------------------------------------------------- 1 | Source: http://odds.cs.stonybrook.edu/http-kddcup99-dataset/ 2 | 3 | 567479 observations, 0.4% anomalous 4 | 5 | Description: 6 | The original KDD Cup 1999 dataset from UCI machine learning repository contains 41 attributes (34 continuous, and 7 categorical), however, they are reduced to 4 attributes (service, duration, src_bytes, dst_bytes) as these attributes are regarded as the most basic attributes (see kddcup.names), where only ‘service’ is categorical. Using the ‘service’ attribute, the data is divided into {http, smtp, ftp, ftp_data, others} subsets. Here, only ‘http’ service data is used. Since the continuous attribute values are concentrated around ‘0’, we transformed each value into a value far from ‘0’, by y = log(x + 0.1). The original data set has 3,925,651 attacks (80.1%) out of 4,898,431 records. A smaller set is forged by having only 3,377 attacks (0.35%) of 976,157 records, where attribute ‘logged_in’ is positive. From this forged dataset 567,497 ‘http’ service data is used to construct the http (KDDCUP99) dataset. -------------------------------------------------------------------------------- /Mahalanobis/data/kdd_smtp/README.md: -------------------------------------------------------------------------------- 1 | Source: 2 | http://odds.cs.stonybrook.edu/smtp-kddcup99-dataset/ 3 | 4 | 95156 observations, 0.03% anomalous 5 | 6 | Description: 7 | The original KDD Cup 1999 dataset from UCI machine learning repository contains 41 attributes (34 continuous, and 7 categorical), however, they are reduced to 4 attributes (service, duration, src_bytes, dst_bytes) as these attributes are regarded as the most basic attributes(see kddcup.names), where only ‘service’ is categorical. Using the ‘service’ attribute, the data is divided into {http, smtp, ftp, ftp_data, others} subsets. Here, only ‘smtp’ service data is used. Since the continuous attribute values are concentrated around ‘0’, we transformed each value into a value far from ‘0’, by y = log(x + 0.1). The original data set has 3,925,651 attacks (80.1%) out of 4,898,431 records. A smaller set is forged by having only 3,377 attacks (0.35%) of 976,157 records, where attribute ‘logged_in’ is positive. From this forged dataset 95,156 ‘smtp’ service data is used to construct the Smtp (KDDCUP99) dataset. -------------------------------------------------------------------------------- /Mahalanobis/data/shuttle/README.md: -------------------------------------------------------------------------------- 1 | Source: http://odds.cs.stonybrook.edu/shuttle-dataset/ 2 | 3 | 49097 observations, 7% anomalous 4 | 5 | Description: 6 | The original Statlog (Shuttle) dataset from UCI machine learning repository is a multi-class classification dataset with dimensionality 9. Here, the training and test data are combined. The smallest five classes, i.e. 2, 3, 5, 6, 7 are combined to form the outliers class, while class 1 forms the inlier class. Data for class 4 is discarded. -------------------------------------------------------------------------------- /Mahalanobis/mahal_dist.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | import numpy as np 4 | from numpy import linalg as LA 5 | 6 | 7 | def mahal_dist(matrix): 8 | # 计算样本矩阵的中心向量 9 | matrix_mean = np.mean(matrix, axis=0) 10 | # 计算各样本与中心向量之间的差异 11 | delta = matrix - matrix_mean 12 | 13 | # 求协方差矩阵及其逆矩阵 14 | cov_matrix = np.cov(matrix, rowvar=False, ddof=1) 15 | cov_matrix_inv = LA.inv(cov_matrix) 16 | 17 | # 求单个样本向量与样本中心的马氏距离 18 | def md_vector(vector): 19 | inner_prod = np.dot(vector, cov_matrix_inv) 20 | inner_product = np.dot(inner_prod, vector) 21 | dist = np.sqrt(inner_product) 22 | return dist 23 | 24 | # 求矩阵中所有样本与中心之间的马氏距离 25 | mahal_dist = np.apply_along_axis(arr=delta, axis=1, func1d=md_vector) 26 | return mahal_dist 27 | -------------------------------------------------------------------------------- /Mahalanobis/mahal_dist_variant.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | import numpy as np 4 | from numpy import linalg as LA 5 | from sklearn.preprocessing import StandardScaler 6 | 7 | 8 | def mahal_dist_variant(matrix): 9 | # 将数据集标准化 10 | matrix = StandardScaler().fit_transform(matrix) 11 | # 对数据集进行主成分分析 12 | cov_matrix = np.cov(matrix, rowvar=False, ddof=1) 13 | eigen_values, eigen_vectors = LA.eig(cov_matrix) 14 | 15 | # 函数get_score用于返回数据集在单个主成分上的分数 16 | # 参数pc_idx表示主成分的索引 17 | def get_score(pc_idx): 18 | # eigen_vectors[pc_idx]表示第idx个主成分构成的列向量 19 | inner_product = np.dot(matrix, eigen_vectors[pc_idx]) 20 | score = np.square(inner_product) / eigen_values[pc_idx] 21 | return score 22 | # 返回训练集每一个样本在所有主成分上的分数,并分别求和 23 | mahal_dist = sum(map(get_score, range(len(eigen_values)))) 24 | return mahal_dist 25 | -------------------------------------------------------------------------------- /Mahalanobis/main.py: -------------------------------------------------------------------------------- 1 | 2 | import torch 3 | import argparse 4 | 5 | from modules.autoencoder import Autoencoder 6 | from utils.dataloading import load_dataset 7 | from utils.tracking import Tracker 8 | from utils.experiment import train_model 9 | 10 | parser = argparse.ArgumentParser(description='Automahalanobis experiment') 11 | 12 | # Autoencoder args 13 | parser.add_argument('--mahalanobis', dest='mahalanobis', action='store_true') 14 | parser.set_defaults(mahalanobis=False) 15 | parser.add_argument('--mahalanobis_cov_decay', type=float, default=1E-4) 16 | parser.add_argument('--distort_inputs', dest='distort_inputs', 17 | action='store_true') 18 | parser.set_defaults(distort_inputs=False) 19 | parser.add_argument('--distort_targets', dest='distort_targets', 20 | action='store_true') 21 | parser.set_defaults(distort_targets=False) 22 | 23 | # Dataset args 24 | parser.add_argument('--dataset_name', type=str, default='forest_cover', 25 | help='name of the dataset') 26 | parser.add_argument('--test_prop', type=str, default=0.2) 27 | parser.add_argument('--val_prop', type=str, default=0.2) 28 | 29 | # Training args 30 | parser.add_argument('--n_epochs', type=int, default=500) 31 | parser.add_argument('--batch_size', type=int, default=512) 32 | parser.add_argument('--no_adam', dest='adam', action='store_false', 33 | help='boolean whether to not use adam optimizer but SGD with momentum') 34 | parser.set_defaults(adam=True) 35 | parser.add_argument('--no_cuda', dest='cuda', action='store_false') 36 | parser.set_defaults(cuda=True) 37 | parser.add_argument('--no_tensorboard', dest='tensorboard', action='store_false') 38 | parser.set_defaults(tensorboard=True) 39 | 40 | # Collect args and kwargs 41 | args = parser.parse_args() 42 | args.cuda = args.cuda if torch.cuda.is_available() else False 43 | kwargs = {'num_workers': 1, 'pin_memory': True} if args.cuda else {} 44 | 45 | # Set model name 46 | args.model_name = 'ae' 47 | args.model_name += '-mahalanobis' if args.mahalanobis else '-vanilla' 48 | args.model_name += '-distortinputs' if args.distort_inputs else '' 49 | args.model_name += '-distorttargets' if args.distort_targets else '' 50 | 51 | if __name__ == '__main__': 52 | 53 | # Load data 54 | train_loader, val_loader, test_loader, scaler, model_args = \ 55 | load_dataset(args, **kwargs) 56 | 57 | # Construct model and cast to double 58 | model = Autoencoder(model_args.layer_dims, args.mahalanobis, 59 | args.mahalanobis_cov_decay, args.distort_inputs) 60 | model.double() 61 | 62 | # Determine device and copy model and scaler 63 | device = torch.device("cuda:0" if args.cuda else "cpu") 64 | model.to(device) 65 | scaler.to(device) 66 | 67 | # Instantiate tracker 68 | tracker = Tracker(args) 69 | 70 | # Construct loss function 71 | criterion = torch.nn.L1Loss() 72 | 73 | # Construct optimizer 74 | if args.adam: 75 | optimizer = torch.optim.Adam(model.parameters()) 76 | else: 77 | optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9, 78 | nesterov=False) 79 | 80 | # Train the model 81 | model, epoch = train_model(model, criterion, optimizer, train_loader, 82 | val_loader, scaler, tracker, args, device) 83 | 84 | print("Trained model on device: {}".format(device)) 85 | 86 | state = { 87 | 'epoch': epoch, 88 | 'state_dict': model.state_dict(), 89 | 'optimizer': optimizer.state_dict() 90 | } 91 | torch.save(state, tracker.dir+'model_state') 92 | 93 | # state = torch.load() 94 | # model.load_state_dict(state['state_dict']) 95 | # optimizer.load_state_dict(state['optimizer']) 96 | 97 | 98 | -------------------------------------------------------------------------------- /Mahalanobis/modules/autoencoder.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Autoencoder module 4 | -------------------------- 5 | """ 6 | import torch 7 | import torch.nn as nn 8 | from modules.mahalanobis import MahalanobisLayer 9 | 10 | class Autoencoder(nn.Module): 11 | 12 | def __init__(self, layer_dims, mahalanobis=False, 13 | mahalanobis_cov_decay=0.1, distort_inputs=False): 14 | super(Autoencoder, self).__init__() 15 | 16 | self.layer_dims = layer_dims 17 | 18 | self.encoding_layers = torch.nn.Sequential( 19 | nn.Linear(layer_dims[0], layer_dims[1]), # 1st hidden layer 20 | nn.Tanh(), # 1st hidden layer 21 | nn.Linear(layer_dims[1], layer_dims[2]) # Compression layer 22 | ) 23 | 24 | self.decoding_layers = torch.nn.Sequential( 25 | nn.Linear(layer_dims[2], layer_dims[3]), # 3rd hidden layer 26 | nn.Tanh(), # 3d hidden layer 27 | nn.Linear(layer_dims[3], layer_dims[4]) # Output layer 28 | ) 29 | 30 | self.mahalanobis = mahalanobis 31 | 32 | if mahalanobis: 33 | self.mahalanobis_layer = MahalanobisLayer(layer_dims[0], 34 | mahalanobis_cov_decay) 35 | 36 | self.distort_input = distort_inputs 37 | 38 | def forward(self, x): 39 | x_in = x + torch.randn_like(x) if self.distort_input else x 40 | x_enc = self.encoding_layers(x_in) 41 | x_fit = self.decoding_layers(x_enc) 42 | if self.mahalanobis: 43 | x_fit = self.mahalanobis_layer(x, x_fit) 44 | return x_fit 45 | 46 | def encode(self, x): 47 | return self.encoding_layers(x) 48 | 49 | def decode(self, x): 50 | return self.decoding_layers(x) 51 | 52 | def reconstruct(self, x): 53 | x = self.encoding_layers(x) 54 | x = self.decoding_layers(x) 55 | return x 56 | 57 | 58 | if __name__ == "__main__": 59 | batch_size = 128 60 | layer_dims = 10, 30, 5, 30, 10 61 | 62 | # Create random Tensors to hold inputs and outputs 63 | x = torch.Tensor(torch.randn(batch_size, layer_dims[0])) 64 | 65 | # Construct our model by instantiating the class defined above 66 | model = Autoencoder(layer_dims, True, 0.001, True) 67 | 68 | # Select device to train model on and copy model to device 69 | device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") 70 | model.to(device) 71 | 72 | # Copy data to device 73 | x = x.to(device) 74 | 75 | # Construct our loss function and an optimizer 76 | criterion = nn.L1Loss() 77 | optimizer = torch.optim.SGD(model.parameters(), lr=0.1, momentum=0) 78 | 79 | for t in range(2000): 80 | # Forward pass: Compute predicted y by passing x to the model 81 | errors = model(x) 82 | 83 | # Compute and print loss 84 | loss = criterion(errors, torch.zeros(errors.size(), device=device)) 85 | print(t, loss.item()) 86 | 87 | # Zero gradients, perform a backward pass, and update the weights. 88 | optimizer.zero_grad() 89 | loss.backward() 90 | optimizer.step() 91 | 92 | if model.mahalanobis_layer: 93 | with torch.no_grad(): 94 | x_fit = model.reconstruct(x) 95 | model.mahalanobis_layer.update(x, x_fit) 96 | 97 | print("Trained model on device: {}".format(device)) 98 | 99 | print(errors) 100 | print(x) 101 | print(model.reconstruct(x)) 102 | if model.mahalanobis: 103 | print(model.mahalanobis_layer.S) 104 | print(model.mahalanobis_layer.S_inv) 105 | -------------------------------------------------------------------------------- /Mahalanobis/modules/mahalanobis.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Mahalanobis module 4 | -------------------------- 5 | """ 6 | import torch 7 | import torch.nn as nn 8 | 9 | class MahalanobisLayer(nn.Module): 10 | 11 | def __init__(self, dim, decay = 0.1): 12 | super(MahalanobisLayer, self).__init__() 13 | self.register_buffer('S', torch.eye(dim)) 14 | self.register_buffer('S_inv', torch.eye(dim)) 15 | self.decay = decay 16 | 17 | def forward(self, x, x_fit): 18 | """ 19 | Calculates the squared Mahalanobis distance between x and x_fit 20 | """ 21 | 22 | delta = x - x_fit 23 | m = torch.mm(torch.mm(delta, self.S_inv), delta.t()) 24 | return torch.diag(m) 25 | 26 | def cov(self, x): 27 | x -= torch.mean(x, dim=0) 28 | return 1 / (x.size(0) - 1) * x.t().mm(x) 29 | 30 | def update(self, X, X_fit): 31 | delta = X - X_fit 32 | self.S = (1 - self.decay) * self.S + self.decay * self.cov(delta) 33 | self.S_inv = torch.pinverse(self.S) 34 | 35 | if __name__ == "__main__": 36 | 37 | from scipy.spatial import distance 38 | import numpy as np 39 | 40 | # Some example data for testing 41 | v = torch.Tensor([[1, 0.5, 0.5], [0.5, 1, 0.5], [0.5, 0.5, 1]]) 42 | iv = torch.inverse(v) 43 | X1 = torch.Tensor([[1, 0, 0], [0, 1, 0], [0, 2, 0]]) 44 | X2 = torch.Tensor([[0, 1, 0], [0, 2, 0], [0, 2, 0]]) 45 | 46 | # Squared Mahalanobis distance using scipy 47 | scipy_dist_list = [distance.mahalanobis(x1.numpy(), x2.numpy(), iv.numpy()) for x1, x2 in zip(X1, X2)] 48 | scipy_dist = np.array(scipy_dist_list)**2 49 | 50 | # Mahalanobis distance pytorch implementation 51 | mah_layer = MahalanobisLayer(3, decay=0.99) 52 | mah_layer.S_inv = iv 53 | 54 | pytorch_dist = mah_layer(X1, X2) 55 | 56 | # Check if almost equal 57 | np.testing.assert_almost_equal(scipy_dist, pytorch_dist.numpy()) 58 | 59 | # Covariance method 60 | X = torch.rand(10, 3) 61 | np_cov_X = np.cov(X.numpy(), rowvar=False) 62 | pytorch_cov_X = mah_layer.cov(X) 63 | 64 | # Check if almost equal 65 | np.testing.assert_almost_equal(np_cov_X, pytorch_cov_X.numpy()) 66 | 67 | # Update method 68 | X_fit = torch.rand(10, 3) 69 | delta = X - X_fit 70 | np_cov_delta = np.cov(delta.numpy(), rowvar=False) 71 | pytorch_cov_delta = mah_layer.cov(delta) 72 | 73 | # Check if almost equal after enough updates 74 | for i in range(20): 75 | mah_layer.update(X, X_fit) 76 | np.testing.assert_almost_equal(np_cov_delta, mah_layer.S.numpy()) 77 | 78 | # Test if numpy inverse and pytorch pseudo inverse are close 79 | np.testing.assert_almost_equal(np.linalg.inv(np_cov_delta), mah_layer.S_inv.numpy(), decimal=5) 80 | -------------------------------------------------------------------------------- /Mahalanobis/requirements.txt: -------------------------------------------------------------------------------- 1 | name: pytorch 2 | channels: 3 | - pytorch 4 | - defaults 5 | dependencies: 6 | - blas=1.0=mkl 7 | - ca-certificates=2018.03.07=0 8 | - certifi=2018.10.15=py37_0 9 | - cffi=1.11.5=py37he75722e_1 10 | - cycler=0.10.0=py37_0 11 | - dbus=1.13.2=h714fa37_1 12 | - expat=2.2.6=he6710b0_0 13 | - fontconfig=2.13.0=h9420a91_0 14 | - freetype=2.9.1=h8a8886c_1 15 | - glib=2.56.2=hd408876_0 16 | - gst-plugins-base=1.14.0=hbbd80ab_1 17 | - gstreamer=1.14.0=hb453b48_1 18 | - icu=58.2=h9c2bf20_1 19 | - intel-openmp=2019.0=118 20 | - jpeg=9b=h024ee3a_2 21 | - kiwisolver=1.0.1=py37hf484d3e_0 22 | - libedit=3.1.20170329=h6b74fdf_2 23 | - libffi=3.2.1=hd88cf55_4 24 | - libgcc-ng=8.2.0=hdf63c60_1 25 | - libgfortran-ng=7.3.0=hdf63c60_0 26 | - libpng=1.6.35=hbc83047_0 27 | - libstdcxx-ng=8.2.0=hdf63c60_1 28 | - libtiff=4.0.9=he85c1e1_2 29 | - libuuid=1.0.3=h1bed415_2 30 | - libxcb=1.13=h1bed415_1 31 | - libxml2=2.9.8=h26e45fe_1 32 | - matplotlib=3.0.1=py37h5429711_0 33 | - mkl=2019.0=118 34 | - mkl_fft=1.0.6=py37h7dd41cf_0 35 | - mkl_random=1.0.1=py37h4414c95_1 36 | - ncurses=6.1=hf484d3e_0 37 | - ninja=1.8.2=py37h6bb024c_1 38 | - numpy=1.15.4=py37h1d66e8a_0 39 | - numpy-base=1.15.4=py37h81de0dd_0 40 | - olefile=0.46=py37_0 41 | - openssl=1.0.2p=h14c3975_0 42 | - pandas=0.23.4=py37h04863e7_0 43 | - patsy=0.5.1=py37_0 44 | - pcre=8.42=h439df22_0 45 | - pillow=5.3.0=py37h34e0f95_0 46 | - pip=18.1=py37_0 47 | - pycparser=2.19=py37_0 48 | - pyparsing=2.3.0=py37_0 49 | - pyqt=5.9.2=py37h05f1152_2 50 | - python=3.7.0=h6e4f718_3 51 | - python-dateutil=2.7.5=py37_0 52 | - pytz=2018.7=py37_0 53 | - qt=5.9.6=h8703b6f_2 54 | - readline=7.0=h7b6447c_5 55 | - scikit-learn=0.20.0=py37h4989274_1 56 | - scipy=1.1.0=py37hfa4b5c9_1 57 | - seaborn=0.9.0=py37_0 58 | - setuptools=40.5.0=py37_0 59 | - sip=4.19.8=py37hf484d3e_0 60 | - six=1.11.0=py37_1 61 | - sqlite=3.25.2=h7b6447c_0 62 | - statsmodels=0.9.0=py37h035aef0_0 63 | - tk=8.6.8=hbc83047_0 64 | - tornado=5.1.1=py37h7b6447c_0 65 | - wheel=0.32.2=py37_0 66 | - xz=5.2.4=h14c3975_4 67 | - zlib=1.2.11=ha838bed_2 68 | - pytorch=0.4.1=py37_py36_py35_py27__9.0.176_7.1.2_2 69 | - torchvision=0.2.1=py37_1 70 | - pip: 71 | - torch==0.4.1.post2 72 | prefix: /home/bart/anaconda3/envs/pytorch 73 | 74 | -------------------------------------------------------------------------------- /Mahalanobis/run.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | source activate pytorch 4 | 5 | # Forest cover 6 | python3 main.py --dataset_name forest_cover 7 | python3 main.py --mahalanobis --dataset_name forest_cover 8 | python3 main.py --mahalanobis --distort_inputs --dataset_name forest_cover 9 | python3 main.py --mahalanobis --distort_targets --dataset_name forest_cover 10 | 11 | # Kdd smtp 12 | python3 main.py --dataset_name kdd_smtp 13 | python3 main.py --mahalanobis --dataset_name kdd_smtp 14 | python3 main.py --mahalanobis --distort_inputs --dataset_name kdd_smtp 15 | python3 main.py --mahalanobis --distort_targets --dataset_name kdd_smtp 16 | 17 | # Kdd http 18 | python3 main.py --dataset_name kdd_http 19 | python3 main.py --mahalanobis --dataset_name kdd_http 20 | python3 main.py --mahalanobis --distort_inputs --dataset_name kdd_http 21 | python3 main.py --mahalanobis --distort_targets --dataset_name kdd_http 22 | 23 | # Shuttle 24 | python3 main.py --dataset_name shuttle 25 | python3 main.py --mahalanobis --dataset_name shuttle 26 | python3 main.py --mahalanobis --distort_inputs --dataset_name shuttle 27 | python3 main.py --mahalanobis --distort_targets --dataset_name shuttle 28 | 29 | # Exit script 30 | exit 0 -------------------------------------------------------------------------------- /Mahalanobis/utils/dataloading.py: -------------------------------------------------------------------------------- 1 | 2 | import torch 3 | import torch.utils.data as data_utils 4 | import numpy as np 5 | 6 | import h5py 7 | from scipy.io import loadmat 8 | 9 | 10 | class Scaler: 11 | 12 | def __init__(self, x): 13 | # Numpy array input to tensor 14 | x = torch.from_numpy(x).double() 15 | 16 | # Calculate mean and standard deviation of train 17 | self.mean_vec = torch.mean(x, dim=0) 18 | self.sd_vec = torch.std(x, dim=0) 19 | 20 | def to(self, device): 21 | self.mean_vec = self.mean_vec.to(device) 22 | self.sd_vec = self.sd_vec.to(device) 23 | 24 | def normalize(self, x): 25 | return (x - self.mean_vec) / self.sd_vec 26 | 27 | 28 | def np_shuffle_arrays(a, b): 29 | assert len(a) == len(b) 30 | p = np.random.permutation(len(a)) 31 | return a[p], b[p] 32 | 33 | 34 | def read_mat(path: str, transpose=True, print_dim=False): 35 | 36 | # Read data - different .mat versions: first try h5py, then scipy 37 | try: 38 | file = h5py.File(path, 'r') 39 | except OSError: 40 | file = loadmat(path) 41 | 42 | # Extract X and labels 43 | X = np.array(file.get('X')) 44 | labels = np.array(file.get('y')) 45 | 46 | # Transpose data 47 | if transpose: 48 | X = X.transpose() 49 | labels = labels.transpose() 50 | 51 | if print_dim: 52 | print('Input data dim:') 53 | print(' X: {}'.format(X.shape)) 54 | print(' labels: {}'.format(labels.shape)) 55 | 56 | return X, labels 57 | 58 | 59 | def generate_loaders(X, labels, args, **kwargs): 60 | 61 | # Train validation test split 62 | X, labels = np_shuffle_arrays(X, labels) 63 | 64 | data_nrows = X.shape[0] 65 | val_size = int(args.val_prop * data_nrows) 66 | test_size = int(args.test_prop * data_nrows) 67 | 68 | splits = [data_nrows - val_size - test_size, data_nrows - val_size] 69 | X_train, X_val, X_test = np.split(X, splits) 70 | labels_train, labels_val, labels_test = np.split(labels, splits) 71 | 72 | # Fit scaler 73 | scaler = Scaler(X_train) 74 | 75 | # Pytorch data loaders 76 | train = data_utils.TensorDataset(torch.from_numpy(X_train).double(), 77 | torch.from_numpy(labels_train).double()) 78 | train_loader = data_utils.DataLoader(train, 79 | batch_size=args.batch_size, 80 | shuffle=True, **kwargs) 81 | 82 | validation = data_utils.TensorDataset(torch.from_numpy(X_val).double(), 83 | torch.from_numpy(labels_val).double()) 84 | val_loader = data_utils.DataLoader(validation, 85 | batch_size=args.batch_size, 86 | shuffle=False, **kwargs) 87 | 88 | test = data_utils.TensorDataset(torch.from_numpy(X_test).double(), 89 | torch.from_numpy(labels_test).double()) 90 | test_loader = data_utils.DataLoader(test, 91 | batch_size=args.batch_size, 92 | shuffle=False, **kwargs) 93 | 94 | return train_loader, val_loader, test_loader, scaler 95 | 96 | 97 | def load_kdd_smtp(args, as_numpy, **kwargs): 98 | 99 | # Set args 100 | args.layer_dims = (3, 10, 2, 10, 3) 101 | 102 | # Load data 103 | X, labels = read_mat('./data/kdd_smtp/kdd_smtp.mat', 104 | transpose=True, print_dim=True) 105 | 106 | if as_numpy: 107 | return X, labels 108 | 109 | # Split data and generate the data loaders 110 | train_loader, val_loader, test_loader, scaler = \ 111 | generate_loaders(X, labels, args, **kwargs) 112 | 113 | return train_loader, val_loader, test_loader, scaler, args 114 | 115 | 116 | def load_kdd_http(args, as_numpy, **kwargs): 117 | 118 | # Set args 119 | args.layer_dims = (3, 10, 2, 10, 3) 120 | 121 | # Load data 122 | X, labels = read_mat('./data/kdd_http/kdd_http.mat', 123 | transpose=True, print_dim=True) 124 | 125 | if as_numpy: 126 | return X, labels 127 | 128 | # Split data and generate the data loaders 129 | train_loader, val_loader, test_loader, scaler = \ 130 | generate_loaders(X, labels, args, **kwargs) 131 | 132 | return train_loader, val_loader, test_loader, scaler, args 133 | 134 | 135 | def load_shuttle(args, as_numpy, **kwargs): 136 | 137 | # Set args 138 | args.layer_dims = (9, 20, 5, 20, 9) 139 | 140 | # Load data 141 | X, labels = read_mat('./data/shuttle/shuttle.mat', 142 | transpose=False, print_dim=True) 143 | 144 | if as_numpy: 145 | return X, labels 146 | 147 | # Split data and generate the data loaders 148 | train_loader, val_loader, test_loader, scaler = \ 149 | generate_loaders(X, labels, args, **kwargs) 150 | 151 | return train_loader, val_loader, test_loader, scaler, args 152 | 153 | 154 | def load_forest_cover(args, as_numpy, **kwargs): 155 | 156 | # Set args 157 | args.layer_dims = (10, 20, 5, 20, 10) 158 | 159 | # Load data 160 | X, labels = read_mat('./data/forest_cover/forest_cover.mat', 161 | transpose=False, print_dim=True) 162 | 163 | if as_numpy: 164 | return X, labels 165 | 166 | # Split data and generate the data loaders 167 | train_loader, val_loader, test_loader, scaler = \ 168 | generate_loaders(X, labels, args, **kwargs) 169 | 170 | return train_loader, val_loader, test_loader, scaler, args 171 | 172 | 173 | def load_dataset(args, **kwargs): 174 | ''' 175 | Load torch data loaders for datasets: kdd_smtp, kdd_http 176 | 177 | :param args: Namespace object created by argparse containing: 178 | dataset_name, test_prop, val_prop, batch_size 179 | :param kwargs: to be passed to torch.utils.data.DataLoader 180 | :return: Tuple: train_loader, val_loader, test_loader, labels_split, args 181 | ''' 182 | if args.dataset_name == 'kdd_smtp': 183 | data_tuple = load_kdd_smtp(args, False, **kwargs) 184 | elif args.dataset_name == 'kdd_http': 185 | data_tuple = load_kdd_http(args, False, **kwargs) 186 | elif args.dataset_name == 'shuttle': 187 | data_tuple = load_shuttle(args, False, **kwargs) 188 | elif args.dataset_name == 'forest_cover': 189 | data_tuple = load_forest_cover(args, False, **kwargs) 190 | else: 191 | raise Exception('Wrong name of the dataset!') 192 | return data_tuple 193 | 194 | 195 | if __name__ == "__main__": 196 | 197 | X_train = np.random.randn(20, 5) 198 | scaler = Scaler(X_train) 199 | X_scaled = scaler.normalize(X_train) 200 | 201 | np.testing.assert_almost_equal(np.array([0,0,0,0,0]), 202 | np.mean(X_scaled, axis=0)) 203 | np.testing.assert_almost_equal(np.array([1, 1, 1, 1, 1]), 204 | np.std(X_scaled, axis=0)) 205 | 206 | from argparse import Namespace 207 | data_args = Namespace(dataset_name='forest_cover', 208 | test_prop=0.2, 209 | val_prop=0.2, 210 | batch_size=128) 211 | 212 | train_loader, val_loader, test_loader, scaler, args= \ 213 | load_dataset(args=data_args) 214 | -------------------------------------------------------------------------------- /Mahalanobis/utils/experiment.py: -------------------------------------------------------------------------------- 1 | 2 | import numpy as np 3 | import math 4 | import torch 5 | 6 | def train_model(model, criterion, optimizer, train_loader, val_loader, scaler, 7 | tracker, args, device): 8 | 9 | # Performance metrics and tracking 10 | val_loss, top1, top5, top10, top25 = \ 11 | validate(val_loader, model, criterion, scaler, device) 12 | tracker.track(0, 0, val_loss, top1, top5, top10, top25) 13 | 14 | for epoch in range(1, args.n_epochs + 1): 15 | 16 | for X_batch, labels_batch in train_loader: 17 | 18 | # Copy data to device 19 | X_batch, labels_batch = X_batch.to(device), labels_batch.to(device) 20 | 21 | # Scale X 22 | X_batch = scaler.normalize(X_batch) 23 | 24 | # Forward pass: Compute predicted y by passing x to the model 25 | out = model(X_batch) 26 | 27 | # Construct y tensor 28 | y_batch = torch.zeros_like(out) if model.mahalanobis else X_batch 29 | 30 | # Compute and print loss 31 | loss = criterion(out, y_batch) 32 | print('Epoch: {}/{} -- Loss: {}'.format(epoch, args.n_epochs, 33 | loss.item())) 34 | 35 | # Zero gradients, perform a backward pass, and update the weights. 36 | optimizer.zero_grad() 37 | loss.backward() 38 | optimizer.step() 39 | 40 | if model.mahalanobis: 41 | with torch.no_grad(): 42 | X_fit = model.reconstruct(X_batch) 43 | model.mahalanobis_layer.update(X_batch, X_fit) 44 | 45 | # Performance metrics and tracking 46 | val_loss, top1, top5, top10, top25 = \ 47 | validate(val_loader, model, criterion, scaler, device) 48 | tracker.track(epoch, loss, val_loss, top1, top5, top10, top25) 49 | 50 | return model, epoch 51 | 52 | def outlier_factor(x, x_val): 53 | err = x - x_val 54 | err = torch.pow(err, 2) 55 | err = torch.sum(err, 1) 56 | return err / len(err) 57 | 58 | 59 | def performance(anomalies, scores, percentage): 60 | 61 | # Order anomalies (binary vector) by the anomaly score in descending order 62 | _, ordering = torch.sort(scores, descending=True) 63 | ordered_anomalies = anomalies[ordering.type(torch.LongTensor)] 64 | 65 | # Number of observations to include in top 66 | n_top = math.ceil(len(anomalies) * percentage / 100) 67 | 68 | return torch.sum(ordered_anomalies[:n_top]) / torch.sum(anomalies) 69 | 70 | def validate(data_loader, model, criterion, scaler, device): 71 | 72 | class FillableArray: 73 | 74 | def __repr__(self): 75 | return self.X.__str__() 76 | 77 | def __init__(self, n, tensor=False): 78 | self.n = n 79 | self.X = torch.Tensor(torch.zeros(n)) if tensor else np.zeros(n) 80 | self.i = 0 81 | 82 | def fill(self, x): 83 | stop_ind = self.i + len(x) 84 | assert self.n >= stop_ind 85 | self.X[self.i:stop_ind] = x.flatten() 86 | self.i = stop_ind 87 | 88 | nrow = len(data_loader.dataset) 89 | anomalies = FillableArray(nrow, tensor=True) 90 | scores = FillableArray(nrow, tensor=True) 91 | loss =0 92 | 93 | for i, (X_val, labels_val) in enumerate(data_loader): 94 | 95 | # Copy to device 96 | X_val, labels_val = X_val.to(device), labels_val.to(device) 97 | 98 | # Scale X 99 | X_val = scaler.normalize(X_val) 100 | 101 | # Calculate output of model: reconstructions or Mahalanobis distance 102 | out = model(X_val) 103 | 104 | # Construct y tensor and calculate loss 105 | y_val = torch.zeros_like(out) if model.mahalanobis else X_val 106 | loss = criterion(out, y_val) 107 | 108 | # Determine anomaly scores 109 | val_scores = out if model.mahalanobis else outlier_factor(out, X_val) 110 | 111 | # Fill anomaly and score tensors to compute performance on full set 112 | anomalies.fill(labels_val) 113 | scores.fill(val_scores) 114 | 115 | loss /= i + 1 116 | top1 = performance(anomalies.X, scores.X, 1).item() 117 | top5 = performance(anomalies.X, scores.X, 5).item() 118 | top10 = performance(anomalies.X, scores.X, 10).item() 119 | top25 = performance(anomalies.X, scores.X, 25).item() 120 | 121 | return loss.item(), top1, top5, top10, top25 122 | 123 | if __name__=='__main__': 124 | 125 | x = torch.randn(10,3) 126 | x_val = torch.randn_like(x) 127 | print(outlier_factor(x, x_val)) 128 | 129 | from utils.dataloading import load_dataset 130 | from argparse import Namespace 131 | from modules.autoencoder import Autoencoder 132 | 133 | data_args = Namespace(dataset_name='kdd_smtp', 134 | test_prop=0.2, 135 | val_prop=0.2, 136 | batch_size=128) 137 | 138 | train_loader, val_loader, test_loader, scaler, model_args = \ 139 | load_dataset(args=data_args) 140 | 141 | args = Namespace(mahalanobis=True, 142 | mahalanobis_cov_decay=0.9, 143 | distort_inputs=False) 144 | 145 | ae = Autoencoder(model_args.layer_dims, args.mahalanobis, 146 | args.mahalanobis_cov_decay, args.distort_inputs) 147 | ae.double() 148 | device = torch.device("cuda:0" if False else "cpu") 149 | ae.to(device) 150 | 151 | criterion = torch.nn.L1Loss() 152 | test = validate(train_loader, ae, criterion, scaler, device) -------------------------------------------------------------------------------- /Mahalanobis/utils/tracking.py: -------------------------------------------------------------------------------- 1 | 2 | import datetime 3 | import re 4 | import os 5 | import csv 6 | import json 7 | from tensorboardX import SummaryWriter 8 | 9 | class Tracker: 10 | 11 | def __init__(self, args): 12 | 13 | # Make signature of experiment 14 | time_signature = str(datetime.datetime.now())[:19] 15 | time_signature = re.sub('[^0-9]', '_', time_signature) 16 | signature = '{}_{}_{}'.format(time_signature, args.model_name, 17 | args.dataset_name) 18 | 19 | # Set directory to store run 20 | self.dir = './runs/{}/'.format(signature) 21 | 22 | if not os.path.exists(self.dir): 23 | os.makedirs(self.dir) 24 | 25 | # Store settings 26 | settings_dict = vars(args) 27 | 28 | with open(self.dir + 'settings.json', 'w') as file: 29 | json.dump(settings_dict, file, sort_keys=True, indent=4) 30 | 31 | # Create csv file for appending stuff during training 32 | with open(self.dir + 'train_metrics.csv', 'w') as file: 33 | filewriter = csv.writer(file, delimiter=';') 34 | filewriter.writerow(['epoch', 'train_loss', 'val_loss', 35 | 'top1_percent', 'top5_percent', 36 | 'top10_percent', 'top25_percent']) 37 | 38 | # Tensorboard writer 39 | self.tensorboard=args.tensorboard 40 | if self.tensorboard: 41 | self.writer = SummaryWriter(log_dir=self.dir + 'tensorboard/') 42 | self.k = 0 # Counter for tensorboard events 43 | 44 | def __del__(self): 45 | if self.tensorboard: 46 | self.writer.close() 47 | 48 | def track(self, epoch, train_loss, val_loss, top1_percent=0, 49 | top5_percent=0, top10_percent=0, top25_percent=0): 50 | 51 | # Collect values in list 52 | metrics = [epoch, train_loss, val_loss, top1_percent, top5_percent, 53 | top10_percent, top25_percent] 54 | 55 | # Append to csv file 56 | with open(self.dir + 'train_metrics.csv', 'a') as f: 57 | writer = csv.writer(f) 58 | writer.writerow(metrics) 59 | 60 | # Write tensorboard events 61 | if self.tensorboard: 62 | self.writer.add_scalar('data/train_loss', train_loss, self.k) 63 | self.writer.add_scalar('data/val_loss', val_loss, self.k) 64 | self.writer.add_scalar('data/top1_percent', top1_percent, self.k) 65 | self.writer.add_scalar('data/top5_percent', top5_percent, self.k) 66 | self.writer.add_scalar('data/top10_percent', top10_percent, self.k) 67 | self.writer.add_scalar('data/top25_percent', top25_percent, self.k) 68 | self.k += 1 69 | 70 | if __name__=='__main__': 71 | 72 | from argparse import Namespace 73 | args = Namespace(dataset_name='shuttle', 74 | test_prop=0.2, 75 | val_prop=0.2, 76 | batch_size=128, 77 | model_name='autoencoder', 78 | tensorboard=True) 79 | 80 | t = Tracker(args) 81 | 82 | t.track(10,0.1,0.11,0.111,0.1111,0.11111) 83 | -------------------------------------------------------------------------------- /Mahalanobis/verify_mahal_equivalence.py: -------------------------------------------------------------------------------- 1 | # Author:马肖 2 | # E-mail:maxiaoscut@aliyun.com 3 | # Github:https://github.com/Albertsr 4 | 5 | import numpy as np 6 | import pandas as pd 7 | from mahal_dist import mahal_dist 8 | from mahal_dist_variant import mahal_dist_variant 9 | 10 | def generate_dataset(seed): 11 | rdg = np.random.RandomState(seed) 12 | row = rdg.randint(8000, 10000) 13 | col = rdg.randint(30, 35) 14 | contamination = rdg.uniform(0.015, 0.025) 15 | 16 | outlier_num = int(row*contamination) 17 | inlier_num = row - outlier_num 18 | 19 | # 正常样本集服从标准正态分布 20 | inliers = rdg.randn(inlier_num, col) 21 | 22 | # 如果outlier_num为奇数,row_1=outlier_num//2,否则row_1=int(outlier_num/2) 23 | row_1 = outlier_num//2 if np.mod(outlier_num, 2) else int(outlier_num/2) 24 | row_2 = outlier_num - row_1 25 | 26 | # outliers_sub_1服从伽玛分布;outliers_sub_2服从指数分布 27 | outliers_sub_1 = rdg.gamma(shape=2, scale=0.5, size=(row_1 , col)) 28 | outliers_sub_2 = rdg.exponential(1.5, size=(row_2, col)) 29 | outliers = np.r_[outliers_sub_1, outliers_sub_2] 30 | 31 | # 将inliers与outliers在axis=0方向上予以整合,构成实验数据集 32 | dataset = np.r_[inliers, outliers] 33 | outliers_indices = range(len(dataset))[inlier_num:] 34 | return dataset 35 | 36 | def verify_maldist_equivalence(dataset): 37 | # 马氏距离的初始定义 38 | dist_original = mahal_dist(dataset) 39 | # 根据数值大小,对数据集索引降序排列 40 | indices_desc_original = np.argsort(-dist_original) 41 | 42 | # 马氏距离的变体 43 | dist_variant = mahal_dist_variant(dataset) 44 | # 根据数值大小,对数据集索引降序排列 45 | indices_desc_variant = np.argsort(-dist_variant) 46 | 47 | assert not np.allclose(dist_original, dist_variant), '马氏距离及其变体返回的数值一般不相等' 48 | indices_verify_result = np.allclose(indices_desc_original, indices_desc_variant) 49 | return indices_verify_result 50 | 51 | # 生成一系列随机种子及其对应的数据集 52 | seeds = np.random.choice(range(1000), size=10, replace=False) 53 | datasets = list(map(generate_dataset, seeds)) 54 | 55 | # 返回验证结果 56 | verify_result = list(map(verify_maldist_equivalence, datasets)) 57 | 58 | # 输出验证结果 59 | if all(verify_result): 60 | description = '经过{:}个不重复的随机数据集的测试,马氏距离及其变体对样本相对异常程度的评估是一致的\n' 61 | print(description.format(len(seeds))) 62 | else: 63 | print('经过随机数据集的测试,马氏距离及其变体对样本相对异常程度的评估不一致') 64 | 65 | dataset_name = ['Dataset_' + str(i) for i in range(len(seeds))] 66 | verify_result = pd.DataFrame(verify_result, index=dataset_name, columns=['Equivalence']) 67 | print(verify_result.T) 68 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # AnomalyDetection 2 | Anomaly Detection in computer vision 3 | 4 | ## paper list in records 5 | [paper](./records/README.md) 6 | 7 | 8 | ## the difficulty and the Potential solution 9 | [solution](./records/difficulty.md) 10 | 11 | 12 | ## some resources about anomaly detection 13 | [resources](./resources.md) 14 | [projects](./projects.md) -------------------------------------------------------------------------------- /adVAE/imgs/advae.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ForrestPi/AnomalyDetection/77208ff9ba86efa937d10d8aa3f20c9fbefb2d91/adVAE/imgs/advae.png -------------------------------------------------------------------------------- /adVAE/readme.md: -------------------------------------------------------------------------------- 1 | ## Memorizing Normality to Detect Anomaly: Memory-augmented Deep Autoencoder for Unsupervised Anomaly Detection. 2 | 3 | 4 | ## Architecture 5 | ![img](https://github.com/YeongHyeon/adVAE/blob/master/figures/advae.png) 6 | 7 | 8 | 9 | 10 | ## Reference 11 | [adVAE](https://github.com/YeongHyeon/adVAE) 12 | 13 | [1] Wang, Xuhong, et al. Advae: a self-adversarial variational autoencoder with gaussian anomaly prior knowledge for anomaly detection.. Knowledge-Based Systems 190 (2020): 105187. 14 | 15 | -------------------------------------------------------------------------------- /anomalyLocalization/README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | ## Iterative energy-based projection on a normal data manifold for anomaly localization 4 | 5 | ## Architecture 6 | ![img](./imgs/001.png) 7 | 8 | 9 | ## Result 10 | ![img](./imgs/002.png) 11 | 12 | ![img](./imgs/003.png) 13 | 14 | 15 | ## Reference 16 | [1] Dehaene, David, et al. Iterative energy-based projection on a normal data manifold for anomaly localization. arXiv preprint arXiv:2002.03734 (2020). 17 | 18 | 19 | reference:https://qiita.com/kogepan102/items/122b2862ad5a51180656 -------------------------------------------------------------------------------- /anomalyLocalization/code/dataset.py: -------------------------------------------------------------------------------- 1 | # data loader 2 | import os 3 | import numpy as np 4 | from PIL import Image 5 | 6 | import torch 7 | from torch.utils import data 8 | from torchvision import transforms as T 9 | import torch.nn as nn 10 | import torch.optim as optim 11 | from torch.nn import functional as F 12 | 13 | 14 | 15 | class MVTecAD(data.Dataset): 16 | """Dataset class for the MVTecAD dataset.""" 17 | 18 | def __init__(self, image_dir, transform): 19 | """Initialize and preprocess the MVTecAD dataset.""" 20 | self.image_dir = image_dir 21 | self.transform = transform 22 | 23 | def __getitem__(self, index): 24 | """Return one image""" 25 | filename = "{:03}.png".format(index) 26 | image = Image.open(os.path.join(self.image_dir, filename)) 27 | return self.transform(image) 28 | 29 | def __len__(self): 30 | """Return the number of images.""" 31 | return len(os.listdir(self.image_dir)) 32 | 33 | 34 | def return_MVTecAD_loader(image_dir, batch_size=256, train=True): 35 | """Build and return a data loader.""" 36 | transform = [] 37 | transform.append(T.Resize((512, 512))) 38 | transform.append(T.RandomCrop((128,128))) 39 | transform.append(T.RandomHorizontalFlip(p=0.5)) 40 | transform.append(T.RandomVerticalFlip(p=0.5)) 41 | transform.append(T.ToTensor()) 42 | transform = T.Compose(transform) 43 | 44 | dataset = MVTecAD(image_dir, transform) 45 | 46 | data_loader = data.DataLoader(dataset=dataset, 47 | batch_size=batch_size, 48 | shuffle=train) 49 | return data_loader -------------------------------------------------------------------------------- /anomalyLocalization/code/eval.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | from torch.nn import functional as F 4 | from dataset import return_MVTecAD_loader 5 | from network import VAE,loss_function 6 | import matplotlib.pyplot as plt 7 | 8 | def eval(model,test_loader,device): 9 | model.eval() 10 | x_0 = iter(test_loader).next() 11 | with torch.no_grad(): 12 | x_vae = model(x_0.to(device)).detach().cpu().numpy() 13 | 14 | 15 | def EBM(model,test_loader,device): 16 | model.train() 17 | x_0 = iter(test_loader).next() 18 | alpha = 0.05 19 | lamda = 1 20 | x_0 = x_0.to(device).clone().detach().requires_grad_(True) 21 | recon_x = model(x_0).detach() 22 | loss = F.binary_cross_entropy(x_0, recon_x, reduction='sum') 23 | loss.backward(retain_graph=True) 24 | 25 | x_grad = x_0.grad.data 26 | 27 | x_t = x_0 - alpha * x_grad * (x_0 - recon_x) ** 2 28 | 29 | for i in range(15): 30 | recon_x = model(x_t).detach() 31 | loss = F.binary_cross_entropy(x_t, recon_x, reduction='sum') + lamda * torch.abs(x_t - x_0).sum() 32 | loss.backward(retain_graph=True) 33 | 34 | x_grad = x_0.grad.data 35 | #eps = 0.028 36 | x_grad = F.normalize(x_grad) 37 | eps = 0.4 38 | x_t = x_t - eps * x_grad * (x_t - recon_x) ** 2 39 | iterative_plot(x_t.detach().cpu().numpy(), i) 40 | 41 | 42 | # gif 43 | def iterative_plot(x_t, j): 44 | plt.figure(figsize=(15, 4)) 45 | for i in range(10): 46 | plt.subplot(1, 10, i+1) 47 | plt.xticks([]) 48 | plt.yticks([]) 49 | plt.imshow(x_t[i][0], cmap=plt.cm.gray) 50 | plt.subplots_adjust(wspace=0., hspace=0.) 51 | plt.savefig("./results/{}.png".format(j)) 52 | #plt.show() 53 | 54 | def main(): 55 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 56 | print(device) 57 | 58 | seed = 42 59 | out_dir = './logs' 60 | if not os.path.exists(out_dir): 61 | os.mkdir(out_dir) 62 | checkpoints_dir ="./checkpoints" 63 | if not os.path.exists(checkpoints_dir): 64 | os.mkdir(out_dir) 65 | 66 | torch.manual_seed(seed) 67 | if torch.cuda.is_available(): 68 | torch.cuda.manual_seed(seed) 69 | 70 | model = VAE(z_dim=512) 71 | model.load_state_dict(torch.load("./checkpoints/500.pth")) 72 | model=model.to(device) 73 | 74 | optimizer = torch.optim.Adam(model.parameters(), lr=5e-4) 75 | 76 | test_loader = return_MVTecAD_loader(image_dir="./mvtec_anomaly_detection/grid/test/metal_contamination/", batch_size=10, train=False) 77 | #eval(model=model,test_loader=test_loader,device=device) 78 | EBM(model,test_loader,device) 79 | 80 | if __name__ == "__main__": 81 | main() -------------------------------------------------------------------------------- /anomalyLocalization/code/eval.sh: -------------------------------------------------------------------------------- 1 | CUDA_VISIBLE_DEVICES=2 python eval.py -------------------------------------------------------------------------------- /anomalyLocalization/code/network.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | from torch.nn import functional as F 4 | class VAE(nn.Module): 5 | 6 | def __init__(self, z_dim=128): 7 | super(VAE, self).__init__() 8 | 9 | # encode 10 | self.conv_e = nn.Sequential( 11 | nn.Conv2d(1, 32, kernel_size=4, stride=2, padding=1), # 128 ⇒ 64 12 | nn.BatchNorm2d(32), 13 | nn.LeakyReLU(0.2), 14 | nn.Conv2d(32, 64, kernel_size=4, stride=2, padding=1), # 64 ⇒ 32 15 | nn.BatchNorm2d(64), 16 | nn.LeakyReLU(0.2), 17 | nn.Conv2d(64, 128, kernel_size=4, stride=2, padding=1), # 32 ⇒ 16 18 | nn.BatchNorm2d(128), 19 | nn.LeakyReLU(0.2), 20 | ) 21 | self.fc_e = nn.Sequential( 22 | nn.Linear(128 * 16 * 16, 1024), 23 | nn.BatchNorm1d(1024), 24 | nn.LeakyReLU(0.2), 25 | nn.Linear(1024, z_dim*2), 26 | ) 27 | 28 | # decode 29 | self.fc_d = nn.Sequential( 30 | nn.Linear(z_dim, 1024), 31 | nn.BatchNorm1d(1024), 32 | nn.LeakyReLU(0.2), 33 | nn.Linear(1024, 128 * 16 * 16), 34 | nn.LeakyReLU(0.2) 35 | ) 36 | self.conv_d = nn.Sequential( 37 | nn.ConvTranspose2d(128, 64, kernel_size=4, stride=2, padding=1), 38 | nn.BatchNorm2d(64), 39 | nn.LeakyReLU(0.2), 40 | nn.ConvTranspose2d(64, 32, kernel_size=4, stride=2, padding=1), 41 | nn.BatchNorm2d(32), 42 | nn.LeakyReLU(0.2), 43 | nn.ConvTranspose2d(32, 1, kernel_size=4, stride=2, padding=1), 44 | nn.Sigmoid() 45 | ) 46 | 47 | self.z_dim = z_dim 48 | 49 | def encode(self, input): 50 | x = self.conv_e(input) 51 | x = x.view(-1, 128*16*16) 52 | x = self.fc_e(x) 53 | return x[:, :self.z_dim], x[:, self.z_dim:] 54 | 55 | def reparameterize(self, mu, logvar): 56 | if self.training: 57 | std = logvar.mul(0.5).exp_() 58 | eps = std.new(std.size()).normal_() 59 | return eps.mul(std).add_(mu) 60 | else: 61 | return mu 62 | 63 | def decode(self, z): 64 | h = self.fc_d(z) 65 | h = h.view(-1, 128, 16, 16) 66 | return self.conv_d(h) 67 | 68 | def forward(self, x): 69 | mu, logvar = self.encode(x) 70 | z = self.reparameterize(mu, logvar) 71 | self.mu = mu 72 | self.logvar = logvar 73 | return self.decode(z) 74 | 75 | 76 | 77 | def loss_function(recon_x, x, mu, logvar): 78 | recon = F.binary_cross_entropy(recon_x, x, reduction='sum') 79 | kld = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp()) 80 | return recon + kld 81 | 82 | 83 | 84 | -------------------------------------------------------------------------------- /anomalyLocalization/code/train.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | from torch.nn import functional as F 4 | from dataset import return_MVTecAD_loader 5 | from network import VAE,loss_function 6 | import matplotlib.pyplot as plt 7 | 8 | def train(model,train_loader,device,optimizer,epoch): 9 | model.train() 10 | train_loss = 0 11 | for batch_idx, data in enumerate(train_loader): 12 | data = data.to(device) 13 | optimizer.zero_grad() 14 | recon_batch = model(data) 15 | loss = loss_function(recon_batch, data, model.mu, model.logvar) 16 | loss.backward() 17 | train_loss += loss.item() 18 | optimizer.step() 19 | train_loss /= len(train_loader.dataset) 20 | return train_loss 21 | 22 | 23 | def eval(model,test_loader,device): 24 | model.eval() 25 | x_0 = iter(test_loader).next() 26 | with torch.no_grad(): 27 | x_vae = model(x_0.to(device)).detach().cpu().numpy() 28 | 29 | 30 | def EBM(model,test_loader,device): 31 | model.train() 32 | x_0 = iter(test_loader).next() 33 | alpha = 0.05 34 | lamda = 1 35 | x_0 = x_0.to(device).clone().detach().requires_grad_(True) 36 | recon_x = model(x_0).detach() 37 | loss = F.binary_cross_entropy(x_0, recon_x, reduction='sum') 38 | loss.backward(retain_graph=True) 39 | 40 | x_grad = x_0.grad.data 41 | x_t = x_0 - alpha * x_grad * (x_0 - recon_x) ** 2 42 | 43 | for i in range(15): 44 | recon_x = model(x_t).detach() 45 | loss = F.binary_cross_entropy(x_t, recon_x, reduction='sum') + lamda * torch.abs(x_t - x_0).sum() 46 | loss.backward(retain_graph=True) 47 | 48 | x_grad = x_0.grad.data 49 | eps = 0.001 50 | x_t = x_t - eps * x_grad * (x_t - recon_x) ** 2 51 | iterative_plot(x_t.detach().cpu().numpy(), i) 52 | 53 | 54 | # gif 55 | def iterative_plot(x_t, j): 56 | plt.figure(figsize=(15, 4)) 57 | for i in range(10): 58 | plt.subplot(1, 10, i+1) 59 | plt.xticks([]) 60 | plt.yticks([]) 61 | plt.imshow(x_t[i][0], cmap=plt.cm.gray) 62 | plt.subplots_adjust(wspace=0., hspace=0.) 63 | plt.savefig("./results/{}.png".format(j)) 64 | #plt.show() 65 | 66 | def main(): 67 | train_loader = return_MVTecAD_loader(image_dir="./mvtec_anomaly_detection/grid/train/good/", batch_size=256, train=True) 68 | 69 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 70 | print(device) 71 | 72 | seed = 42 73 | out_dir = './logs' 74 | if not os.path.exists(out_dir): 75 | os.mkdir(out_dir) 76 | checkpoints_dir ="./checkpoints" 77 | if not os.path.exists(checkpoints_dir): 78 | os.mkdir(out_dir) 79 | 80 | torch.manual_seed(seed) 81 | if torch.cuda.is_available(): 82 | torch.cuda.manual_seed(seed) 83 | 84 | model = VAE(z_dim=512).to(device) 85 | 86 | optimizer = torch.optim.Adam(model.parameters(), lr=5e-4) 87 | num_epochs = 500 88 | for epoch in range(num_epochs): 89 | loss = train(model=model,train_loader=train_loader,device=device,optimizer=optimizer,epoch=epoch) 90 | print('epoch [{}/{}], train loss: {:.4f}'.format(epoch + 1,num_epochs,loss)) 91 | if (epoch+1) % 10 == 0: 92 | torch.save(model.state_dict(), os.path.join(checkpoints_dir,"{}.pth".format(epoch+1))) 93 | test_loader = return_MVTecAD_loader(image_dir="./mvtec_anomaly_detection/grid/test/metal_contamination/", batch_size=10, train=False) 94 | eval(model=model,test_loader=test_loader,device=device) 95 | EBM(model,test_loader,device) 96 | 97 | if __name__ == "__main__": 98 | main() -------------------------------------------------------------------------------- /anomalyLocalization/code/train.sh: -------------------------------------------------------------------------------- 1 | CUDA_VISIBLE_DEVICES=1 python train.py -------------------------------------------------------------------------------- /anomalyLocalization/imgs/001.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ForrestPi/AnomalyDetection/77208ff9ba86efa937d10d8aa3f20c9fbefb2d91/anomalyLocalization/imgs/001.png -------------------------------------------------------------------------------- /anomalyLocalization/imgs/002.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ForrestPi/AnomalyDetection/77208ff9ba86efa937d10d8aa3f20c9fbefb2d91/anomalyLocalization/imgs/002.png -------------------------------------------------------------------------------- /anomalyLocalization/imgs/003.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ForrestPi/AnomalyDetection/77208ff9ba86efa937d10d8aa3f20c9fbefb2d91/anomalyLocalization/imgs/003.png -------------------------------------------------------------------------------- /anomalyLocalization/imgs/face - 副本.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ForrestPi/AnomalyDetection/77208ff9ba86efa937d10d8aa3f20c9fbefb2d91/anomalyLocalization/imgs/face - 副本.png -------------------------------------------------------------------------------- /dataset/imgs/001.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ForrestPi/AnomalyDetection/77208ff9ba86efa937d10d8aa3f20c9fbefb2d91/dataset/imgs/001.png -------------------------------------------------------------------------------- /dataset/readme.md: -------------------------------------------------------------------------------- 1 | ## MVTec AD--A Comprehensive Real-World Dataset for Unsupervised Anomaly Detection 2 | 3 | ## Presentation 4 | ![img](./imgs/001.png) 5 | 6 | ## Reference 7 | [1] Bergmann, Paul, et al. MVTec AD--A Comprehensive Real-World Dataset for Unsupervised Anomaly Detection. Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition. 2019. 8 | -------------------------------------------------------------------------------- /memae/imgs/memae.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ForrestPi/AnomalyDetection/77208ff9ba86efa937d10d8aa3f20c9fbefb2d91/memae/imgs/memae.png -------------------------------------------------------------------------------- /memae/memoryzing_normality_to_detect_anomaly.py: -------------------------------------------------------------------------------- 1 | """ 2 | Memorizing Normality to Detect Anomaly: Memory-augmented Deep Autoencoder for Unsupervised Anomaly Detection 3 | https://arxiv.org/pdf/1904.02639.pdf 4 | 5 | #https://github.com/VieVie31/cool-papers-in-pytorch/blob/master/memoryzing_normality_to_detect_anomaly.py 6 | """ 7 | import numpy as np 8 | import matplotlib.pyplot as plt 9 | 10 | import torch 11 | import torch as T 12 | import torch.nn as nn 13 | 14 | from torch.nn.modules import * 15 | 16 | from tqdm import tqdm, trange 17 | from torchvision import datasets, transforms 18 | 19 | from sklearn.metrics import f1_score, accuracy_score 20 | 21 | 22 | T.set_default_tensor_type('torch.FloatTensor') 23 | 24 | batch_size = 32 25 | nb_epochs = 1 26 | nb_digits = 10 27 | 28 | 29 | train_normals = [ 30 | img for img, lbl in datasets.MNIST( 31 | './data', train=True, download=True, 32 | transform=transforms.Compose([ 33 | transforms.ToTensor(), 34 | #transforms.Normalize((0.1307,), (0.3081,)) 35 | ]) 36 | ) if lbl == 9 37 | ] 38 | train_normals = torch.utils.data.TensorDataset( 39 | torch.tensor([v.numpy() for v in train_normals]) 40 | ) 41 | train_normals_loader = T.utils.data.DataLoader( 42 | train_normals, 43 | batch_size=batch_size, 44 | shuffle=True 45 | ) 46 | 47 | 48 | train_loader = T.utils.data.DataLoader(datasets.MNIST( 49 | './data', train=True, download=True, 50 | transform=transforms.Compose([ 51 | transforms.ToTensor(), 52 | ])), 53 | batch_size=batch_size, shuffle=True 54 | ) 55 | 56 | test_loader = T.utils.data.DataLoader(datasets.MNIST( 57 | './data', train=False, download=True, 58 | transform=transforms.Compose([ 59 | transforms.ToTensor(), 60 | ])), 61 | batch_size=batch_size, shuffle=False 62 | ) 63 | 64 | class Encoder(nn.Module): 65 | def __init__(self): 66 | super(Encoder, self).__init__() 67 | self.cnn = nn.Sequential( 68 | nn.Conv2d(1, 16, 1, stride=1), 69 | nn.BatchNorm2d(16), 70 | nn.ReLU(), 71 | nn.Conv2d(16, 32, 3, stride=2), 72 | nn.BatchNorm2d(32), 73 | nn.ReLU(), 74 | nn.Conv2d(32, 64, 3, stride=2), 75 | nn.BatchNorm2d(64), 76 | nn.ReLU(), 77 | ) 78 | 79 | def forward(self, x): 80 | return self.cnn(x) 81 | 82 | 83 | class Decoder(nn.Module): 84 | def __init__(self): 85 | super(Decoder, self).__init__() 86 | self.cnn = nn.Sequential( 87 | nn.ConvTranspose2d(64, 32, 2, stride=2), 88 | nn.BatchNorm2d(32), 89 | nn.ReLU(), 90 | nn.ConvTranspose2d(32, 16, 2, stride=2), 91 | nn.BatchNorm2d(16), 92 | nn.ReLU(), 93 | nn.ConvTranspose2d(16, 1, 3, ), 94 | nn.Sigmoid() 95 | ) 96 | 97 | def forward(self, x): 98 | return self.cnn(x) #[B, 1, 26, 26] 99 | 100 | class Memory(nn.Module): 101 | def __init__(self, dimention, capacity=100, lbd=.02): 102 | super(Memory, self).__init__() 103 | self.cap = capacity 104 | self.dim = dimention 105 | self.lbd = lbd 106 | self.mem = T.rand((capacity, dimention), requires_grad=True) 107 | self.cos_sim = nn.CosineSimilarity() 108 | self.softmax = nn.Softmax(1) 109 | 110 | def forward(self, z): 111 | #z should be : [BATCH, dimention] 112 | z = z.unsqueeze(1) 113 | #compute w with attention 114 | w = self.softmax(self.cos_sim( 115 | z.permute(0, 2, 1), 116 | self.mem.expand(z.shape[0], self.cap, self.dim).permute(0, 2, 1) 117 | )) 118 | #hard-shrinking of w 119 | t = w - self.lbd 120 | w_hat = (T.max(t, T.zeros(w.shape)) * w) / (abs(t) + 1e-15) 121 | print("average number of 0ed adresses", ((w_hat == 0).sum(1)).float().mean()) 122 | w_hat = (w_hat + 1e-15) / (w_hat + 1e-15).sum(1).reshape(-1, 1) #adding epsilon because of infinity graidnt => nan 123 | #compute the w_hat enery by request 124 | adressing_enery = (-w_hat * T.log(w_hat + 1e-3)).sum(0) 125 | #get z_hat from memory with the computer soft adresseses w_hat 126 | z_hat = w_hat.mm(self.mem) 127 | return z_hat, adressing_enery 128 | 129 | # Build the proposed model 130 | class MemAE(nn.Module): 131 | def __init__(self, dimension=2304, capacity=100, lbd=.002): 132 | super(MemAE, self).__init__() 133 | self.encoder = Encoder() 134 | self.decoder = Decoder() 135 | self.memory = Memory(dimention=dimension, capacity=capacity, lbd=lbd) 136 | 137 | def forward(self, x): 138 | # Compute z and flatten it 139 | z = self.encoder(x) 140 | encoded_input_shape = z.shape 141 | z = z.reshape(z.shape[0], -1) 142 | # Get the new z_hat latent representation and the energy required for retriving it 143 | z_hat, adressing_enery = self.memory(z) 144 | # Decode the new latent representation 145 | out = self.decoder(z_hat.reshape(encoded_input_shape)) 146 | return out, adressing_enery 147 | 148 | def parameters(self): 149 | for p in self.encoder.parameters(): 150 | yield p 151 | for p in self.decoder.parameters(): 152 | yield p 153 | yield self.memory.mem 154 | return 155 | 156 | # Train a classic ConvAE for future comparison 157 | classic_AE = nn.Sequential(Encoder(), Decoder()) 158 | 159 | optimizer = torch.optim.Adam(classic_AE.parameters()) 160 | loss_function = nn.BCELoss() 161 | 162 | classic_AE.train() 163 | for (x,) in tqdm(train_normals_loader): 164 | y = x[:, :, 1:-1, 1:-1] 165 | optimizer.zero_grad() 166 | yhat = classic_AE(x.view([x.shape[0], 1, 28, 28])) 167 | loss = loss_function(yhat, y) 168 | loss.backward() 169 | optimizer.step() 170 | 171 | 172 | # Train the proposed anomaly detection autoencoder 173 | anomdec_memae = MemAE(lbd=.01) 174 | 175 | optimizer = torch.optim.Adam(anomdec_memae.parameters()) 176 | loss_function = nn.BCELoss() 177 | 178 | anomdec_memae.train() 179 | for i in range(2): 180 | for (x,) in tqdm(train_normals_loader): 181 | y = x[:, :, 1:-1, 1:-1] 182 | optimizer.zero_grad() 183 | yhat, energy = anomdec_memae(x.view([x.shape[0], 1, 28, 28])) 184 | loss = loss_function(yhat, y) + (.002 * energy).mean() 185 | loss.backward() 186 | optimizer.step() 187 | #slowly augment the sparse regulariation for addressing 188 | anomdec_memae.memory.lbd = min(anomdec_memae.memory.lbd + 1e-5, 0.01005) 189 | print(loss.item(), energy.mean().item()) 190 | 191 | # Try to classify 9 or not 9 after learning only on 9 on the test set after fining the optimal threshold a posteriori 192 | 193 | # Print the classical reconstruction error with normal AE (at 1.5 std) 194 | classic_recontruction = [] 195 | labels = [] 196 | for xx, yy in tqdm(test_loader): 197 | classic_recontruction.extend( 198 | ((classic_AE(xx) - xx[:, :, 1:-1, 1:-1]) ** 2).sum(1).sum(1).sum(1).detach().numpy() 199 | ) 200 | labels.extend(yy.numpy()) 201 | 202 | print( 203 | "classical mean training reconstruction error on normal : ", 204 | np.array(classic_recontruction)[np.array(labels) == 9].mean() 205 | ) 206 | print( 207 | "classical mean training reconstruction error on abnormal : ", 208 | np.array(classic_recontruction)[np.array(labels) != 9].mean() 209 | ) 210 | 211 | naive_th = np.array(classic_recontruction)[np.array(labels) == 9].mean() + 1.5 * np.array(classic_recontruction)[np.array(labels) == 9].std() 212 | 213 | print("classical AE f1 :", f1_score(np.array(labels) == 9, classic_recontruction < naive_th)) 214 | print("classical AE acc:", accuracy_score(np.array(labels) == 9, classic_recontruction < naive_th)) 215 | #classical AE f1 : 0.1899810019 216 | #classical AE acc: 0.1899 217 | 218 | # Compare with the new method 219 | memae_recontruction = [] 220 | labels = [] 221 | for xx, yy in tqdm(test_loader): 222 | memae_recontruction.extend( 223 | ((anomdec_memae(xx)[0] - xx[:, :, 1:-1, 1:-1]) ** 2).sum(1).sum(1).sum(1).detach().numpy() 224 | ) 225 | labels.extend(yy.numpy()) 226 | 227 | print( 228 | "anomdec_memae mean training reconstruction error on normal : ", 229 | np.array(memae_recontruction)[np.array(labels) == 9].mean() 230 | ) 231 | print( 232 | "anomdec_memae mean training reconstruction error on abnormal : ", 233 | np.array(memae_recontruction)[np.array(labels) != 9].mean() 234 | ) 235 | 236 | naive_th = np.array(memae_recontruction)[np.array(labels) == 9].mean() + 1.5 * np.array(memae_recontruction)[np.array(labels) == 9].std() 237 | 238 | print("memory AE f1 :", f1_score(np.array(labels) == 9, memae_recontruction < naive_th)) 239 | print("memory AE acc:", accuracy_score(np.array(labels) == 9, memae_recontruction < naive_th)) 240 | #memory AE f1 : 0.455628495016 241 | #memory AE acc: 0.7761 242 | -------------------------------------------------------------------------------- /memae/readme.md: -------------------------------------------------------------------------------- 1 | 2 | ## Memorizing Normality to Detect Anomaly: Memory-augmented Deep Autoencoder for Unsupervised Anomaly Detection. 3 | 4 | 5 | ## Architecture 6 | ![img](https://github.com/YeongHyeon/MemAE/blob/master/figures/memae.png) 7 | 8 | 9 | ## Reference 10 | [MemAE](https://github.com/YeongHyeon/MemAE) 11 | 12 | [1] Dong Gong et al. (2019). Memorizing Normality to Detect Anomaly: Memory-augmented Deep Autoencoder for Unsupervised Anomaly Detection. arXiv preprint arXiv:1904.02639. 13 | -------------------------------------------------------------------------------- /projects.md: -------------------------------------------------------------------------------- 1 | [pyod](https://github.com/yzhao062/pyod) 2 | 3 | [SUOD](https://github.com/yzhao062/SUOD) 4 | 5 | [anomaly-detection-resources](https://github.com/yzhao062/anomaly-detection-resources) 6 | 7 | -------------------------------------------------------------------------------- /records/README.md: -------------------------------------------------------------------------------- 1 | # awesome anomaly detection 2 | A curated list of awesome anomaly detection resources. Inspired by [`awesome-architecture-search`](https://github.com/sdukshis/awesome-ml) and [`awesome-automl`](https://github.com/hibayesian/awesome-automl-papers). 3 | 4 | *Last updated: 2020/02/27* 5 | 6 | ## What is anomaly detection? 7 | 8 | ![img](./imgs/anomaly_detection_example1.PNG) 9 | 10 | 11 | Anomaly detection is a technique used to identify unusual patterns that do not conform to expected behavior, called outliers. Typically, this is treated as an unsupervised learning problem where the anomalous samples are not known a priori and it is assumed that the majority of the training dataset consists of “normal” data (here and elsewhere the term “normal” means *not anomalous* and is unrelated to the Gaussian distribution). [Lukas Ruff et al., 2018; Deep One-Class Classification] 12 | 13 | In general, Anomaly detection is also called `Novelty Detection` or `Outlier Detection`, `Forgery Detection` and `Out-of-distribution Detection`. 14 | 15 | Each term has slightly different meanings. Mostly, on the assumption that you do not have unusual data, this problem is especially called `One Class Classification`, `One Class Segmentation`. 16 | 17 | ![img](./imgs/anomaly_detection_types.png) 18 | 19 | 20 | and `Novelty Detection` and `Outlier Detection` have slightly different meanings. Figure below shows the differences of two terms. 21 | 22 | Also, there are two types of target data. (`time-series data`, and `image data`) 23 | In time-series data, it is aimed to detect a abnormal sections or frames in input data. (ex, videos, signal, etc) 24 | In image data, it is aimed to classify abnormal images or to segment abnormal regions, for example, defect in some manufacturing data. 25 | 26 | ## Survey Paper 27 | - Deep Learning for Anomaly Detection: A Survey | Raghavendra Chalapathy, Sanjay Chawla | **[arXiv' 19]** |[`[pdf]`](https://arxiv.org/pdf/1901.03407.pdf) 28 | 29 | 30 | ## Table of Contents 31 | 32 | (#time-series-anomaly-detection) 33 | - [Image-level anomaly detection](#image-level-anomaly-detection) 34 | - [Anomaly Classification target](#anomaly-classification-target) 35 | - [Out-Of-Distribution(OOD) Detction target](#out-of-distributionood-detction-target) 36 | - [Anomaly Segmentation target](#anomaly-segmentation-target) 37 | 38 | 39 | ## Image-level anomaly detection 40 | 41 | ### One Class (Anomaly) Classification target 42 | - Estimating the Support of a High- Dimensional Distribution [**OC-SVM**] | **[Journal of Neural Computation' 01]** | [`[pdf]`](http://users.cecs.anu.edu.au/~williams/papers/P132.pdf) 43 | - A Survey of Recent Trends in One Class Classification | **[AICS' 09]** | [`[pdf]`](https://aran.library.nuigalway.ie/xmlui/bitstream/handle/10379/1472/camera_ready_occ_lnai.pdf?sequence=1) 44 | - Anomaly detection using autoencoders with nonlinear dimensionality reduction | **[MLSDA Workshop' 14]** | [`[link]`](https://dl.acm.org/citation.cfm?id=2689747) 45 | - A review of novelty detection | **[Signal Processing' 14]** | [`[link]`](https://www.sciencedirect.com/science/article/pii/S016516841300515X) 46 | - Variational Autoencoder based Anomaly Detection using Reconstruction Probability | **[SNU DMC Tech' 15]** | [`[pdf]`](http://dm.snu.ac.kr/static/docs/TR/SNUDM-TR-2015-03.pdf) 47 | - High-dimensional and large-scale anomaly detection using a linear one-class SVM with deep learning | **[Pattern Recognition' 16]** | [`[link]`](https://dl.acm.org/citation.cfm?id=2952200) 48 | - Transfer Representation-Learning for Anomaly Detection | **[ICML' 16]** | [`[pdf]`](https://pdfs.semanticscholar.org/c533/52a4239568cc915ad968aff51c49924a3072.pdf) 49 | - Outlier Detection with Autoencoder Ensembles | **[SDM' 17]** | [`[pdf]`](http://saketsathe.net/downloads/autoencode.pdf) 50 | - Provable self-representation based outlier detection in a union of subspaces | **[CVPR' 17]** | [`[pdf]`](https://arxiv.org/pdf/1704.03925.pdf) 51 | - [**ALOCC**]Adversarially Learned One-Class Classifier for Novelty Detection | **[CVPR' 18]** | [`[pdf]`](https://arxiv.org/pdf/1802.09088.pdf) [`[code]`](https://github.com/khalooei/ALOCC-CVPR2018) 52 | - Learning Deep Features for One-Class Classification | **[arXiv' 18]** | [`[pdf]`](https://arxiv.org/pdf/1801.05365.pdf) [`[code]`](https://github.com/PramuPerera/DeepOneClass) 53 | - Efficient GAN-Based Anomaly Detection | **[arXiv' 18]** | [`[pdf]`](https://arxiv.org/pdf/1802.06222.pdf) 54 | - Hierarchical Novelty Detection for Visual Object Recognition | **[CVPR' 18]** | [`[pdf]`](https://arxiv.org/pdf/1804.00722.pdf) 55 | - Deep One-Class Classification | **[ICML' 18]** | [`[pdf]`](http://data.bit.uni-bonn.de/publications/ICML2018.pdf) 56 | - Reliably Decoding Autoencoders’ Latent Spaces for One-Class Learning Image Inspection Scenarios | **[OAGM Workshop' 18]** | [`[pdf]`](https://workshops.aapr.at/wp-content/uploads/Proceedings/2018/OAGM_2018_paper_19.pdf) 57 | - q-Space Novelty Detection with Variational Autoencoders | **[arXiv' 18]** | [`[pdf]`](https://arxiv.org/pdf/1806.02997.pdf) 58 | - GANomaly: Semi-Supervised Anomaly Detection via Adversarial Training | **[ACCV' 18]** | [`[pdf]`](https://arxiv.org/pdf/1805.06725.pdf) 59 | - Deep Anomaly Detection Using Geometric Transformations | **[NIPS' 18]** | [`[pdf]`](http://papers.nips.cc/paper/8183-deep-anomaly-detection-using-geometric-transformations.pdf) 60 | - Generative Probabilistic Novelty Detection with Adversarial Autoencoders | **[NIPS' 18]** | [`[pdf]`](http://papers.nips.cc/paper/7915-generative-probabilistic-novelty-detection-with-adversarial-autoencoders.pdf) 61 | - A loss framework for calibrated anomaly detection | **[NIPS' 18]** | [`[pdf]`](http://papers.nips.cc/paper/7422-a-loss-framework-for-calibrated-anomaly-detection.pdf) 62 | - A Practical Algorithm for Distributed Clustering and Outlier Detection | **[NIPS' 18]** | [`[pdf]`](http://papers.nips.cc/paper/7493-a-practical-algorithm-for-distributed-clustering-and-outlier-detection.pdf) 63 | - Efficient Anomaly Detection via Matrix Sketching | **[NIPS' 18]** | [`[pdf]`](http://papers.nips.cc/paper/8030-efficient-anomaly-detection-via-matrix-sketching.pdf) 64 | - Adversarially Learned Anomaly Detection | **[IEEE ICDM' 18]** | [`[pdf]`](https://arxiv.org/pdf/1812.02288.pdf) 65 | - Anomaly Detection With Multiple-Hypotheses Predictions | **[ICML' 19]** | [`[pdf]`](https://arxiv.org/pdf/1810.13292v5.pdf) 66 | - Exploring Deep Anomaly Detection Methods Based on Capsule Net | **[ICMLW' 19]** | [`[pdf]`](https://arxiv.org/pdf/1907.06312v1.pdf) 67 | - Latent Space Autoregression for Novelty Detection | **[CVPR' 19]** | [`[pdf]`](https://arxiv.org/pdf/1807.01653.pdf) 68 | - OCGAN: One-Class Novelty Detection Using GANs With Constrained Latent Representations | **[CVPR' 19]** | [`[pdf]`](https://arxiv.org/pdf/1903.08550.pdf) 69 | - Unsupervised Learning of Anomaly Detection from Contaminated Image Data using Simultaneous Encoder Training | **[arXiv' 19]** | [`[pdf]`](https://arxiv.org/pdf/1905.11034.pdf) 70 | - Using Self-Supervised Learning Can Improve Model Robustness and Uncertainty | **[NeurIPS' 19]** | [`[pdf]`](https://arxiv.org/abs/1906.12340) [`[code]`](https://github.com/hendrycks/ss-ood) 71 | - Classification-Based Anomaly Detection for General Data | **[ICLR' 20]** | [`[pdf]`](https://openreview.net/pdf?id=H1lK_lBtvS) 72 | - Robust Subspace Recovery Layer for Unsupervised Anomaly Detection | **[ICLR' 20]** | [`[pdf]`](https://openreview.net/pdf?id=rylb3eBtwr) 73 | - RaPP: Novelty Detection with Reconstruction along Projection Pathway | **[ICLR' 20]** | [`[pdf]`](https://openreview.net/pdf?id=HkgeGeBYDB) 74 | - Novelty Detection Via Blurring | **[ICLR' 20]** | [`[pdf]`](https://openreview.net/pdf?id=ByeNra4FDB) 75 | - Deep Semi-Supervised Anomaly Detection | **[ICLR' 20]** | [`[pdf]`](https://openreview.net/pdf?id=HkgH0TEYwH) 76 | - Robust anomaly detection and backdoor attack detection via differential privacy | **[ICLR' 20]** | [`[pdf]`](https://openreview.net/pdf?id=SJx0q1rtvS) 77 | 78 | 79 | ### Out-of-Distribution(OOD) Detction target 80 | - A Baseline for Detecting Misclassified and Out-of-Distribution Examples in Neural Networks | **[ICLR' 17]** | [`[pdf]`](https://arxiv.org/pdf/1610.02136.pdf) 81 | - [**ODIN**] Enhancing The Reliability of Out-of-distribution Image Detection in Neural Networks | **[ICLR' 18]** | [`[pdf]`](https://arxiv.org/pdf/1706.02690.pdf) 82 | - Training Confidence-calibrated Classifiers for Detecting Out-of-Distribution Samples | **[ICLR' 18]** | [`[pdf]`](https://arxiv.org/pdf/1711.09325.pdf) 83 | - Learning Confidence for Out-of-Distribution Detection in Neural Networks | **[arXiv' 18]** | [`[pdf]`](https://arxiv.org/pdf/1802.04865.pdf) 84 | - Out-of-Distribution Detection using Multiple Semantic Label Representations | **[NIPS' 18]** | [`[pdf]`](http://papers.nips.cc/paper/7967-out-of-distribution-detection-using-multiple-semantic-label-representations.pdf) 85 | - A Simple Unified Framework for Detecting Out-of-Distribution Samples and Adversarial Attacks | **[NIPS' 18]** | [`[pdf]`](http://papers.nips.cc/paper/7947-a-simple-unified-framework-for-detecting-out-of-distribution-samples-and-adversarial-attacks.pdf) 86 | - Deep Anomaly Detection with Outlier Exposure | **[ICLR' 19]** | [`[pdf]`](https://openreview.net/pdf?id=HyxCxhRcY7) 87 | - Why ReLU networks yield high-confidence predictions far away from the training data and how to mitigate the problem | **[CVPR' 19]** | [`[pdf]`](https://arxiv.org/pdf/1812.05720.pdf) 88 | - Outlier Exposure with Confidence Control for Out-of-Distribution Detection | **[arXiv' 19]** | [`[pdf]`](https://arxiv.org/abs/1906.03509v2) [`[code]`](https://github.com/nazim1021/OOD-detection-using-OECC) 89 | - Likelihood Ratios for Out-of-Distribution Detection | **[NeurIPS' 19]** | [`[pdf]`](https://arxiv.org/pdf/1906.02845.pdf) 90 | - Input Complexity and Out-of-distribution Detection with Likelihood-based Generative Models | **[ICLR' 20]** | [`[pdf]`](https://openreview.net/pdf?id=SyxIWpVYvr) 91 | 92 | 93 | ### One Class (Anomaly) Segmentation target 94 | - Anomaly Detection and Localization in Crowded Scenes | **[TPAMI' 14]** | [`[pdf]`](http://www.svcl.ucsd.edu/publications/journal/2013/pami.anomaly/pami_anomaly.pdf) 95 | - Novelty detection in images by sparse representations | **[IEEE Symposium on IES' 14]** | [`[link]`](https://ieeexplore.ieee.org/document/7008985/) 96 | - Detecting anomalous structures by convolutional sparse models | **[IJCNN' 15]** | [`[pdf]`](http://www.cs.tut.fi/~foi/papers/IJCNN2015-Carrera-Detecting_Anomalous_Structures.pdf) 97 | - Real-Time Anomaly Detection and Localization in Crowded Scenes | **[CVPR Workshop' 15]** | [`[pdf]`](https://arxiv.org/pdf/1511.06936.pdf) 98 | - Learning Deep Representations of Appearance and Motion for Anomalous Event Detection | **[BMVC' 15]** | [`[pdf]`](https://arxiv.org/pdf/1510.01553.pdf) 99 | - Scale-invariant anomaly detection with multiscale group-sparse models | **[IEEE ICIP' 16]** | [`[link]`](https://ieeexplore.ieee.org/document/7533089/) 100 | - [**AnoGAN**] Unsupervised Anomaly Detection with Generative Adversarial Networks to Guide Marker Discovery | **[IPMI' 17]** | [`[pdf]`](https://arxiv.org/pdf/1703.05921.pdf) 101 | - Deep-Anomaly: Fully Convolutional Neural Network for Fast Anomaly Detection in Crowded Scenes | **[Journal of Computer Vision and Image Understanding' 17]** | [`[pdf]`](https://arxiv.org/pdf/1609.00866.pdf) 102 | - Anomaly Detection using a Convolutional Winner-Take-All Autoencoder | **[BMVC' 17]** | [`[pdf]`](http://eprints.whiterose.ac.uk/121891/1/BMVC2017.pdf) 103 | - Anomaly Detection in Nanofibrous Materials by CNN-Based Self-Similarity | **[Sensors' 17]** | [`[pdf]`](http://www.mdpi.com/1424-8220/18/1/209/pdf) 104 | - Defect Detection in SEM Images of Nanofibrous Materials | **[IEEE Trans. on Industrial Informatics' 17]** | [`[pdf]`](http://home.deib.polimi.it/boracchi/docs/2017_Anomaly_Detection_SEM.pdf) 105 | - Abnormal event detection in videos using generative adversarial nets | **[ICIP' 17]** | [`[link]`](https://ieeexplore.ieee.org/document/8296547/) 106 | - An overview of deep learning based methods for unsupervised and semi-supervised anomaly detection in videos | **[arXiv' 18]** | [`[pdf]`](https://arxiv.org/pdf/1801.03149.pdf) 107 | - Improving Unsupervised Defect Segmentation by Applying Structural Similarity to Autoencoders | **[arXiv' 18]** | [`[pdf]`](https://arxiv.org/pdf/1807.02011.pdf) 108 | - Satellite Image Forgery Detection and Localization Using GAN and One-Class Classifier | **[IS&T EI' 18]** | [`[pdf]`](https://arxiv.org/pdf/1802.04881.pdf) 109 | - Deep Autoencoding Models for Unsupervised Anomaly Segmentation in Brain MR Images | **[arXiv' 18]** | [`[pdf]`](https://arxiv.org/pdf/1804.04488.pdf) 110 | - AVID: Adversarial Visual Irregularity Detection | **[arXiv' 18]** |[`[pdf]`](https://arxiv.org/pdf/1805.09521.pdf) 111 | - MVTec AD -- A Comprehensive Real-World Dataset for Unsupervised Anomaly Detection | **[CVPR' 19]** | [`[pdf]`](https://www.mvtec.com/fileadmin/Redaktion/mvtec.com/company/research/mvtec_ad.pdf) 112 | - Exploiting Epistemic Uncertainty of Anatomy Segmentation for Anomaly Detection in Retinal OCT | **[IEEE TMI' 19]** | [`[pdf]`](https://arxiv.org/pdf/1905.12806v1.pdf) 113 | - Uninformed Students: Student-Teacher Anomaly Detection with Discriminative Latent Embeddings | **[arXiv' 19]** | [`[pdf]`](https://arxiv.org/pdf/1911.02357.pdf) 114 | - Attention Guided Anomaly Detection and Localization in Images | **[arXiv' 19]** | [`[pdf]`](https://arxiv.org/pdf/1911.08616v1.pdf) 115 | 116 | -------------------------------------------------------------------------------- /records/difficulty.md: -------------------------------------------------------------------------------- 1 | 异常检测在图像领域困难点 2 | 3 | 1、维度灾难:图像维度高,传统机器学习领域方法无法有效应对维度灾难问题。 4 | 5 | 2、特征表征:图像特征包含较高得语义信息,在无监督信息下无法有效得提取,同时还得保持特征空间一致性。 6 | 7 | 3、理论困乏:目前针对图像领域,缺乏有效手段界定Anomaly Score。 8 | 9 | 10 | 思考点: 11 | 1、目前纯无监督学习,无法做到异常检测。参见[Anomaly Detection in Images](http://arxiv.org/pdf/1905.13147v1.pdf) 12 | 13 | 14 | 数据集: 15 | ![image](imgs/MVTec.png) 16 | 参见[MVTec AD--A Comprehensive Real-World Dataset for Unsupervised Anomaly Detection](http://openaccess.thecvf.com/content_CVPR_2019/papers/Bergmann_MVTec_AD_--_A_Comprehensive_Real-World_Dataset_for_Unsupervised_Anomaly_CVPR_2019_paper.pdf) 17 | 18 | 有效解决方案: 19 | 20 | 1、基于 student–teacher learning with Discriminative Latent Embeddings方式。参见[Uninformed Students: Student-Teacher Anomaly Detection with Discriminative Latent Embeddings](http://arxiv.org/pdf/1911.02357v1.pdf) 21 | ![image](imgs/image001.png) 22 | ![image](imgs/image002.png) 23 | 24 | 采用了度量学习方式,同时基于student–teacher网络在特征空间上对每个feature元素做密集回归的方式,学习异常分布。 25 | 26 | 2、基于迭代的能量优化模型 参见[Iterative energy-based projection on a normal data manifold for anomaly localization](https://openreview.net/pdf?id=HJx81ySKwr) 27 | ![image](imgs/image003.png) 28 | 29 | ![image](imgs/image004.png) 30 | ![image](imgs/image005.jpg) 31 | ![image](imgs/image006.jpg) 32 | 33 | ![image](imgs/image007.png) 34 | 35 | 以梯度迭代优化的思想,构建能量优化函数,循环迭代,恢复正常流形空间。借鉴了图像修复的思想。 36 | 37 | 3、基于Memory-augmented思想 构建Autoencoder。使得恢复图像是由正常样本embedding组合而成,避免了恢复出异常图像的可能性。参见[Memorizing Normality to Detect Anomaly: Memory-augmented Deep Autoencoder for Unsupervised Anomaly Detection](http://arxiv.org/pdf/1904.02639v1.pdf) 38 | ![image](imgs/image008.png) 39 | 40 | 文中提到Autoencoder以及VAE 并不能有效的将异常图像恢复出正常图像,用存储器模块来增强自动编码器,并开发一种称为存储器增强自动编码器的改进的自动编码器,即MemAEMemAE首先从编码器获取编码,然后将其用作查询以检索用于重建的最相关的存储器项。在训练阶段,更新存储器内容并鼓励它们表示正常数据的原型元素。在测试阶段,学习的存储器将被固定,并且从正常数据的一些选定的存储器记录中获得重建。因此,重建将倾向于接近正常样本。 41 | 42 | 相似的工作 参见[Memory Augmented Generative Adversarial Networks for Anomaly Detection](http://arxiv.org/pdf/2002.02669v1.pdf) 43 | 44 | [History-based Anomaly Detector: an Adversarial Approach to Anomaly Detection](http://arxiv.org/pdf/1912.11843v1.pdf) 45 | 46 | 47 | 4、尝试基于MMD学习异常scores,比较硬核。参见[Anomaly scores for generative models](http://arxiv.org/pdf/1905.11890v1.pdf) 48 | 49 | 5、尝试基于图像视觉思想解决问题,可以从ChangeDetection+self supervised learning+transfer learning考虑。 50 | 51 | ![image](imgs/7.jpg) 52 | ![image](imgs/BP202190822100682_3_3.jpg) 53 | 54 | 6、基于图像修复方法进行 55 | 56 | ![image](imgs/image009.jpeg) 57 | 58 | * 提前了解正常图像的异常检测。 59 | * 另外,用普通图像训练部分卷积。 60 | * 遮罩原始图像(蒙版图像)。 61 | * 将其输入到部分卷积中,以获得与蒙版部分互补的图像(预测图像)。 62 | * 对所获取的图像执行异常检测以获得异常得分(Anomaly Score)。 63 | * 最后,将获得的异常分数代入热图(热图)的掩盖部分。 64 | * 移动遮罩并重复该过程。 65 | 66 | ![image](imgs/image010.png) 67 | ![image](imgs/image011.png) 68 | -------------------------------------------------------------------------------- /records/imgs/7.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ForrestPi/AnomalyDetection/77208ff9ba86efa937d10d8aa3f20c9fbefb2d91/records/imgs/7.jpg -------------------------------------------------------------------------------- /records/imgs/BP202190822100682_3_3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ForrestPi/AnomalyDetection/77208ff9ba86efa937d10d8aa3f20c9fbefb2d91/records/imgs/BP202190822100682_3_3.jpg -------------------------------------------------------------------------------- /records/imgs/MVTec.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ForrestPi/AnomalyDetection/77208ff9ba86efa937d10d8aa3f20c9fbefb2d91/records/imgs/MVTec.png -------------------------------------------------------------------------------- /records/imgs/anomaly_detection_example1.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ForrestPi/AnomalyDetection/77208ff9ba86efa937d10d8aa3f20c9fbefb2d91/records/imgs/anomaly_detection_example1.PNG -------------------------------------------------------------------------------- /records/imgs/anomaly_detection_types.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ForrestPi/AnomalyDetection/77208ff9ba86efa937d10d8aa3f20c9fbefb2d91/records/imgs/anomaly_detection_types.png -------------------------------------------------------------------------------- /records/imgs/image001.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ForrestPi/AnomalyDetection/77208ff9ba86efa937d10d8aa3f20c9fbefb2d91/records/imgs/image001.png -------------------------------------------------------------------------------- /records/imgs/image002.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ForrestPi/AnomalyDetection/77208ff9ba86efa937d10d8aa3f20c9fbefb2d91/records/imgs/image002.png -------------------------------------------------------------------------------- /records/imgs/image003.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ForrestPi/AnomalyDetection/77208ff9ba86efa937d10d8aa3f20c9fbefb2d91/records/imgs/image003.png -------------------------------------------------------------------------------- /records/imgs/image004.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ForrestPi/AnomalyDetection/77208ff9ba86efa937d10d8aa3f20c9fbefb2d91/records/imgs/image004.png -------------------------------------------------------------------------------- /records/imgs/image005.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ForrestPi/AnomalyDetection/77208ff9ba86efa937d10d8aa3f20c9fbefb2d91/records/imgs/image005.jpg -------------------------------------------------------------------------------- /records/imgs/image006.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ForrestPi/AnomalyDetection/77208ff9ba86efa937d10d8aa3f20c9fbefb2d91/records/imgs/image006.jpg -------------------------------------------------------------------------------- /records/imgs/image007.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ForrestPi/AnomalyDetection/77208ff9ba86efa937d10d8aa3f20c9fbefb2d91/records/imgs/image007.png -------------------------------------------------------------------------------- /records/imgs/image008.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ForrestPi/AnomalyDetection/77208ff9ba86efa937d10d8aa3f20c9fbefb2d91/records/imgs/image008.png -------------------------------------------------------------------------------- /records/imgs/image009.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ForrestPi/AnomalyDetection/77208ff9ba86efa937d10d8aa3f20c9fbefb2d91/records/imgs/image009.jpeg -------------------------------------------------------------------------------- /records/imgs/image010.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ForrestPi/AnomalyDetection/77208ff9ba86efa937d10d8aa3f20c9fbefb2d91/records/imgs/image010.png -------------------------------------------------------------------------------- /records/imgs/image011.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ForrestPi/AnomalyDetection/77208ff9ba86efa937d10d8aa3f20c9fbefb2d91/records/imgs/image011.png -------------------------------------------------------------------------------- /resources.md: -------------------------------------------------------------------------------- 1 | # Awesome Anomaly Detection 2 | A list of Papers on anomaly detection. 3 | You are welcome to open an issue and pull your requests if you think any paper that is important but not are inclueded in this repo. 4 | The papers are orgnized in classical method, deep learning method, application and survey. 5 | 6 | 7 | ## Classical Method 8 | - [Isolation Forest](https://cs.nju.edu.cn/zhouzh/zhouzh.files/publication/icdm08b.pdf) - ICDM 2008. 9 | 10 | - [LOF: Identifying Density-Based Local Outliers](http://www.dbs.ifi.lmu.de/Publikationen/Papers/LOF.pdf) - SIGMOD 2000. 11 | 12 | - [Extended Isolation Forest](http://matias-ck.com/files/papers/Extended_Isolation_Forest.pdf) 13 | 14 | - [Support Vector Method for Novelty Detection](https://papers.nips.cc/paper/1723-support-vector-method-for-novelty-detection.pdf) - NIPS 2000 15 | 16 | ### One-Class Classification 17 | 18 | - [One-Class SVMs for Document Classification](http://www.jmlr.org/papers/volume2/manevitz01a/manevitz01a.pdf) - JMLR 2001. 19 | 20 | - [Support Vector Data Description](http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.100.1425&rep=rep1&type=pdf) 21 | 22 | - [Can I Trust My One-Class Classification?](http://www.ipb.uni-bonn.de/pdfs/Mack2014Can.pdf) 23 | 24 | - [Efficient Anomaly Detection via Matrix Sketching](https://arxiv.org/pdf/1804.03065.pdf) - NIPS 2018 25 | 26 | ### PCA-based 27 | 28 | - [robust deep and inductive anomaly detection](https://arxiv.org/abs/1704.06743) - ECML PKDD 2017 29 | 30 | - [A loss framework for calibrated anomaly detection](https://papers.nips.cc/paper/7422-a-loss-framework-for-calibrated-anomaly-detection.pdf) - NIPS 2018 31 | 32 | 33 | ### Clustering 34 | 35 | - [A Practical Algorithm for Distributed Clustering and Outlier Detection](https://arxiv.org/pdf/1805.09495.pdf) - NIPS 2018 36 | 37 | ### Correlation 38 | 39 | - [Detecting Multiple Periods and Periodic Patterns in Event Time Sequences](http://chaozhang.org/papers/cikm17a.pdf) - CIKM 2017. 40 | 41 | ### Ranking 42 | 43 | - [ranking causal anomalies via temporal and dynamical analysis on vanishing correlations](https://www.kdd.org/kdd2016/papers/files/rfp0445-chengAemb.pdf) - KDD 2016. 44 | 45 | ## Deep Learning Method 46 | 47 | ### Generative Methods 48 | - [Variational Autoencoder based Anomaly Detection using Reconstruction Probability](http://dm.snu.ac.kr/static/docs/TR/SNUDM-TR-2015-03.pdf) 49 | 50 | #### Auto-encoder 51 | 52 | - [Learning sparse representation with variational auto-encoder for anomaly detection](https://ieeexplore.ieee.org/document/8386760/) 53 | 54 | - [Anomaly Detection with Robust Deep Autoencoders](http://dl.acm.org/authorize?N33358) - KDD 2017. 55 | 56 | - [DEEP AUTOENCODING GAUSSIAN MIXTURE MODEL FOR UNSUPERVISED ANOMALY DETECTION](https://www.cs.ucsb.edu/~bzong/doc/iclr18-dagmm.pdf) - ICLR 2018. 57 | 58 | - [Generative Probabilistic Novelty Detection with Adversarial Autoencoders](https://papers.nips.cc/paper/7915-generative-probabilistic-novelty-detection-with-adversarial-autoencoders.pdf) - NIPS 2018 59 | #### Variational Auto-encoder 60 | 61 | - [Multidimensional Time Series Anomaly Detection: A GRU-based Gaussian Mixture Variational Autoencoder Approach](http://proceedings.mlr.press/v95/guo18a/guo18a.pdf) - ACML 2018 62 | 63 | - [A Multimodel Anomaly Detector for Robot-Assisted Feeding Using an LSTM-based Variational Autoencoder](https://arxiv.org/pdf/1711.00614.pdf) - IEEE Robotics and Automation Letters 2018. 64 | 65 | #### GAN based 66 | 67 | - [Unsupervised Anomaly Detection with Generative Adversarial Networks to Guide Marker Discovery](https://arxiv.org/pdf/1703.05921.pdf) - IPMI 2017. 68 | 69 | - [Efficient-GAN-Based Anomaly Detection](https://github.com/houssamzenati/Efficient-GAN-Anomaly-Detection) ICLR Workshop 2018. 70 | 71 | - [Anomaly detection with generative adversarial networks](https://openreview.net/pdf?id=S1EfylZ0Z) - Reject by ICLR 2018, but was used as baseline method in recent published NIPS paper. 72 | 73 | ### Hypersphereical Learning 74 | 75 | - [Anomaly Detection in Dynamic Networks using Multi-view Time-Series Hypersphere Learning](https://dl.acm.org/citation.cfm?id=3132964) - CIKM 2017. 76 | 77 | - [Deep into Hypersphere: Robust and Unsupervised Anomaly Discovery in Dynamic Networks](https://www.ijcai.org/proceedings/2018/0378.pdf) - IJCAI 2018. 78 | 79 | ### One-Class Classification 80 | 81 | - [High-dimensional and large-scale anomaly detection using a linear one-class SVM with deep learning](https://www.sciencedirect.com/science/article/abs/pii/S0031320316300267) - Pattern Recognition 2018. 82 | 83 | - [Optimal single-class classification strategies](https://papers.nips.cc/paper/2987-optimal-single-class-classification-strategies.pdf) - NIPS 2007 84 | 85 | - [Deep One-Class Classification](http://proceedings.mlr.press/v80/ruff18a/ruff18a.pdf) - ICML 2018. 86 | 87 | ### Energy-based 88 | 89 | - [Deep structured energy based models for anomaly detection](https://arxiv.org/pdf/1605.07717.pdf) - ICML 2016 90 | 91 | ### Time series 92 | 93 | - [A Generalized Student-t Based Approach to Mixed-Type Anomaly Detection](http://www.nvc.cs.vt.edu/~ctlu/Publication/2013/AAAI-Lu-2013.pdf) - AAAI 2013 94 | 95 | - [Stochastic Online Anomaly Analysis for Streaming Time Series](https://www.ijcai.org/proceedings/2017/0445.pdf) - IJCAI 2017 96 | 97 | - [Long short term memory networks for anmomaly detection in time series](https://www.elen.ucl.ac.be/Proceedings/esann/esannpdf/es2015-56.pdf) 98 | 99 | - [LSTM-based Encoder-Decoder for Multi-sensor Anomaly Detection](https://arxiv.org/pdf/1607.00148.pdf) - ICML 2016 Workshop. 100 | 101 | ### Interpretation 102 | 103 | - [Contextual Outlier Interpretation](https://www.ijcai.org/proceedings/2018/0341.pdf) -IJCAI 2018 104 | 105 | ### Evaulation Metrics 106 | 107 | - [Precision and Recall for Time Series](http://papers.nips.cc/paper/7462-precision-and-recall-for-time-series.pdf) - NIPS 2018 108 | 109 | ### Geometric transformation 110 | 111 | - [Deep Anomaly Detection Using Geometric Transformations](https://arxiv.org/pdf/1805.10917.pdf) - NIPS 2018 112 | 113 | 114 | ### FeedBack 115 | - [Incorporating Feedback into Tree-based Anomaly Detection](https://github.com/ai/size-limit) - KDD 2017 Workshop on Interactive Data Exploration and Analytics. 116 | 117 | - [Feedback-Guided Anomaly Discovery via Online Optimization](http://web.engr.oregonstate.edu/~afern/papers/kdd18-siddiqui.pdf) - KDD 2018. 118 | 119 | ## Anomaly Detection Applications 120 | 121 | ### KPI 122 | - [Unsupervised Anomaly Detection via Variational Auto-Encoder for Seasonal KPIs in Web Applications](https://arxiv.org/pdf/1802.03903) - WWW 2018. 123 | ### Log 124 | 125 | - [DeepLog: Anomaly Detection and Diagnosis from System Logs through Deep Learning](https://acmccs.github.io/papers/p1285-duA.pdf) - CCS 2017. 126 | 127 | - [Mining Invariants from Logs for System Problem Detection](https://www.usenix.org/legacy/event/atc10/tech/slides/lou.pdf) - USENIX 2010 128 | 129 | 130 | ## Survey 131 | 132 | - [Anomaly detection in dynamic networks: a survey](https://onlinelibrary.wiley.com/doi/pdf/10.1002/wics.1347) 133 | 134 | - [Anomaly Detection : A Survey](http://cucis.ece.northwestern.edu/projects/DMS/publications/AnomalyDetection.pdf) 135 | 136 | - [A Survey of Recent Trends in One Class Classification](https://link.springer.com/chapter/10.1007/978-3-642-17080-5_21) 137 | 138 | - [A survey on unsupervised outlier detection in high‐dimensional numerical data](https://onlinelibrary.wiley.com/doi/abs/10.1002/sam.11161) 139 | 140 | 141 | 142 | 143 | 144 | 145 | 146 | 147 | 148 | 149 | --------------------------------------------------------------------------------