├── .gitignore
├── .vscode
    └── settings.json
├── DeepOneClass
    ├── code.py
    ├── imgs
    │   └── 001.jpg
    ├── readme.md
    └── total.py
├── LICENSE
├── Mahalanobis
    ├── Mahalanobis.py
    ├── Pics
    │   ├── Mahdist_verify_result.jpg
    │   ├── mahal_dist.jpg
    │   ├── 变体参数含义.jpg
    │   └── 马氏距离变体.jpg
    ├── README.md
    ├── data
    │   ├── forest_cover
    │   │   └── README.md
    │   ├── kdd_http
    │   │   └── README.md
    │   ├── kdd_smtp
    │   │   └── README.md
    │   └── shuttle
    │   │   └── README.md
    ├── mahal_dist.py
    ├── mahal_dist_variant.py
    ├── main.py
    ├── modules
    │   ├── autoencoder.py
    │   └── mahalanobis.py
    ├── requirements.txt
    ├── run.sh
    ├── utils
    │   ├── dataloading.py
    │   ├── experiment.py
    │   └── tracking.py
    └── verify_mahal_equivalence.py
├── README.md
├── adVAE
    ├── imgs
    │   └── advae.png
    └── readme.md
├── anomalyLocalization
    ├── README.md
    ├── code
    │   ├── dataset.py
    │   ├── eval.py
    │   ├── eval.sh
    │   ├── network.py
    │   ├── train.py
    │   └── train.sh
    └── imgs
    │   ├── 001.png
    │   ├── 002.png
    │   ├── 003.png
    │   └── face - 副本.png
├── dataset
    ├── imgs
    │   └── 001.png
    └── readme.md
├── memae
    ├── imgs
    │   └── memae.png
    ├── memoryzing_normality_to_detect_anomaly.py
    └── readme.md
├── projects.md
├── records
    ├── README.md
    ├── difficulty.md
    └── imgs
    │   ├── 7.jpg
    │   ├── BP202190822100682_3_3.jpg
    │   ├── MVTec.png
    │   ├── anomaly_detection_example1.PNG
    │   ├── anomaly_detection_types.png
    │   ├── image001.png
    │   ├── image002.png
    │   ├── image003.png
    │   ├── image004.png
    │   ├── image005.jpg
    │   ├── image006.jpg
    │   ├── image007.png
    │   ├── image008.png
    │   ├── image009.jpeg
    │   ├── image010.png
    │   └── image011.png
└── resources.md


/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | 
  6 | # C extensions
  7 | *.so
  8 | 
  9 | # Distribution / packaging
 10 | .Python
 11 | build/
 12 | develop-eggs/
 13 | dist/
 14 | downloads/
 15 | eggs/
 16 | .eggs/
 17 | lib/
 18 | lib64/
 19 | parts/
 20 | sdist/
 21 | var/
 22 | wheels/
 23 | pip-wheel-metadata/
 24 | share/python-wheels/
 25 | *.egg-info/
 26 | .installed.cfg
 27 | *.egg
 28 | MANIFEST
 29 | 
 30 | # PyInstaller
 31 | #  Usually these files are written by a python script from a template
 32 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 33 | *.manifest
 34 | *.spec
 35 | 
 36 | # Installer logs
 37 | pip-log.txt
 38 | pip-delete-this-directory.txt
 39 | 
 40 | # Unit test / coverage reports
 41 | htmlcov/
 42 | .tox/
 43 | .nox/
 44 | .coverage
 45 | .coverage.*
 46 | .cache
 47 | nosetests.xml
 48 | coverage.xml
 49 | *.cover
 50 | *.py,cover
 51 | .hypothesis/
 52 | .pytest_cache/
 53 | 
 54 | # Translations
 55 | *.mo
 56 | *.pot
 57 | 
 58 | # Django stuff:
 59 | *.log
 60 | local_settings.py
 61 | db.sqlite3
 62 | db.sqlite3-journal
 63 | 
 64 | # Flask stuff:
 65 | instance/
 66 | .webassets-cache
 67 | 
 68 | # Scrapy stuff:
 69 | .scrapy
 70 | 
 71 | # Sphinx documentation
 72 | docs/_build/
 73 | 
 74 | # PyBuilder
 75 | target/
 76 | 
 77 | # Jupyter Notebook
 78 | .ipynb_checkpoints
 79 | 
 80 | # IPython
 81 | profile_default/
 82 | ipython_config.py
 83 | 
 84 | # pyenv
 85 | .python-version
 86 | 
 87 | # pipenv
 88 | #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
 89 | #   However, in case of collaboration, if having platform-specific dependencies or dependencies
 90 | #   having no cross-platform support, pipenv may install dependencies that don't work, or not
 91 | #   install all needed dependencies.
 92 | #Pipfile.lock
 93 | 
 94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
 95 | __pypackages__/
 96 | 
 97 | # Celery stuff
 98 | celerybeat-schedule
 99 | celerybeat.pid
100 | 
101 | # SageMath parsed files
102 | *.sage.py
103 | 
104 | # Environments
105 | .env
106 | .venv
107 | env/
108 | venv/
109 | ENV/
110 | env.bak/
111 | venv.bak/
112 | 
113 | # Spyder project settings
114 | .spyderproject
115 | .spyproject
116 | 
117 | # Rope project settings
118 | .ropeproject
119 | 
120 | # mkdocs documentation
121 | /site
122 | 
123 | # mypy
124 | .mypy_cache/
125 | .dmypy.json
126 | dmypy.json
127 | 
128 | # Pyre type checker
129 | .pyre/
130 | 
131 | 
132 | #myself
133 | .mypy_cache
134 | *.pt
135 | *.gz
136 | memae/data/
137 | .vscode/settings.json
138 | records/*.pdf


--------------------------------------------------------------------------------
/.vscode/settings.json:
--------------------------------------------------------------------------------
1 | {
2 |     "python.pythonPath": "C:\\Users\\forrest\\Anaconda3\\python.exe"
3 | }


--------------------------------------------------------------------------------
/DeepOneClass/code.py:
--------------------------------------------------------------------------------
 1 | from keras.datasets import fashion_mnist
 2 | from keras.utils import to_categorical
 3 | import numpy as np
 4 | import matplotlib.pyplot as plt
 5 | 
# Dataset: load Fashion-MNIST, add an explicit single-channel axis and
# rescale the pixel values by 1/255.
(x_train, y_train), (x_test, y_test) = fashion_mnist.load_data()

x_train = x_train.reshape(x_train.shape[0], 28, 28, 1)
x_test = x_test.reshape(x_test.shape[0], 28, 28, 1)

x_train = x_train.astype('float32') / 255
x_test = x_test.astype('float32') / 255

# Training data.
# x_train_s: label-7 (sneaker) training images; x_ref / y_ref: every other
# training image with its label; x_test_s / x_test_b: sneaker / boot test images.
x_train_s, x_test_s, x_test_b = [], [], []
x_ref, y_ref = [], []

x_train_shape = x_train.shape


for i in range(len(x_train)):
    if y_train[i] == 7:  # label 7 is "sneaker"
        temp = x_train[i]
        x_train_s.append(temp.reshape((x_train_shape[1:])))
    else:
        temp = x_train[i]
        x_ref.append(temp.reshape((x_train_shape[1:])))
        y_ref.append(y_train[i])

x_ref = np.array(x_ref)

# Randomly draw 6000 samples (without replacement) from the reference data.
number = np.random.choice(np.arange(0,x_ref.shape[0]),6000,replace=False)

x, y = [], []

x_ref_shape = x_ref.shape

for i in number:
    temp = x_ref[i]
    x.append(temp.reshape((x_ref_shape[1:])))
    y.append(y_ref[i])

x_train_s = np.array(x_train_s)
# From here on x_ref / y_ref hold only the 6000 sampled reference images and
# their one-hot encoded labels.
x_ref = np.array(x)
y_ref = to_categorical(y)

# Test data: collect the sneaker (label 7) and boot (label 9) test images.
for i in range(len(x_test)):
    if y_test[i] == 7:  # label 7 is "sneaker"
        temp = x_test[i,:,:,:]
        x_test_s.append(temp.reshape((x_train_shape[1:])))

    if y_test[i] == 9:  # label 9 is "boot"
        temp = x_test[i,:,:,:]
        x_test_b.append(temp.reshape((x_train_shape[1:])))

x_test_s = np.array(x_test_s)
x_test_b = np.array(x_test_b)
61 | 
62 | 
63 | 
64 | import cv2
65 | from PIL import Image
66 | 
def resize(x):
    """Return the images in ``x`` converted to RGB and resized to 96x96.

    Each element of ``x`` is passed through ``cv2.cvtColor`` with
    ``cv2.COLOR_GRAY2RGB`` and then through ``cv2.resize`` with
    ``dsize=(96, 96)``; the results are stacked into one NumPy array.
    """
    return np.array([
        cv2.resize(cv2.cvtColor(image, cv2.COLOR_GRAY2RGB), dsize=(96, 96))
        for image in x
    ])
76 | 
# Upper-case names hold the 96x96 RGB versions of the arrays built above.
X_train_s = resize(x_train_s)
X_ref = resize(x_ref)
X_test_s = resize(x_test_s)
X_test_b = resize(x_test_b)
81 | 
82 | 
def original_loss(y_true, y_pred):
    """Compactness loss: scaled squared deviation of each output from the batch mean.

    ``y_true`` is ignored. For every sample the squared differences between
    ``y_pred`` and the batch mean (taken over axis 0) are summed along axis 1,
    then scaled by ``batchsize**2 / (classes * batchsize)`` and divided by
    ``(batchsize - 1)**2``. ``classes``, ``batchsize`` and ``K`` are looked up
    in the enclosing module scope.
    """
    deviation = y_pred - K.mean(y_pred, axis=0)
    per_sample = K.sum(deviation ** 2, axis=[1])
    scale = 1 / (classes * batchsize) * batchsize ** 2
    return scale * per_sample / ((batchsize - 1) ** 2)
86 | 
# NOTE(review): lc, ld, model_t, model_r, batch_target, batch_ref, batch_y and
# feature_out are not defined anywhere in this file; the two statements below
# are identical to the per-batch updates inside train() in total.py, so this
# looks like an excerpt rather than runnable top-level code - confirm.

# Target data:
# train on one batch and record the returned loss.
lc.append(model_t.train_on_batch(batch_target, np.zeros((batchsize, feature_out))))

# Reference data:
# train on one batch and record the returned loss.
ld.append(model_r.train_on_batch(batch_ref, batch_y))


--------------------------------------------------------------------------------
/DeepOneClass/imgs/001.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ForrestPi/AnomalyDetection/77208ff9ba86efa937d10d8aa3f20c9fbefb2d91/DeepOneClass/imgs/001.jpg


--------------------------------------------------------------------------------
/DeepOneClass/readme.md:
--------------------------------------------------------------------------------
 1 | ## Learning Deep Features for One-Class Classification
 2 | 
 3 | 
 4 | ## Architecture
 5 | ![img](./imgs/001.jpg)
 6 | 
 7 | 
 8 | ## Reference
 9 | [1] Pramuditha Perera and Vishal M. Patel (2019). <a href="https://arxiv.org/abs/1801.05365">Learning Deep Features for One-Class Classification</a>. IEEE Transactions on Image Processing 28.11 (2019): 5450-5463
10 | 
11 | 
12 | [github](https://github.com/PINTO0309/Keras-OneClassAnomalyDetection)


--------------------------------------------------------------------------------
/DeepOneClass/total.py:
--------------------------------------------------------------------------------
  1 | from keras.applications import MobileNetV2, VGG16
  2 | from keras.optimizers import SGD
  3 | from keras.models import Model
  4 | from keras.layers import GlobalAveragePooling2D, Dense
  5 | from keras import backend as K
  6 | from keras.engine.network import Network
  7 | 
# Hyper-parameters shared by original_loss() and train().
input_shape = (96, 96, 3)  # input size handed to the backbone network
classes = 10  # width of the softmax head on the reference network; also used in original_loss
batchsize = 128  # mini-batch size for both networks
#feature_out = 512 #secondary network out for VGG16
feature_out = 1280 #secondary network out for MobileNet
alpha = 0.5 #for MobileNetV2
# NOTE(review): lambda_ is not referenced by any code visible in this file.
lambda_ = 0.1 #for compact loss
 15 | 
 16 | #損失関数
 17 | def original_loss(y_true, y_pred):
 18 |     lc = 1/(classes*batchsize) * batchsize**2 * K.sum((y_pred -K.mean(y_pred,axis=0))**2,axis=[1]) / ((batchsize-1)**2)
 19 |     return lc
 20 | 
 21 | #学習
 22 | def train(x_target, x_ref, y_ref, epoch_num):
 23 | 
 24 |     # VGG16読み込み, S network用
 25 |     print("Model build...")
 26 |     #mobile = VGG16(include_top=False, input_shape=input_shape, weights='imagenet')
 27 | 
 28 |     # mobile net読み込み, S network用
 29 |     mobile = MobileNetV2(include_top=True, input_shape=input_shape, alpha=alpha,
 30 |                          , weights='imagenet')
 31 | 
 32 |     #最終層削除
 33 |     mobile.layers.pop()
 34 | 
 35 |     # 重みを固定
 36 |     for layer in mobile.layers:
 37 |         if layer.name == "block_13_expand": # "block5_conv1": for VGG16
 38 |             break
 39 |         else:
 40 |             layer.trainable = False
 41 | 
 42 |     model_t = Model(inputs=mobile.input,outputs=mobile.layers[-1].output)
 43 | 
 44 |     # R network用　Sと重み共有
 45 |     model_r = Network(inputs=model_t.input,
 46 |                       outputs=model_t.output,
 47 |                       name="shared_layer")
 48 | 
 49 |     #Rに全結合層を付ける
 50 |     prediction = Dense(classes, activation='softmax')(model_t.output)
 51 |     model_r = Model(inputs=model_r.input,outputs=prediction)
 52 | 
 53 |     #コンパイル
 54 |     optimizer = SGD(lr=5e-5, decay=0.00005)
 55 |     model_r.compile(optimizer=optimizer, loss="categorical_crossentropy")
 56 |     model_t.compile(optimizer=optimizer, loss=original_loss)
 57 | 
 58 |     model_t.summary()
 59 |     model_r.summary()
 60 | 
 61 |     print("x_target is",x_target.shape[0],'samples')
 62 |     print("x_ref is",x_ref.shape[0],'samples')
 63 | 
 64 |     ref_samples = np.arange(x_ref.shape[0])
 65 |     loss, loss_c = [], []
 66 | 
 67 |     print("training...")
 68 | 
 69 |     #学習
 70 |     for epochnumber in range(epoch_num):
 71 |         x_r, y_r, lc, ld = [], [], [], []
 72 | 
 73 |         #ターゲットデータシャッフル
 74 |         np.random.shuffle(x_target)
 75 | 
 76 |         #リファレンスデータシャッフル
 77 |         np.random.shuffle(ref_samples)
 78 |         for i in range(len(x_target)):
 79 |             x_r.append(x_ref[ref_samples[i]])
 80 |             y_r.append(y_ref[ref_samples[i]])
 81 |         x_r = np.array(x_r)
 82 |         y_r = np.array(y_r)
 83 | 
 84 |         for i in range(int(len(x_target) / batchsize)):
 85 | 
 86 |             #batchsize分のデータロード
 87 |             batch_target = x_target[i*batchsize:i*batchsize+batchsize]
 88 |             batch_ref = x_r[i*batchsize:i*batchsize+batchsize]
 89 |             batch_y = y_r[i*batchsize:i*batchsize+batchsize]
 90 | 
 91 |             #target data
 92 |             #学習しながら、損失を取得
 93 |             lc.append(model_t.train_on_batch(batch_target, np.zeros((batchsize, feature_out))))
 94 | 
 95 |             #reference data
 96 |             #学習しながら、損失を取得
 97 |             ld.append(model_r.train_on_batch(batch_ref, batch_y))
 98 | 
 99 |         loss.append(np.mean(ld))
100 |         loss_c.append(np.mean(lc))
101 | 
102 |         if (epochnumber+1) % 5 == 0:
103 |             print("epoch:",epochnumber+1)
104 |             print("Descriptive loss:", loss[-1])
105 |             print("Compact loss", loss_c[-1])
106 | 
107 |     #結果グラフ
108 |     plt.plot(loss,label="Descriptive loss")
109 |     plt.xlabel("epoch")
110 |     plt.legend()
111 |     plt.show()
112 | 
113 |     plt.plot(loss_c,label="Compact loss")
114 |     plt.xlabel("epoch")
115 |     plt.legend()
116 |     plt.show()    
117 | 
118 |     return model_t
119 | 
# Train for 5 epochs. X_train_s, X_ref and y_ref are not defined in this file.
# NOTE(review): presumably they come from the preprocessing script (code.py) - confirm.
model = train(X_train_s, X_ref, y_ref, 5)
121 | 
122 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2020 Forrest-Zhu
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/Mahalanobis/Mahalanobis.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | x=np.random.random(10)
 3 | y=np.random.random(10)
 4 | 
 5 | #马氏距离要求样本数要大于维数，否则无法求协方差矩阵
 6 | #此处进行转置，表示10个样本，每个样本2维
 7 | X=np.vstack([x,y])
 8 | print(X)
 9 | XT=X.T
10 | 
11 | #方法一：根据公式求解
12 | S=np.cov(X)   #两个维度之间协方差矩阵
13 | SI = np.linalg.inv(S) #协方差矩阵的逆矩阵
14 | #马氏距离计算两个样本之间的距离，此处共有10个样本，两两组合，共有45个距离。
15 | n=XT.shape[0]
16 | d1=[]
17 | for i in range(0,n):
18 |     for j in range(i+1,n):
19 |         delta=XT[i]-XT[j]
20 |         d=np.sqrt(np.dot(np.dot(delta,SI),delta.T))
21 |         d1.append(d)
22 | print(d1)    
23 | #方法二：根据scipy库求解
24 | from scipy.spatial.distance import pdist
25 | d2=pdist(XT,'mahalanobis')
26 | print(d2)


--------------------------------------------------------------------------------
/Mahalanobis/Pics/Mahdist_verify_result.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ForrestPi/AnomalyDetection/77208ff9ba86efa937d10d8aa3f20c9fbefb2d91/Mahalanobis/Pics/Mahdist_verify_result.jpg


--------------------------------------------------------------------------------
/Mahalanobis/Pics/mahal_dist.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ForrestPi/AnomalyDetection/77208ff9ba86efa937d10d8aa3f20c9fbefb2d91/Mahalanobis/Pics/mahal_dist.jpg


--------------------------------------------------------------------------------
/Mahalanobis/Pics/变体参数含义.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ForrestPi/AnomalyDetection/77208ff9ba86efa937d10d8aa3f20c9fbefb2d91/Mahalanobis/Pics/变体参数含义.jpg


--------------------------------------------------------------------------------
/Mahalanobis/Pics/马氏距离变体.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ForrestPi/AnomalyDetection/77208ff9ba86efa937d10d8aa3f20c9fbefb2d91/Mahalanobis/Pics/马氏距离变体.jpg


--------------------------------------------------------------------------------
/Mahalanobis/README.md:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | ## 1. 马氏距离
 4 | 
 5 | #### 1.1 马氏距离等价于【规范化的主成分空间内的欧氏距离】
 6 |   
 7 | - **规范化的主成分空间**
 8 |   - 对数据集进行主成分分析，即对数据集的协方差矩阵进行特征值分解，求主成分（特征向量）
 9 |   - 对所有主成分进行归一化处理，这些规范化的主成分即构成了规范化主成分空间的坐标轴
10 | 
11 | - **将样本映射至规范化主成分空间，意味着数据从超椭圆(ellipsoidal)分布转化为超球面(spherical)分布**
12 |   - 样本在规范化主成分空间各坐标轴上的投影(坐标分量)，可通过计算样本向量与规范化主成分的内积求得
13 | 
14 | - **两个向量的马氏距离等价于两者在规范化的主成分空间内的欧氏距离** 
15 |   - If each of these axes is re-scaled to have unit variance, then the Mahalanobis distance corresponds to standard Euclidean distance in the transformed space. 
16 | 
17 | 
18 | #### 1.2 马氏距离的特点
19 | - **特点一：马氏距离是无单位化的、尺度无关的，它内生地考虑到了数据集各坐标轴之间的相关性**
20 |   - The Mahalanobis distance is thus unitless and scale-invariant, and takes into account the correlations of the data set.
21 |  
22 | - **特点二：马氏距离与样本在各主成分上的偏离度成正比**
23 |    - This distance is zero if P is at the mean of D, and grows as P moves away from the mean along each principal component axis
24 | 
25 |    - The Mahalanobis distance measures the number of standard deviations from P to the mean of D. 
26 | 
27 | - 参考资料：[Wikipedia : Mahalanobis distance](https://en.wikipedia.org/wiki/Mahalanobis_distance) 
28 | 
29 | ---
30 | 
31 | ## 2. 马氏距离的计算方法及其代码实现
32 | #### 2.1 Python代码实现：[mahal_dist](./mahal_dist.py) 
33 | 
34 | #### 2.2 计算样本点x距离样本集中心的马氏距离公式   
35 | ![马氏距离](./Pics/mahal_dist.jpg)
36 | 
37 | ---
38 | 
39 | ## 3. 马氏距离的变体及其代码实现   
40 | #### 3.1 Python代码实现： [mahal_dist_variant](./mahal_dist_variant.py)
41 | 
42 | #### 3.2 论文出处： [A Novel Anomaly Detection Scheme Based on Principal Component Classifier](https://github.com/Albertsr/Anomaly-Detection/blob/master/UnSupervised-Based%20on%20PCA/Papers/A%20Novel%20Anomaly%20Detection%20Scheme%20Based%20on%20Principal%20Component%20Classifier.pdf) 
43 | 
44 | #### 3.3 计算方法
45 | 
46 |   ![马氏距离变体](./Pics/%E9%A9%AC%E6%B0%8F%E8%B7%9D%E7%A6%BB%E5%8F%98%E4%BD%93.jpg)
47 | 
48 | - **参数含义**
49 | 
50 |    ![参数含义](./Pics/%E5%8F%98%E4%BD%93%E5%8F%82%E6%95%B0%E5%90%AB%E4%B9%89.jpg)
51 |    
52 | - **异常样本的判定：** 当Score(x)大于某个阈值时，便可将样本x判定为异常样本
53 | 
54 | ---
55 | 
56 | ## 4. 马氏距离及其变体【对样本的异常程度评估完全一致】
57 | 
58 | #### 4.1 验证方法
59 | - 根据多个不同的随机种子生成多组实验数据集
60 | - 根据两种方法返回的分数对样本集的索引进行升序或降序排列，例如数值最大的样本其对应的索引排在最前面，依次类推；
61 | - 若分别根据马氏距离及其变体返回的数值大小对样本索引降序排列，若两个索引序列完全一致，则证明这两种方法对样本集中每一个样本的异常程度评估是完全一致的
62 | - 换句话说，在数据集中随机抽取两个不同样本a与b，若马氏距离返回的数据显示样本a比样本b更偏离数据中心，则马氏距离变体对这种大小关系有一致的判定
63 | 
64 | #### 4.2 验证代码：[verify_mahal_equivalence](./verify_mahal_equivalence.py)
65 | 
66 | #### 4.3 验证结论
67 | - 马氏距离及其变体对**各样本在数据集中的异常程度大小关系是完全一致的**
68 | - 根据随机生成的多个数据集进行验证，**实验结果表明上述结论是完全正确的**
69 |   - 每个数据集的行数、列数、异常样本比例均在一定区间内随机生成
70 |   - 正常样本服从标准正态分布，异常样本由两组异常样本子集构成，分别服从伽玛分布、指数分布
71 |   - 更多细节请查阅上述验证代码
72 |   
73 |    ![Mahdist_verify_result](./Pics/Mahdist_verify_result.jpg)
74 | 


--------------------------------------------------------------------------------
/Mahalanobis/data/forest_cover/README.md:
--------------------------------------------------------------------------------
1 | Source: http://odds.cs.stonybrook.edu/forestcovercovertype-dataset/
2 | 
3 | 286048 observations, 0.9% anomalous
4 | 
5 | Description:
6 | The original ForestCover/Covertype dataset from UCI machine learning repository is a multiclass classification dataset. It is used in predicting forest cover type from cartographic variables only (no remotely sensed data). This study area includes four wilderness areas located in the Roosevelt National Forest of northern Colorado. These areas represent forests with minimal human-caused disturbances, so that existing forest cover types are more a result of ecological processes rather than forest management practices. This dataset has 54 attributes (10 quantitative variables, 4 binary wilderness areas and 40 binary soil type variables). Here, outlier detection dataset is created using only 10 quantitative attributes. Instances from class 2 are considered as normal points and instances from class 4 are anomalies. The anomalies ratio is 0.9%. Instances from the other classes are omitted.


--------------------------------------------------------------------------------
/Mahalanobis/data/kdd_http/README.md:
--------------------------------------------------------------------------------
1 | Source: http://odds.cs.stonybrook.edu/http-kddcup99-dataset/
2 | 
3 | 567479 observations, 0.4% anomalous
4 | 
5 | Description:
6 | The original KDD Cup 1999 dataset from UCI machine learning repository contains 41 attributes (34 continuous, and 7 categorical), however, they are reduced to 4 attributes (service, duration, src_bytes, dst_bytes) as these attributes are regarded as the most basic attributes (see kddcup.names), where only ‘service’ is categorical. Using the ‘service’ attribute, the data is divided into {http, smtp, ftp, ftp_data, others} subsets. Here, only ‘http’ service data is used. Since the continuous attribute values are concentrated around ‘0’, we transformed each value into a value far from ‘0’, by y = log(x + 0.1). The original data set has 3,925,651 attacks (80.1%) out of 4,898,431 records. A smaller set is forged by having only 3,377 attacks (0.35%) of 976,157 records, where attribute ‘logged_in’ is positive. From this forged dataset 567,497 ‘http’ service data is used to construct the http (KDDCUP99) dataset. 


--------------------------------------------------------------------------------
/Mahalanobis/data/kdd_smtp/README.md:
--------------------------------------------------------------------------------
1 | Source: 
2 | http://odds.cs.stonybrook.edu/smtp-kddcup99-dataset/
3 | 
4 | 95156 observations, 0.03% anomalous
5 | 
6 | Description:
7 | The original KDD Cup 1999 dataset from UCI machine learning repository contains 41 attributes (34 continuous, and 7 categorical), however, they are reduced to 4 attributes (service, duration, src_bytes, dst_bytes) as these attributes are regarded as the most basic attributes(see kddcup.names), where only ‘service’ is categorical. Using the ‘service’ attribute, the data is divided into {http, smtp, ftp, ftp_data, others} subsets. Here, only ‘smtp’ service data is used. Since the continuous attribute values are concentrated around ‘0’, we transformed each value into a value far from ‘0’, by y = log(x + 0.1). The original data set has 3,925,651 attacks (80.1%) out of 4,898,431 records. A smaller set is forged by having only 3,377 attacks (0.35%) of 976,157 records, where attribute ‘logged_in’ is positive. From this forged dataset 95,156 ‘smtp’ service data is used to construct the Smtp (KDDCUP99) dataset. 


--------------------------------------------------------------------------------
/Mahalanobis/data/shuttle/README.md:
--------------------------------------------------------------------------------
1 | Source: http://odds.cs.stonybrook.edu/shuttle-dataset/
2 | 
3 | 49097 observations, 7% anomalous
4 | 
5 | Description:
6 | The original Statlog (Shuttle) dataset from UCI machine learning repository is a multi-class classification dataset with dimensionality 9. Here, the training and test data are combined. The smallest five classes, i.e. 2, 3, 5, 6, 7 are combined to form the outliers class, while class 1 forms the inlier class. Data for class 4 is discarded.


--------------------------------------------------------------------------------
/Mahalanobis/mahal_dist.py:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | import numpy as np
 4 | from numpy import linalg as LA
 5 | 
 6 | 
 7 | def mahal_dist(matrix):
 8 |     # 计算样本矩阵的中心向量
 9 |     matrix_mean = np.mean(matrix, axis=0)
10 |     # 计算各样本与中心向量之间的差异
11 |     delta = matrix - matrix_mean
12 |     
13 |     # 求协方差矩阵及其逆矩阵
14 |     cov_matrix = np.cov(matrix, rowvar=False, ddof=1)
15 |     cov_matrix_inv = LA.inv(cov_matrix)  
16 | 
17 |     # 求单个样本向量与样本中心的马氏距离
18 |     def md_vector(vector):        
19 |         inner_prod = np.dot(vector, cov_matrix_inv)
20 |         inner_product = np.dot(inner_prod, vector)
21 |         dist = np.sqrt(inner_product)
22 |         return dist
23 |     
24 |     # 求矩阵中所有样本与中心之间的马氏距离
25 |     mahal_dist = np.apply_along_axis(arr=delta, axis=1, func1d=md_vector)
26 |     return mahal_dist
27 | 


--------------------------------------------------------------------------------
/Mahalanobis/mahal_dist_variant.py:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | import numpy as np
 4 | from numpy import linalg as LA
 5 | from sklearn.preprocessing import StandardScaler 
 6 | 
 7 | 
 8 | def mahal_dist_variant(matrix):
 9 |     # 将数据集标准化
10 |     matrix = StandardScaler().fit_transform(matrix)
11 |     # 对数据集进行主成分分析
12 |     cov_matrix = np.cov(matrix, rowvar=False, ddof=1)
13 |     eigen_values, eigen_vectors = LA.eig(cov_matrix)
14 |         
15 |     # 函数get_score用于返回数据集在单个主成分上的分数
16 |     # 参数pc_idx表示主成分的索引
17 |     def get_score(pc_idx):
18 |         # eigen_vectors[pc_idx]表示第idx个主成分构成的列向量
19 |         inner_product = np.dot(matrix, eigen_vectors[pc_idx])
20 |         score = np.square(inner_product) / eigen_values[pc_idx]
21 |         return score
22 |     # 返回训练集每一个样本在所有主成分上的分数，并分别求和
23 |     mahal_dist = sum(map(get_score, range(len(eigen_values))))
24 |     return mahal_dist
25 | 


--------------------------------------------------------------------------------
/Mahalanobis/main.py:
--------------------------------------------------------------------------------
 1 | 
 2 | import torch
 3 | import argparse
 4 | 
 5 | from modules.autoencoder import Autoencoder
 6 | from utils.dataloading import load_dataset
 7 | from utils.tracking import Tracker
 8 | from utils.experiment import train_model
 9 | 
# Command-line interface of the experiment. Boolean switches are declared as
# store_true / store_false flags with explicit defaults.
parser = argparse.ArgumentParser(description='Automahalanobis experiment')

# Autoencoder args
parser.add_argument('--mahalanobis', dest='mahalanobis', action='store_true')
parser.set_defaults(mahalanobis=False)
parser.add_argument('--mahalanobis_cov_decay', type=float, default=1E-4)
parser.add_argument('--distort_inputs', dest='distort_inputs',
                    action='store_true')
parser.set_defaults(distort_inputs=False)
parser.add_argument('--distort_targets', dest='distort_targets',
                    action='store_true')
parser.set_defaults(distort_targets=False)

# Dataset args
parser.add_argument('--dataset_name', type=str, default='forest_cover',
                    help='name of the dataset')
# The proportions are numeric: parse them as float so that a value given on
# the command line has the same type as the 0.2 default (it used to be str).
parser.add_argument('--test_prop', type=float, default=0.2)
parser.add_argument('--val_prop', type=float, default=0.2)

# Training args
parser.add_argument('--n_epochs', type=int, default=500)
parser.add_argument('--batch_size', type=int, default=512)
parser.add_argument('--no_adam',  dest='adam', action='store_false',
                    help='boolean whether to not use adam optimizer but SGD with momentum')
parser.set_defaults(adam=True)
parser.add_argument('--no_cuda', dest='cuda', action='store_false')
parser.set_defaults(cuda=True)
parser.add_argument('--no_tensorboard', dest='tensorboard', action='store_false')
parser.set_defaults(tensorboard=True)
39 | 
# Collect args and kwargs
args = parser.parse_args()
# CUDA stays enabled only if it was not switched off on the command line AND
# torch reports an available CUDA device.
args.cuda = args.cuda if torch.cuda.is_available() else False
# Extra keyword arguments forwarded to load_dataset().
# NOTE(review): presumably these end up as DataLoader options - confirm in utils/dataloading.py.
kwargs = {'num_workers': 1, 'pin_memory': True} if args.cuda else {}

# Set model name: 'ae' followed by one suffix per enabled option,
# e.g. 'ae-mahalanobis-distortinputs' or 'ae-vanilla'.
args.model_name = 'ae'
args.model_name += '-mahalanobis' if args.mahalanobis else '-vanilla'
args.model_name += '-distortinputs' if args.distort_inputs else ''
args.model_name += '-distorttargets' if args.distort_targets else ''
50 | 
# Script entry point: load the data, build the autoencoder, train it and save
# the resulting model/optimizer state.
if __name__ == '__main__':

    # Load data
    train_loader, val_loader, test_loader, scaler, model_args = \
        load_dataset(args, **kwargs)

    # Construct model and cast to double
    # NOTE: args.distort_targets is not passed to the model here.
    model = Autoencoder(model_args.layer_dims, args.mahalanobis,
                        args.mahalanobis_cov_decay, args.distort_inputs)
    model.double()

    # Determine device and copy model and scaler
    device = torch.device("cuda:0" if args.cuda else "cpu")
    model.to(device)
    scaler.to(device)

    # Instantiate tracker
    tracker = Tracker(args)

    # Construct loss function
    criterion = torch.nn.L1Loss()

    # Construct optimizer
    if args.adam:
        optimizer = torch.optim.Adam(model.parameters())
    else:
        optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9,
                                    nesterov=False)

    # Train the model
    model, epoch = train_model(model, criterion, optimizer, train_loader,
                               val_loader, scaler, tracker, args, device)

    print("Trained model on device: {}".format(device))

    # Checkpoint: epoch returned by train_model plus model and optimizer state.
    state = {
        'epoch': epoch,
        'state_dict': model.state_dict(),
        'optimizer': optimizer.state_dict()
    }
    # The file name is appended to tracker.dir by plain string concatenation.
    # NOTE(review): assumes tracker.dir ends with a path separator - confirm in utils/tracking.py.
    torch.save(state, tracker.dir+'model_state')

    # state = torch.load()
    # model.load_state_dict(state['state_dict'])
    # optimizer.load_state_dict(state['optimizer'])
96 | 
97 | 
98 | 


--------------------------------------------------------------------------------
/Mahalanobis/modules/autoencoder.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | """
  3 | Autoencoder module
  4 | --------------------------
  5 | """
  6 | import torch
  7 | import torch.nn as nn
  8 | from modules.mahalanobis import MahalanobisLayer
  9 | 
 10 | class Autoencoder(nn.Module):
 11 | 
 12 |     def __init__(self, layer_dims, mahalanobis=False,
 13 |                  mahalanobis_cov_decay=0.1, distort_inputs=False):
 14 |         super(Autoencoder, self).__init__()
 15 | 
 16 |         self.layer_dims = layer_dims
 17 | 
 18 |         self.encoding_layers = torch.nn.Sequential(
 19 |             nn.Linear(layer_dims[0], layer_dims[1]),  # 1st hidden layer
 20 |             nn.Tanh(),                                # 1st hidden layer
 21 |             nn.Linear(layer_dims[1], layer_dims[2])   # Compression layer
 22 |         )
 23 | 
 24 |         self.decoding_layers = torch.nn.Sequential(
 25 |             nn.Linear(layer_dims[2], layer_dims[3]),  # 3rd hidden layer
 26 |             nn.Tanh(),                                # 3d hidden layer
 27 |             nn.Linear(layer_dims[3], layer_dims[4])   # Output layer
 28 |         )
 29 | 
 30 |         self.mahalanobis = mahalanobis
 31 | 
 32 |         if mahalanobis:
 33 |             self.mahalanobis_layer = MahalanobisLayer(layer_dims[0],
 34 |                                                       mahalanobis_cov_decay)
 35 | 
 36 |         self.distort_input = distort_inputs
 37 | 
 38 |     def forward(self, x):
 39 |         x_in = x + torch.randn_like(x) if self.distort_input else x
 40 |         x_enc = self.encoding_layers(x_in)
 41 |         x_fit = self.decoding_layers(x_enc)
 42 |         if self.mahalanobis:
 43 |             x_fit = self.mahalanobis_layer(x, x_fit)
 44 |         return x_fit
 45 | 
 46 |     def encode(self, x):
 47 |         return self.encoding_layers(x)
 48 | 
 49 |     def decode(self, x):
 50 |         return self.decoding_layers(x)
 51 | 
 52 |     def reconstruct(self, x):
 53 |         x = self.encoding_layers(x)
 54 |         x = self.decoding_layers(x)
 55 |         return x
 56 | 
 57 | 
 58 | if __name__ == "__main__":
 59 |     batch_size = 128
 60 |     layer_dims = 10, 30, 5, 30, 10
 61 | 
 62 |     # Create random Tensors to hold inputs and outputs
 63 |     x = torch.Tensor(torch.randn(batch_size, layer_dims[0]))
 64 | 
 65 |     # Construct our model by instantiating the class defined above
 66 |     model = Autoencoder(layer_dims, True, 0.001, True)
 67 | 
 68 |     # Select device to train model on and copy model to device
 69 |     device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
 70 |     model.to(device)
 71 | 
 72 |     # Copy data to device
 73 |     x = x.to(device)
 74 | 
 75 |     # Construct our loss function and an optimizer
 76 |     criterion = nn.L1Loss()
 77 |     optimizer = torch.optim.SGD(model.parameters(), lr=0.1, momentum=0)
 78 | 
 79 |     for t in range(2000):
 80 |         # Forward pass: Compute predicted y by passing x to the model
 81 |         errors = model(x)
 82 | 
 83 |         # Compute and print loss
 84 |         loss = criterion(errors, torch.zeros(errors.size(), device=device))
 85 |         print(t, loss.item())
 86 | 
 87 |         # Zero gradients, perform a backward pass, and update the weights.
 88 |         optimizer.zero_grad()
 89 |         loss.backward()
 90 |         optimizer.step()
 91 | 
 92 |         if model.mahalanobis_layer:
 93 |             with torch.no_grad():
 94 |                 x_fit = model.reconstruct(x)
 95 |                 model.mahalanobis_layer.update(x, x_fit)
 96 | 
 97 |     print("Trained model on device: {}".format(device))
 98 | 
 99 |     print(errors)
100 |     print(x)
101 |     print(model.reconstruct(x))
102 |     if model.mahalanobis:
103 |         print(model.mahalanobis_layer.S)
104 |         print(model.mahalanobis_layer.S_inv)
105 | 


--------------------------------------------------------------------------------
/Mahalanobis/modules/mahalanobis.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | """
 3 | Mahalanobis module
 4 | --------------------------
 5 | """
 6 | import torch
 7 | import torch.nn as nn
 8 | 
 9 | class MahalanobisLayer(nn.Module):
10 | 
11 |     def __init__(self, dim, decay = 0.1):
12 |         super(MahalanobisLayer, self).__init__()
13 |         self.register_buffer('S', torch.eye(dim))
14 |         self.register_buffer('S_inv', torch.eye(dim))
15 |         self.decay = decay
16 | 
17 |     def forward(self, x, x_fit):
18 |         """
19 |         Calculates the squared Mahalanobis distance between x and x_fit
20 |         """
21 | 
22 |         delta = x - x_fit
23 |         m = torch.mm(torch.mm(delta, self.S_inv), delta.t())
24 |         return torch.diag(m)
25 | 
26 |     def cov(self, x):
27 |         x -= torch.mean(x, dim=0)
28 |         return 1 / (x.size(0) - 1) * x.t().mm(x)
29 | 
30 |     def update(self, X, X_fit):
31 |         delta = X - X_fit
32 |         self.S = (1 - self.decay) * self.S + self.decay * self.cov(delta)
33 |         self.S_inv = torch.pinverse(self.S)
34 | 
if __name__ == "__main__":

    import numpy as np
    from scipy.spatial import distance

    # Fixed covariance matrix, its inverse and two small batches of points
    cov_matrix = torch.Tensor([[1, 0.5, 0.5], [0.5, 1, 0.5], [0.5, 0.5, 1]])
    cov_inverse = torch.inverse(cov_matrix)
    batch_a = torch.Tensor([[1, 0, 0], [0, 1, 0], [0, 2, 0]])
    batch_b = torch.Tensor([[0, 1, 0], [0, 2, 0], [0, 2, 0]])

    # Reference values: scipy returns the plain distance, so square it
    reference = np.array([
        distance.mahalanobis(row_a.numpy(), row_b.numpy(), cov_inverse.numpy())
        for row_a, row_b in zip(batch_a, batch_b)
    ]) ** 2

    # Same distances through the layer, using the known inverse
    layer = MahalanobisLayer(3, decay=0.99)
    layer.S_inv = cov_inverse
    layer_output = layer(batch_a, batch_b)
    np.testing.assert_almost_equal(reference, layer_output.numpy())

    # cov() must agree with numpy's column covariance
    samples = torch.rand(10, 3)
    expected_cov = np.cov(samples.numpy(), rowvar=False)
    layer_cov = layer.cov(samples)
    np.testing.assert_almost_equal(expected_cov, layer_cov.numpy())

    # Residuals against a second random batch
    fitted = torch.rand(10, 3)
    residuals = samples - fitted
    expected_residual_cov = np.cov(residuals.numpy(), rowvar=False)
    _ = layer.cov(residuals)

    # With decay 0.99, repeated updates drive S to the residual covariance
    n_updates = 20
    for _step in range(n_updates):
        layer.update(samples, fitted)
    np.testing.assert_almost_equal(expected_residual_cov, layer.S.numpy())

    # numpy's inverse and the stored pseudo-inverse should be close
    np.testing.assert_almost_equal(np.linalg.inv(expected_residual_cov),
                                   layer.S_inv.numpy(), decimal=5)
80 | 


--------------------------------------------------------------------------------
/Mahalanobis/requirements.txt:
--------------------------------------------------------------------------------
 1 | name: pytorch
 2 | channels:
 3 |   - pytorch
 4 |   - defaults
 5 | dependencies:
 6 |   - blas=1.0=mkl
 7 |   - ca-certificates=2018.03.07=0
 8 |   - certifi=2018.10.15=py37_0
 9 |   - cffi=1.11.5=py37he75722e_1
10 |   - cycler=0.10.0=py37_0
11 |   - dbus=1.13.2=h714fa37_1
12 |   - expat=2.2.6=he6710b0_0
13 |   - fontconfig=2.13.0=h9420a91_0
14 |   - freetype=2.9.1=h8a8886c_1
15 |   - glib=2.56.2=hd408876_0
16 |   - gst-plugins-base=1.14.0=hbbd80ab_1
17 |   - gstreamer=1.14.0=hb453b48_1
18 |   - icu=58.2=h9c2bf20_1
19 |   - intel-openmp=2019.0=118
20 |   - jpeg=9b=h024ee3a_2
21 |   - kiwisolver=1.0.1=py37hf484d3e_0
22 |   - libedit=3.1.20170329=h6b74fdf_2
23 |   - libffi=3.2.1=hd88cf55_4
24 |   - libgcc-ng=8.2.0=hdf63c60_1
25 |   - libgfortran-ng=7.3.0=hdf63c60_0
26 |   - libpng=1.6.35=hbc83047_0
27 |   - libstdcxx-ng=8.2.0=hdf63c60_1
28 |   - libtiff=4.0.9=he85c1e1_2
29 |   - libuuid=1.0.3=h1bed415_2
30 |   - libxcb=1.13=h1bed415_1
31 |   - libxml2=2.9.8=h26e45fe_1
32 |   - matplotlib=3.0.1=py37h5429711_0
33 |   - mkl=2019.0=118
34 |   - mkl_fft=1.0.6=py37h7dd41cf_0
35 |   - mkl_random=1.0.1=py37h4414c95_1
36 |   - ncurses=6.1=hf484d3e_0
37 |   - ninja=1.8.2=py37h6bb024c_1
38 |   - numpy=1.15.4=py37h1d66e8a_0
39 |   - numpy-base=1.15.4=py37h81de0dd_0
40 |   - olefile=0.46=py37_0
41 |   - openssl=1.0.2p=h14c3975_0
42 |   - pandas=0.23.4=py37h04863e7_0
43 |   - patsy=0.5.1=py37_0
44 |   - pcre=8.42=h439df22_0
45 |   - pillow=5.3.0=py37h34e0f95_0
46 |   - pip=18.1=py37_0
47 |   - pycparser=2.19=py37_0
48 |   - pyparsing=2.3.0=py37_0
49 |   - pyqt=5.9.2=py37h05f1152_2
50 |   - python=3.7.0=h6e4f718_3
51 |   - python-dateutil=2.7.5=py37_0
52 |   - pytz=2018.7=py37_0
53 |   - qt=5.9.6=h8703b6f_2
54 |   - readline=7.0=h7b6447c_5
55 |   - scikit-learn=0.20.0=py37h4989274_1
56 |   - scipy=1.1.0=py37hfa4b5c9_1
57 |   - seaborn=0.9.0=py37_0
58 |   - setuptools=40.5.0=py37_0
59 |   - sip=4.19.8=py37hf484d3e_0
60 |   - six=1.11.0=py37_1
61 |   - sqlite=3.25.2=h7b6447c_0
62 |   - statsmodels=0.9.0=py37h035aef0_0
63 |   - tk=8.6.8=hbc83047_0
64 |   - tornado=5.1.1=py37h7b6447c_0
65 |   - wheel=0.32.2=py37_0
66 |   - xz=5.2.4=h14c3975_4
67 |   - zlib=1.2.11=ha838bed_2
68 |   - pytorch=0.4.1=py37_py36_py35_py27__9.0.176_7.1.2_2
69 |   - torchvision=0.2.1=py37_1
70 |   - pip:
71 |     - torch==0.4.1.post2
72 | prefix: /home/bart/anaconda3/envs/pytorch
73 | 
74 | 


--------------------------------------------------------------------------------
/Mahalanobis/run.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | source activate pytorch
 4 | 
 5 | # Forest cover
 6 | python3 main.py --dataset_name forest_cover
 7 | python3 main.py --mahalanobis --dataset_name forest_cover
 8 | python3 main.py --mahalanobis --distort_inputs --dataset_name forest_cover
 9 | python3 main.py --mahalanobis --distort_targets --dataset_name forest_cover
10 | 
11 | # Kdd smtp
12 | python3 main.py --dataset_name kdd_smtp
13 | python3 main.py --mahalanobis --dataset_name kdd_smtp
14 | python3 main.py --mahalanobis --distort_inputs --dataset_name kdd_smtp
15 | python3 main.py --mahalanobis --distort_targets --dataset_name kdd_smtp
16 | 
17 | # Kdd http
18 | python3 main.py --dataset_name kdd_http
19 | python3 main.py --mahalanobis --dataset_name kdd_http
20 | python3 main.py --mahalanobis --distort_inputs --dataset_name kdd_http
21 | python3 main.py --mahalanobis --distort_targets --dataset_name kdd_http
22 | 
23 | # Shuttle
24 | python3 main.py --dataset_name shuttle
25 | python3 main.py --mahalanobis --dataset_name shuttle
26 | python3 main.py --mahalanobis --distort_inputs --dataset_name shuttle
27 | python3 main.py --mahalanobis --distort_targets --dataset_name shuttle
28 | 
29 | # Exit script
30 | exit 0


--------------------------------------------------------------------------------
/Mahalanobis/utils/dataloading.py:
--------------------------------------------------------------------------------
  1 | 
  2 | import torch
  3 | import torch.utils.data as data_utils
  4 | import numpy as np
  5 | 
  6 | import h5py
  7 | from scipy.io import loadmat
  8 | 
  9 | 
 10 | class Scaler:
 11 | 
 12 |     def __init__(self, x):
 13 |         # Numpy array input to tensor
 14 |         x = torch.from_numpy(x).double()
 15 | 
 16 |         # Calculate mean and standard deviation of train
 17 |         self.mean_vec = torch.mean(x, dim=0)
 18 |         self.sd_vec = torch.std(x, dim=0)
 19 | 
 20 |     def to(self, device):
 21 |         self.mean_vec = self.mean_vec.to(device)
 22 |         self.sd_vec = self.sd_vec.to(device)
 23 | 
 24 |     def normalize(self, x):
 25 |         return (x - self.mean_vec) / self.sd_vec
 26 | 
 27 | 
 28 | def np_shuffle_arrays(a, b):
 29 |     assert len(a) == len(b)
 30 |     p = np.random.permutation(len(a))
 31 |     return a[p], b[p]
 32 | 
 33 | 
 34 | def read_mat(path: str, transpose=True, print_dim=False):
 35 | 
 36 |     # Read data - different .mat versions: first try h5py, then scipy
 37 |     try:
 38 |         file = h5py.File(path, 'r')
 39 |     except OSError:
 40 |         file = loadmat(path)
 41 | 
 42 |     # Extract X and labels
 43 |     X = np.array(file.get('X'))
 44 |     labels = np.array(file.get('y'))
 45 | 
 46 |     # Transpose data
 47 |     if transpose:
 48 |         X = X.transpose()
 49 |         labels = labels.transpose()
 50 | 
 51 |     if print_dim:
 52 |         print('Input data dim:')
 53 |         print(' X:      {}'.format(X.shape))
 54 |         print(' labels: {}'.format(labels.shape))
 55 | 
 56 |     return X, labels
 57 | 
 58 | 
 59 | def generate_loaders(X, labels, args, **kwargs):
 60 | 
 61 |     # Train validation test split
 62 |     X, labels = np_shuffle_arrays(X, labels)
 63 | 
 64 |     data_nrows = X.shape[0]
 65 |     val_size = int(args.val_prop * data_nrows)
 66 |     test_size = int(args.test_prop * data_nrows)
 67 | 
 68 |     splits = [data_nrows - val_size - test_size, data_nrows - val_size]
 69 |     X_train, X_val, X_test = np.split(X, splits)
 70 |     labels_train, labels_val, labels_test = np.split(labels, splits)
 71 | 
 72 |     # Fit scaler
 73 |     scaler = Scaler(X_train)
 74 | 
 75 |     # Pytorch data loaders
 76 |     train = data_utils.TensorDataset(torch.from_numpy(X_train).double(),
 77 |                                      torch.from_numpy(labels_train).double())
 78 |     train_loader = data_utils.DataLoader(train,
 79 |                                          batch_size=args.batch_size,
 80 |                                          shuffle=True, **kwargs)
 81 | 
 82 |     validation = data_utils.TensorDataset(torch.from_numpy(X_val).double(),
 83 |                                           torch.from_numpy(labels_val).double())
 84 |     val_loader = data_utils.DataLoader(validation,
 85 |                                        batch_size=args.batch_size,
 86 |                                        shuffle=False, **kwargs)
 87 | 
 88 |     test = data_utils.TensorDataset(torch.from_numpy(X_test).double(),
 89 |                                     torch.from_numpy(labels_test).double())
 90 |     test_loader = data_utils.DataLoader(test,
 91 |                                         batch_size=args.batch_size,
 92 |                                         shuffle=False, **kwargs)
 93 | 
 94 |     return train_loader, val_loader, test_loader, scaler
 95 | 
 96 | 
 97 | def load_kdd_smtp(args, as_numpy, **kwargs):
 98 | 
 99 |     # Set args
100 |     args.layer_dims = (3, 10, 2, 10, 3)
101 | 
102 |     # Load data
103 |     X, labels = read_mat('./data/kdd_smtp/kdd_smtp.mat',
104 |                          transpose=True, print_dim=True)
105 | 
106 |     if as_numpy:
107 |         return X, labels
108 | 
109 |     # Split data and generate the data loaders
110 |     train_loader, val_loader, test_loader, scaler = \
111 |         generate_loaders(X, labels, args, **kwargs)
112 | 
113 |     return train_loader, val_loader, test_loader, scaler, args
114 | 
115 | 
116 | def load_kdd_http(args, as_numpy, **kwargs):
117 | 
118 |     # Set args
119 |     args.layer_dims = (3, 10, 2, 10, 3)
120 | 
121 |     # Load data
122 |     X, labels = read_mat('./data/kdd_http/kdd_http.mat',
123 |                          transpose=True, print_dim=True)
124 | 
125 |     if as_numpy:
126 |         return X, labels
127 | 
128 |     # Split data and generate the data loaders
129 |     train_loader, val_loader, test_loader, scaler = \
130 |         generate_loaders(X, labels, args, **kwargs)
131 | 
132 |     return train_loader, val_loader, test_loader, scaler, args
133 | 
134 | 
135 | def load_shuttle(args, as_numpy, **kwargs):
136 | 
137 |     # Set args
138 |     args.layer_dims = (9, 20, 5, 20, 9)
139 | 
140 |     # Load data
141 |     X, labels = read_mat('./data/shuttle/shuttle.mat',
142 |                          transpose=False, print_dim=True)
143 | 
144 |     if as_numpy:
145 |         return X, labels
146 | 
147 |     # Split data and generate the data loaders
148 |     train_loader, val_loader, test_loader, scaler = \
149 |         generate_loaders(X, labels, args, **kwargs)
150 | 
151 |     return train_loader, val_loader, test_loader, scaler, args
152 | 
153 | 
154 | def load_forest_cover(args, as_numpy, **kwargs):
155 | 
156 |     # Set args
157 |     args.layer_dims = (10, 20, 5, 20, 10)
158 | 
159 |     # Load data
160 |     X, labels = read_mat('./data/forest_cover/forest_cover.mat',
161 |                          transpose=False, print_dim=True)
162 | 
163 |     if as_numpy:
164 |         return X, labels
165 | 
166 |     # Split data and generate the data loaders
167 |     train_loader, val_loader, test_loader, scaler = \
168 |         generate_loaders(X, labels, args, **kwargs)
169 | 
170 |     return train_loader, val_loader, test_loader, scaler, args
171 | 
172 | 
def load_dataset(args, **kwargs):
    '''
    Build the torch data loaders for one of the supported datasets:
    kdd_smtp, kdd_http, shuttle, forest_cover

    :param args: Namespace object created by argparse containing:
        dataset_name, test_prop, val_prop, batch_size
    :param kwargs: to be passed to torch.utils.data.DataLoader
    :return: Tuple: train_loader, val_loader, test_loader, scaler, args
    '''
    name = args.dataset_name
    supported = ('kdd_smtp', 'kdd_http', 'shuttle', 'forest_cover')

    # Reject unknown names before touching any loader
    if name not in supported:
        raise Exception('Wrong name of the dataset!')

    loaders = {
        'kdd_smtp': load_kdd_smtp,
        'kdd_http': load_kdd_http,
        'shuttle': load_shuttle,
        'forest_cover': load_forest_cover,
    }
    return loaders[name](args, False, **kwargs)
193 | 
194 | 
if __name__ == "__main__":

    # Self-test of Scaler: scaled training data has mean 0 and std 1
    X_train = np.random.randn(20, 5)
    scaler = Scaler(X_train)

    # Scaler stores torch tensors, so normalise a tensor and convert back
    X_scaled = scaler.normalize(torch.from_numpy(X_train)).numpy()

    np.testing.assert_almost_equal(np.array([0,0,0,0,0]),
                                   np.mean(X_scaled, axis=0))
    # torch.std divides by n - 1 by default, so the matching numpy check
    # needs ddof=1 (numpy's default ddof=0 would give sqrt(19/20), not 1)
    np.testing.assert_almost_equal(np.array([1, 1, 1, 1, 1]),
                                   np.std(X_scaled, axis=0, ddof=1))

    # Smoke test of the full loading pipeline on forest_cover
    from argparse import Namespace
    data_args = Namespace(dataset_name='forest_cover',
                          test_prop=0.2,
                          val_prop=0.2,
                          batch_size=128)

    train_loader, val_loader, test_loader, scaler, args= \
        load_dataset(args=data_args)
214 | 


--------------------------------------------------------------------------------
/Mahalanobis/utils/experiment.py:
--------------------------------------------------------------------------------
  1 | 
  2 | import numpy as np
  3 | import math
  4 | import torch
  5 | 
  6 | def train_model(model, criterion, optimizer, train_loader, val_loader, scaler,
  7 |                 tracker, args, device):
  8 | 
  9 |     # Performance metrics and tracking
 10 |     val_loss, top1, top5, top10, top25 = \
 11 |         validate(val_loader, model, criterion, scaler, device)
 12 |     tracker.track(0, 0, val_loss, top1, top5, top10, top25)
 13 | 
 14 |     for epoch in range(1, args.n_epochs + 1):
 15 | 
 16 |         for X_batch, labels_batch in train_loader:
 17 | 
 18 |             # Copy data to device
 19 |             X_batch, labels_batch = X_batch.to(device), labels_batch.to(device)
 20 | 
 21 |             # Scale X
 22 |             X_batch = scaler.normalize(X_batch)
 23 | 
 24 |             # Forward pass: Compute predicted y by passing x to the model
 25 |             out = model(X_batch)
 26 | 
 27 |             # Construct y tensor
 28 |             y_batch = torch.zeros_like(out) if model.mahalanobis else X_batch
 29 | 
 30 |             # Compute and print loss
 31 |             loss = criterion(out, y_batch)
 32 |             print('Epoch: {}/{} -- Loss: {}'.format(epoch, args.n_epochs,
 33 |                                                     loss.item()))
 34 | 
 35 |             # Zero gradients, perform a backward pass, and update the weights.
 36 |             optimizer.zero_grad()
 37 |             loss.backward()
 38 |             optimizer.step()
 39 | 
 40 |             if model.mahalanobis:
 41 |                 with torch.no_grad():
 42 |                     X_fit = model.reconstruct(X_batch)
 43 |                     model.mahalanobis_layer.update(X_batch, X_fit)
 44 | 
 45 |         # Performance metrics and tracking
 46 |         val_loss, top1, top5, top10, top25 = \
 47 |             validate(val_loader, model, criterion, scaler, device)
 48 |         tracker.track(epoch, loss, val_loss, top1, top5, top10, top25)
 49 | 
 50 |     return model, epoch
 51 | 
 52 | def outlier_factor(x, x_val):
 53 |     err = x - x_val
 54 |     err = torch.pow(err, 2)
 55 |     err = torch.sum(err, 1)
 56 |     return err / len(err)
 57 | 
 58 | 
 59 | def performance(anomalies, scores, percentage):
 60 | 
 61 |     # Order anomalies (binary vector) by the anomaly score in descending order
 62 |     _, ordering = torch.sort(scores, descending=True)
 63 |     ordered_anomalies = anomalies[ordering.type(torch.LongTensor)]
 64 | 
 65 |     # Number of observations to include in top
 66 |     n_top = math.ceil(len(anomalies) * percentage / 100)
 67 | 
 68 |     return torch.sum(ordered_anomalies[:n_top]) / torch.sum(anomalies)
 69 | 
 70 | def validate(data_loader, model, criterion, scaler, device):
 71 | 
 72 |     class FillableArray:
 73 | 
 74 |         def __repr__(self):
 75 |             return self.X.__str__()
 76 | 
 77 |         def __init__(self, n, tensor=False):
 78 |             self.n = n
 79 |             self.X = torch.Tensor(torch.zeros(n)) if tensor else np.zeros(n)
 80 |             self.i = 0
 81 | 
 82 |         def fill(self, x):
 83 |             stop_ind = self.i + len(x)
 84 |             assert self.n >= stop_ind
 85 |             self.X[self.i:stop_ind] = x.flatten()
 86 |             self.i = stop_ind
 87 | 
 88 |     nrow = len(data_loader.dataset)
 89 |     anomalies = FillableArray(nrow, tensor=True)
 90 |     scores = FillableArray(nrow, tensor=True)
 91 |     loss  =0
 92 | 
 93 |     for i, (X_val, labels_val) in enumerate(data_loader):
 94 | 
 95 |         # Copy to device
 96 |         X_val, labels_val = X_val.to(device), labels_val.to(device)
 97 | 
 98 |         # Scale X
 99 |         X_val = scaler.normalize(X_val)
100 | 
101 |         # Calculate output of model: reconstructions or Mahalanobis distance
102 |         out = model(X_val)
103 | 
104 |         # Construct y tensor and calculate loss
105 |         y_val = torch.zeros_like(out) if model.mahalanobis else X_val
106 |         loss = criterion(out, y_val)
107 | 
108 |         # Determine anomaly scores
109 |         val_scores = out if model.mahalanobis else outlier_factor(out, X_val)
110 | 
111 |         # Fill anomaly and score tensors to compute performance on full set
112 |         anomalies.fill(labels_val)
113 |         scores.fill(val_scores)
114 | 
115 |     loss /= i + 1
116 |     top1 = performance(anomalies.X, scores.X, 1).item()
117 |     top5 = performance(anomalies.X, scores.X, 5).item()
118 |     top10 = performance(anomalies.X, scores.X, 10).item()
119 |     top25 = performance(anomalies.X, scores.X, 25).item()
120 | 
121 |     return loss.item(), top1, top5, top10, top25
122 | 
if __name__=='__main__':

    # Quick look at outlier_factor on two random batches
    sample = torch.randn(10,3)
    sample_fit = torch.randn_like(sample)
    print(outlier_factor(sample, sample_fit))

    from utils.dataloading import load_dataset
    from argparse import Namespace
    from modules.autoencoder import Autoencoder

    # Loaders for kdd_smtp with a 60/20/20 split
    data_args = Namespace(dataset_name='kdd_smtp',
                          test_prop=0.2,
                          val_prop=0.2,
                          batch_size=128)
    loaded = load_dataset(args=data_args)
    train_loader, val_loader, test_loader, scaler, model_args = loaded

    # Untrained Mahalanobis autoencoder in double precision
    args = Namespace(mahalanobis=True,
                     mahalanobis_cov_decay=0.9,
                     distort_inputs=False)
    ae = Autoencoder(model_args.layer_dims, args.mahalanobis,
                     args.mahalanobis_cov_decay, args.distort_inputs)
    ae.double()

    # This check always runs on the CPU
    device = torch.device("cpu")
    ae.to(device)

    criterion = torch.nn.L1Loss()
    test = validate(train_loader, ae, criterion, scaler, device)


--------------------------------------------------------------------------------
/Mahalanobis/utils/tracking.py:
--------------------------------------------------------------------------------
 1 | 
 2 | import datetime
 3 | import re
 4 | import os
 5 | import csv
 6 | import json
 7 | from tensorboardX import SummaryWriter
 8 | 
 9 | class Tracker:
10 | 
11 |     def __init__(self, args):
12 | 
13 |         # Make signature of experiment
14 |         time_signature = str(datetime.datetime.now())[:19]
15 |         time_signature = re.sub('[^0-9]', '_', time_signature)
16 |         signature = '{}_{}_{}'.format(time_signature, args.model_name,
17 |                                       args.dataset_name)
18 | 
19 |         # Set directory to store run
20 |         self.dir = './runs/{}/'.format(signature)
21 | 
22 |         if not os.path.exists(self.dir):
23 |             os.makedirs(self.dir)
24 | 
25 |         # Store settings
26 |         settings_dict = vars(args)
27 | 
28 |         with open(self.dir + 'settings.json', 'w') as file:
29 |             json.dump(settings_dict, file, sort_keys=True, indent=4)
30 | 
31 |         # Create csv file for appending stuff during training
32 |         with open(self.dir + 'train_metrics.csv', 'w') as file:
33 |             filewriter = csv.writer(file, delimiter=';')
34 |             filewriter.writerow(['epoch', 'train_loss', 'val_loss',
35 |                                  'top1_percent', 'top5_percent',
36 |                                  'top10_percent', 'top25_percent'])
37 | 
38 |         # Tensorboard writer
39 |         self.tensorboard=args.tensorboard
40 |         if self.tensorboard:
41 |             self.writer = SummaryWriter(log_dir=self.dir + 'tensorboard/')
42 |             self.k = 0  # Counter for tensorboard events
43 | 
44 |     def __del__(self):
45 |         if self.tensorboard:
46 |             self.writer.close()
47 | 
48 |     def track(self, epoch, train_loss, val_loss, top1_percent=0,
49 |               top5_percent=0, top10_percent=0, top25_percent=0):
50 | 
51 |         # Collect values in list
52 |         metrics = [epoch, train_loss, val_loss, top1_percent, top5_percent,
53 |                    top10_percent, top25_percent]
54 | 
55 |         # Append to csv file
56 |         with open(self.dir + 'train_metrics.csv', 'a') as f:
57 |             writer = csv.writer(f)
58 |             writer.writerow(metrics)
59 | 
60 |         # Write tensorboard events
61 |         if self.tensorboard:
62 |             self.writer.add_scalar('data/train_loss', train_loss, self.k)
63 |             self.writer.add_scalar('data/val_loss', val_loss, self.k)
64 |             self.writer.add_scalar('data/top1_percent', top1_percent, self.k)
65 |             self.writer.add_scalar('data/top5_percent', top5_percent, self.k)
66 |             self.writer.add_scalar('data/top10_percent', top10_percent, self.k)
67 |             self.writer.add_scalar('data/top25_percent', top25_percent, self.k)
68 |             self.k += 1
69 | 
if __name__=='__main__':

    from argparse import Namespace

    # Minimal settings for a tensorboard-enabled tracker
    demo_settings = dict(dataset_name='shuttle',
                         test_prop=0.2,
                         val_prop=0.2,
                         batch_size=128,
                         model_name='autoencoder',
                         tensorboard=True)
    args = Namespace(**demo_settings)

    t = Tracker(args)

    # One row of metrics; top25_percent keeps its default of 0
    t.track(epoch=10, train_loss=0.1, val_loss=0.11, top1_percent=0.111,
            top5_percent=0.1111, top10_percent=0.11111)
83 | 


--------------------------------------------------------------------------------
/Mahalanobis/verify_mahal_equivalence.py:
--------------------------------------------------------------------------------
 1 | # Author：马肖
 2 | # E-mail：maxiaoscut@aliyun.com
 3 | # Github：https://github.com/Albertsr
 4 | 
 5 | import numpy as np
 6 | import pandas as pd
 7 | from mahal_dist import mahal_dist
 8 | from mahal_dist_variant import mahal_dist_variant
 9 | 
def generate_dataset(seed):
    """Build a random dataset of inliers followed by outliers.

    Row count, column count and contamination rate are drawn from a
    RandomState seeded with ``seed``, so equal seeds give equal datasets.
    """
    rng = np.random.RandomState(seed)
    n_rows = rng.randint(8000, 10000)
    n_cols = rng.randint(30, 35)
    contamination = rng.uniform(0.015, 0.025)

    n_outliers = int(n_rows * contamination)
    n_inliers = n_rows - n_outliers

    # Inliers follow the standard normal distribution
    inliers = rng.randn(n_inliers, n_cols)

    # First half of the outliers (rounded down), then the remainder
    n_gamma = n_outliers // 2
    n_exponential = n_outliers - n_gamma

    # The first group is gamma distributed, the second exponential
    gamma_part = rng.gamma(shape=2, scale=0.5, size=(n_gamma, n_cols))
    exponential_part = rng.exponential(1.5, size=(n_exponential, n_cols))

    # Stack along axis 0: inliers, gamma outliers, exponential outliers
    return np.concatenate([inliers, gamma_part, exponential_part], axis=0)
35 | 
def verify_maldist_equivalence(dataset):
    """Check that mahal_dist and mahal_dist_variant rank the rows alike.

    Returns True when sorting the rows by either score, in descending order,
    gives the same sequence of indices. The two score vectors themselves are
    asserted to differ.
    """
    # Scores from the original definition of the Mahalanobis distance
    scores_original = mahal_dist(dataset)
    ranking_original = np.argsort(-scores_original)

    # Scores from the variant
    scores_variant = mahal_dist_variant(dataset)
    ranking_variant = np.argsort(-scores_variant)

    # The raw values are expected to differ; only the ordering should match
    values_match = np.allclose(scores_original, scores_variant)
    assert not values_match, '马氏距离及其变体返回的数值一般不相等'

    return np.allclose(ranking_original, ranking_variant)
50 | 
# Draw ten distinct random seeds and build one dataset per seed
seeds = np.random.choice(range(1000), size=10, replace=False)
datasets = [generate_dataset(seed) for seed in seeds]

# One verification result per dataset
verify_result = [verify_maldist_equivalence(dataset) for dataset in datasets]

# Print the overall verdict
if not all(verify_result):
    print('经过随机数据集的测试，马氏距离及其变体对样本相对异常程度的评估不一致')
else:
    description = '经过{:}个不重复的随机数据集的测试，马氏距离及其变体对样本相对异常程度的评估是一致的\n'
    print(description.format(len(seeds)))

# Print the per-dataset results as a one-row table
dataset_name = ['Dataset_' + str(i) for i, _ in enumerate(seeds)]
verify_result = pd.DataFrame(verify_result, index=dataset_name, columns=['Equivalence'])
print(verify_result.T)
68 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # AnomalyDetection
 2 | Anomaly Detection in computer vision
 3 | 
 4 | ## paper list in records
 5 | [paper](./records/README.md)
 6 | 
 7 | 
## Difficulties and potential solutions
 9 | [solution](./records/difficulty.md)
10 | 
11 | 
12 | ## some resources about anomaly detection
13 | [resources](./resources.md)
14 | [projects](./projects.md)


--------------------------------------------------------------------------------
/adVAE/imgs/advae.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ForrestPi/AnomalyDetection/77208ff9ba86efa937d10d8aa3f20c9fbefb2d91/adVAE/imgs/advae.png


--------------------------------------------------------------------------------
/adVAE/readme.md:
--------------------------------------------------------------------------------
## adVAE: A Self-adversarial Variational Autoencoder with Gaussian Anomaly Prior Knowledge for Anomaly Detection
 2 | 
 3 | 
 4 | ## Architecture
 5 | ![img](https://github.com/YeongHyeon/adVAE/blob/master/figures/advae.png)
 6 | 
 7 | 
 8 | 
 9 | 
10 | ## Reference
11 | [adVAE](https://github.com/YeongHyeon/adVAE)
12 | 
[1] Wang, Xuhong, et al. <a href="https://www.sciencedirect.com/science/article/pii/S0950705119305283">adVAE: A self-adversarial variational autoencoder with Gaussian anomaly prior knowledge for anomaly detection</a>. Knowledge-Based Systems 190 (2020): 105187.
14 | 
15 | 


--------------------------------------------------------------------------------
/anomalyLocalization/README.md:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | ## Iterative energy-based projection on a normal data manifold for anomaly localization
 4 | 
 5 | ## Architecture
 6 | ![img](./imgs/001.png)
 7 | 
 8 | 
 9 | ## Result
10 | ![img](./imgs/002.png)
11 | 
12 | ![img](./imgs/003.png)
13 | 
14 | 
15 | ## Reference
16 | [1] Dehaene, David, et al. <a href="https://openreview.net/forum?id=HJx81ySKwr">Iterative energy-based projection on a normal data manifold for anomaly localization</a>. arXiv preprint arXiv:2002.03734 (2020).
17 | 
18 | 
Reference: https://qiita.com/kogepan102/items/122b2862ad5a51180656


--------------------------------------------------------------------------------
/anomalyLocalization/code/dataset.py:
--------------------------------------------------------------------------------
 1 | # data loader 
 2 | import os
 3 | import numpy as np
 4 | from PIL import Image
 5 | 
 6 | import torch
 7 | from torch.utils import data
 8 | from torchvision import transforms as T
 9 | import torch.nn as nn
10 | import torch.optim as optim
11 | from torch.nn import functional as F
12 | 
13 | 
14 | 
class MVTecAD(data.Dataset):
    """Dataset of sequentially numbered PNG images stored in one directory.

    Item ``i`` is the file ``{i:03}.png`` inside ``image_dir`` (``000.png``,
    ``001.png``, ...), passed through ``transform`` before being returned.
    """

    def __init__(self, image_dir, transform):
        """Remember the image directory and the transform applied to each image."""
        self.image_dir = image_dir
        self.transform = transform

    def __getitem__(self, index):
        """Open ``{index:03}.png`` and return the transformed image."""
        filename = "{:03}.png".format(index)
        image = Image.open(os.path.join(self.image_dir, filename))
        return self.transform(image)

    def __len__(self):
        """Return the number of ``.png`` files in the image directory.

        Only names ending in ``.png`` are counted because ``__getitem__`` can
        only ever open such files; counting every directory entry would hand
        the sampler indices that do not correspond to any image.
        """
        return sum(
            1 for entry in os.listdir(self.image_dir) if entry.endswith(".png")
        )
32 | 
33 | 
def return_MVTecAD_loader(image_dir, batch_size=256, train=True):
    """Create a ``DataLoader`` over the ``MVTecAD`` images in ``image_dir``.

    Every image is resized to 512x512, randomly cropped to 128x128, randomly
    flipped horizontally and vertically (p=0.5 each) and converted to a tensor.
    ``train`` only controls whether the loader shuffles.
    """
    pipeline = T.Compose([
        T.Resize((512, 512)),
        T.RandomCrop((128, 128)),
        T.RandomHorizontalFlip(p=0.5),
        T.RandomVerticalFlip(p=0.5),
        T.ToTensor(),
    ])
    return data.DataLoader(
        dataset=MVTecAD(image_dir, pipeline),
        batch_size=batch_size,
        shuffle=train,
    )


--------------------------------------------------------------------------------
/anomalyLocalization/code/eval.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import torch
 3 | from torch.nn import functional as F
 4 | from dataset import return_MVTecAD_loader
 5 | from network import VAE,loss_function
 6 | import matplotlib.pyplot as plt
 7 | 
 8 | def eval(model,test_loader,device):
 9 |     model.eval()
10 |     x_0 = iter(test_loader).next()
11 |     with torch.no_grad():
12 |         x_vae = model(x_0.to(device)).detach().cpu().numpy()
13 | 
14 | 
def EBM(model,test_loader,device):
    """Iteratively refine the first test batch by gradient steps on an energy.

    Starting from the batch ``x_0``, the iterate ``x_t`` is moved against the
    gradient of ``BCE(x_t, recon) + lamda * |x_t - x_0|`` (the first step uses
    the BCE term only), where ``recon`` is the detached model output for
    ``x_t``. Each step is weighted element-wise by the squared reconstruction
    error. The iterate is saved with ``iterative_plot`` after each of the 15
    refinement steps.

    The gradient is taken with respect to the *current* iterate via
    ``torch.autograd.grad``. Reading ``x_0.grad`` after repeated
    ``backward()`` calls would instead return the running sum of all earlier
    gradients with respect to ``x_0``.

    Args:
        model: module mapping a batch to its reconstruction.
        test_loader: iterable whose first item is the input batch tensor.
        device: device on which the refinement is run.
    """
    # NOTE(review): train mode is kept from the original code; confirm whether
    # evaluation mode is actually wanted here.
    model.train()
    x_0 = next(iter(test_loader))
    alpha = 0.05
    lamda = 1
    x_0 = x_0.to(device).clone().detach()

    # Initial step: reconstruction energy only, gradient evaluated at x_0.
    x_t = x_0.clone().requires_grad_(True)
    recon_x = model(x_t).detach()
    loss = F.binary_cross_entropy(x_t, recon_x, reduction='sum')
    x_grad, = torch.autograd.grad(loss, x_t)
    x_t = (x_t - alpha * x_grad * (x_t - recon_x) ** 2).detach()

    for i in range(15):
        # Fresh leaf each step so the graph does not grow across iterations.
        x_t.requires_grad_(True)
        recon_x = model(x_t).detach()
        loss = F.binary_cross_entropy(x_t, recon_x, reduction='sum') + lamda * torch.abs(x_t - x_0).sum()
        x_grad, = torch.autograd.grad(loss, x_t)

        # F.normalize with its defaults: L2 normalisation along dim 1.
        x_grad = F.normalize(x_grad)
        eps = 0.4
        x_t = (x_t - eps * x_grad * (x_t - recon_x) ** 2).detach()
        iterative_plot(x_t.cpu().numpy(), i)
40 | 
41 |         
42 | # gif
def iterative_plot(x_t, j):
    """Save up to ten images of a batch side by side as ``./results/{j}.png``.

    Args:
        x_t: indexable batch; ``x_t[i][0]`` is drawn as a grayscale image.
        j: step index, used as the output file name.
    """
    # savefig does not create missing directories.
    os.makedirs("./results", exist_ok=True)
    fig = plt.figure(figsize=(15, 4))
    # Batches smaller than ten images are plotted as far as they go.
    for i in range(min(10, len(x_t))):
        plt.subplot(1, 10, i+1)
        plt.xticks([])
        plt.yticks([])
        plt.imshow(x_t[i][0], cmap=plt.cm.gray)
    plt.subplots_adjust(wspace=0., hspace=0.)
    plt.savefig("./results/{}.png".format(j))
    # Close the figure so repeated calls do not keep every figure alive.
    plt.close(fig)
53 |     
def main():
    """Load the epoch-500 VAE checkpoint and run ``EBM`` on one test batch."""
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(device)

    seed = 42
    out_dir = './logs'
    os.makedirs(out_dir, exist_ok=True)
    checkpoints_dir = "./checkpoints"
    # Create the checkpoints directory itself (the log directory used to be
    # created a second time here, which raised FileExistsError).
    os.makedirs(checkpoints_dir, exist_ok=True)

    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)

    model = VAE(z_dim=512)
    # map_location lets a checkpoint saved on a GPU load on a CPU-only host.
    state = torch.load(os.path.join(checkpoints_dir, "500.pth"), map_location=device)
    model.load_state_dict(state)
    model = model.to(device)

    test_loader = return_MVTecAD_loader(image_dir="./mvtec_anomaly_detection/grid/test/metal_contamination/", batch_size=10, train=False)
    EBM(model,test_loader,device)
79 |     
80 | if __name__ == "__main__":
81 |     main()


--------------------------------------------------------------------------------
/anomalyLocalization/code/eval.sh:
--------------------------------------------------------------------------------
1 | CUDA_VISIBLE_DEVICES=2 python eval.py


--------------------------------------------------------------------------------
/anomalyLocalization/code/network.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | from torch import nn
 3 | from torch.nn import functional as F
 4 | class VAE(nn.Module):
 5 | 
 6 |     def __init__(self, z_dim=128):
 7 |         super(VAE, self).__init__()
 8 | 
 9 |         # encode
10 |         self.conv_e = nn.Sequential(
11 |             nn.Conv2d(1, 32, kernel_size=4, stride=2, padding=1),    # 128 ⇒ 64
12 |             nn.BatchNorm2d(32),            
13 |             nn.LeakyReLU(0.2),
14 |             nn.Conv2d(32, 64, kernel_size=4, stride=2, padding=1),  # 64 ⇒ 32
15 |             nn.BatchNorm2d(64),
16 |             nn.LeakyReLU(0.2),
17 |             nn.Conv2d(64, 128, kernel_size=4, stride=2, padding=1),  # 32 ⇒ 16
18 |             nn.BatchNorm2d(128),
19 |             nn.LeakyReLU(0.2),     
20 |         )
21 |         self.fc_e = nn.Sequential(
22 |             nn.Linear(128 * 16 * 16, 1024),
23 |             nn.BatchNorm1d(1024),
24 |             nn.LeakyReLU(0.2),
25 |             nn.Linear(1024, z_dim*2),
26 |         )
27 | 
28 |         # decode
29 |         self.fc_d = nn.Sequential(
30 |             nn.Linear(z_dim, 1024),
31 |             nn.BatchNorm1d(1024),
32 |             nn.LeakyReLU(0.2),
33 |             nn.Linear(1024, 128 * 16 * 16),
34 |             nn.LeakyReLU(0.2)
35 |         )
36 |         self.conv_d = nn.Sequential(
37 |             nn.ConvTranspose2d(128, 64, kernel_size=4, stride=2, padding=1),
38 |             nn.BatchNorm2d(64),
39 |             nn.LeakyReLU(0.2),
40 |             nn.ConvTranspose2d(64, 32, kernel_size=4, stride=2, padding=1),
41 |             nn.BatchNorm2d(32),
42 |             nn.LeakyReLU(0.2),
43 |             nn.ConvTranspose2d(32, 1, kernel_size=4, stride=2, padding=1),
44 |             nn.Sigmoid()
45 |         )
46 | 
47 |         self.z_dim = z_dim
48 | 
49 |     def encode(self, input):
50 |         x = self.conv_e(input)
51 |         x = x.view(-1, 128*16*16)
52 |         x = self.fc_e(x)
53 |         return x[:, :self.z_dim], x[:, self.z_dim:]
54 | 
55 |     def reparameterize(self, mu, logvar):
56 |         if self.training:
57 |             std = logvar.mul(0.5).exp_()
58 |             eps = std.new(std.size()).normal_()
59 |             return eps.mul(std).add_(mu)
60 |         else:
61 |             return mu
62 | 
63 |     def decode(self, z):
64 |         h = self.fc_d(z)
65 |         h = h.view(-1, 128, 16, 16)
66 |         return self.conv_d(h)
67 | 
68 |     def forward(self, x):
69 |         mu, logvar = self.encode(x)
70 |         z = self.reparameterize(mu, logvar)
71 |         self.mu = mu
72 |         self.logvar = logvar
73 |         return self.decode(z)
74 | 
75 | 
76 | 
def loss_function(recon_x, x, mu, logvar):
    """Return the summed binary cross-entropy plus the KL divergence term.

    The KL term is ``-0.5 * sum(1 + logvar - mu^2 - exp(logvar))``.
    """
    reconstruction_term = F.binary_cross_entropy(recon_x, x, reduction='sum')
    kl_term = torch.sum(1 + logvar - mu.pow(2) - logvar.exp()) * -0.5
    return reconstruction_term + kl_term
81 | 
82 | 
83 | 
84 | 


--------------------------------------------------------------------------------
/anomalyLocalization/code/train.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import torch
 3 | from torch.nn import functional as F
 4 | from dataset import return_MVTecAD_loader
 5 | from network import VAE,loss_function
 6 | import matplotlib.pyplot as plt
 7 | 
 8 | def train(model,train_loader,device,optimizer,epoch):
 9 |     model.train()
10 |     train_loss = 0
11 |     for batch_idx, data in enumerate(train_loader):
12 |         data = data.to(device)
13 |         optimizer.zero_grad()
14 |         recon_batch = model(data)
15 |         loss = loss_function(recon_batch, data, model.mu, model.logvar)
16 |         loss.backward()
17 |         train_loss += loss.item()
18 |         optimizer.step()
19 |     train_loss /= len(train_loader.dataset)
20 |     return train_loss
21 | 
22 | 
def eval(model,test_loader,device):
    """Reconstruct the first batch of ``test_loader`` with ``model``.

    The model is switched to evaluation mode and run without gradient
    tracking.

    Args:
        model: module called on the batch.
        test_loader: iterable whose first item is the input batch tensor.
        device: device the batch is moved to before the forward pass.

    Returns:
        The model output for that batch as a NumPy array.
    """
    model.eval()
    # next(iter(...)) is the Python 3 iterator protocol; iterators are not
    # required to expose a ``.next()`` method.
    x_0 = next(iter(test_loader))
    with torch.no_grad():
        x_vae = model(x_0.to(device)).detach().cpu().numpy()
    return x_vae
28 | 
29 | 
def EBM(model,test_loader,device):
    """Iteratively refine the first test batch by gradient steps on an energy.

    Starting from the batch ``x_0``, the iterate ``x_t`` is moved against the
    gradient of ``BCE(x_t, recon) + lamda * |x_t - x_0|`` (the first step uses
    the BCE term only), where ``recon`` is the detached model output for
    ``x_t``. Each step is weighted element-wise by the squared reconstruction
    error. The iterate is saved with ``iterative_plot`` after each of the 15
    refinement steps.

    The gradient is taken with respect to the *current* iterate via
    ``torch.autograd.grad``. Reading ``x_0.grad`` after repeated
    ``backward()`` calls would instead return the running sum of all earlier
    gradients with respect to ``x_0``.

    Args:
        model: module mapping a batch to its reconstruction.
        test_loader: iterable whose first item is the input batch tensor.
        device: device on which the refinement is run.
    """
    # NOTE(review): train mode is kept from the original code; confirm whether
    # evaluation mode is actually wanted here.
    model.train()
    x_0 = next(iter(test_loader))
    alpha = 0.05
    lamda = 1
    x_0 = x_0.to(device).clone().detach()

    # Initial step: reconstruction energy only, gradient evaluated at x_0.
    x_t = x_0.clone().requires_grad_(True)
    recon_x = model(x_t).detach()
    loss = F.binary_cross_entropy(x_t, recon_x, reduction='sum')
    x_grad, = torch.autograd.grad(loss, x_t)
    x_t = (x_t - alpha * x_grad * (x_t - recon_x) ** 2).detach()

    for i in range(15):
        # Fresh leaf each step so the graph does not grow across iterations.
        x_t.requires_grad_(True)
        recon_x = model(x_t).detach()
        loss = F.binary_cross_entropy(x_t, recon_x, reduction='sum') + lamda * torch.abs(x_t - x_0).sum()
        x_grad, = torch.autograd.grad(loss, x_t)

        eps = 0.001
        x_t = (x_t - eps * x_grad * (x_t - recon_x) ** 2).detach()
        iterative_plot(x_t.cpu().numpy(), i)
52 | 
53 |         
54 | # gif
def iterative_plot(x_t, j):
    """Save up to ten images of a batch side by side as ``./results/{j}.png``.

    Args:
        x_t: indexable batch; ``x_t[i][0]`` is drawn as a grayscale image.
        j: step index, used as the output file name.
    """
    # savefig does not create missing directories.
    os.makedirs("./results", exist_ok=True)
    fig = plt.figure(figsize=(15, 4))
    # Batches smaller than ten images are plotted as far as they go.
    for i in range(min(10, len(x_t))):
        plt.subplot(1, 10, i+1)
        plt.xticks([])
        plt.yticks([])
        plt.imshow(x_t[i][0], cmap=plt.cm.gray)
    plt.subplots_adjust(wspace=0., hspace=0.)
    plt.savefig("./results/{}.png".format(j))
    # Close the figure so repeated calls do not keep every figure alive.
    plt.close(fig)
64 |     #plt.show()
65 |     
def main():
    """Train the VAE, save a checkpoint every 10 epochs, then run eval and EBM."""
    train_loader = return_MVTecAD_loader(image_dir="./mvtec_anomaly_detection/grid/train/good/", batch_size=256, train=True)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(device)

    seed = 42
    out_dir = './logs'
    os.makedirs(out_dir, exist_ok=True)
    checkpoints_dir = "./checkpoints"
    # Create the checkpoints directory itself (the log directory used to be
    # created a second time here, so torch.save below had nowhere to write).
    os.makedirs(checkpoints_dir, exist_ok=True)

    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)

    model = VAE(z_dim=512).to(device)

    optimizer = torch.optim.Adam(model.parameters(), lr=5e-4)
    num_epochs = 500
    for epoch in range(num_epochs):
        loss = train(model=model,train_loader=train_loader,device=device,optimizer=optimizer,epoch=epoch)
        print('epoch [{}/{}], train loss: {:.4f}'.format(epoch + 1,num_epochs,loss))
        if (epoch+1) % 10 == 0:
            torch.save(model.state_dict(), os.path.join(checkpoints_dir,"{}.pth".format(epoch+1)))
    test_loader = return_MVTecAD_loader(image_dir="./mvtec_anomaly_detection/grid/test/metal_contamination/", batch_size=10, train=False)
    eval(model=model,test_loader=test_loader,device=device)
    EBM(model,test_loader,device)
96 |     
97 | if __name__ == "__main__":
98 |     main()


--------------------------------------------------------------------------------
/anomalyLocalization/code/train.sh:
--------------------------------------------------------------------------------
1 | CUDA_VISIBLE_DEVICES=1 python train.py


--------------------------------------------------------------------------------
/anomalyLocalization/imgs/001.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ForrestPi/AnomalyDetection/77208ff9ba86efa937d10d8aa3f20c9fbefb2d91/anomalyLocalization/imgs/001.png


--------------------------------------------------------------------------------
/anomalyLocalization/imgs/002.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ForrestPi/AnomalyDetection/77208ff9ba86efa937d10d8aa3f20c9fbefb2d91/anomalyLocalization/imgs/002.png


--------------------------------------------------------------------------------
/anomalyLocalization/imgs/003.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ForrestPi/AnomalyDetection/77208ff9ba86efa937d10d8aa3f20c9fbefb2d91/anomalyLocalization/imgs/003.png


--------------------------------------------------------------------------------
/anomalyLocalization/imgs/face - 副本.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ForrestPi/AnomalyDetection/77208ff9ba86efa937d10d8aa3f20c9fbefb2d91/anomalyLocalization/imgs/face - 副本.png


--------------------------------------------------------------------------------
/dataset/imgs/001.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ForrestPi/AnomalyDetection/77208ff9ba86efa937d10d8aa3f20c9fbefb2d91/dataset/imgs/001.png


--------------------------------------------------------------------------------
/dataset/readme.md:
--------------------------------------------------------------------------------
1 | ## MVTec AD--A Comprehensive Real-World Dataset for Unsupervised Anomaly Detection
2 | 
3 | ## Presentation
4 | ![img](./imgs/001.png)
5 | 
6 | ## Reference
7 | [1] Bergmann, Paul, et al. <a href="http://openaccess.thecvf.com/content_CVPR_2019/html/Bergmann_MVTec_AD_--_A_Comprehensive_Real-World_Dataset_for_Unsupervised_Anomaly_CVPR_2019_paper.html">MVTec AD--A Comprehensive Real-World Dataset for Unsupervised Anomaly Detection</a>. Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition. 2019.
8 | 


--------------------------------------------------------------------------------
/memae/imgs/memae.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ForrestPi/AnomalyDetection/77208ff9ba86efa937d10d8aa3f20c9fbefb2d91/memae/imgs/memae.png


--------------------------------------------------------------------------------
/memae/memoryzing_normality_to_detect_anomaly.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Memorizing Normality to Detect Anomaly: Memory-augmented Deep Autoencoder for Unsupervised Anomaly Detection
  3 | https://arxiv.org/pdf/1904.02639.pdf
  4 | 
  5 | #https://github.com/VieVie31/cool-papers-in-pytorch/blob/master/memoryzing_normality_to_detect_anomaly.py
  6 | """
  7 | import numpy as np
  8 | import matplotlib.pyplot as plt
  9 | 
 10 | import torch
 11 | import torch as T
 12 | import torch.nn as nn
 13 | 
 14 | from torch.nn.modules import *
 15 | 
 16 | from tqdm import tqdm, trange
 17 | from torchvision import datasets, transforms
 18 | 
 19 | from sklearn.metrics import f1_score, accuracy_score
 20 | 
 21 | 
 22 | T.set_default_tensor_type('torch.FloatTensor')
 23 | 
 24 | batch_size = 32
 25 | nb_epochs  = 1
 26 | nb_digits  = 10
 27 | 
 28 | 
 29 | train_normals = [
 30 |     img for img, lbl in datasets.MNIST(
 31 |         './data', train=True, download=True,
 32 |         transform=transforms.Compose([
 33 |             transforms.ToTensor(), 
 34 |             #transforms.Normalize((0.1307,), (0.3081,))
 35 |         ])
 36 |     ) if lbl == 9
 37 | ]
 38 | train_normals = torch.utils.data.TensorDataset(
 39 |     torch.tensor([v.numpy() for v in train_normals])
 40 | )
 41 | train_normals_loader = T.utils.data.DataLoader(
 42 |     train_normals, 
 43 |     batch_size=batch_size, 
 44 |     shuffle=True
 45 | ) 
 46 | 
 47 | 
 48 | train_loader = T.utils.data.DataLoader(datasets.MNIST(
 49 |     './data', train=True, download=True,
 50 |     transform=transforms.Compose([
 51 |         transforms.ToTensor(),
 52 |     ])),
 53 |     batch_size=batch_size, shuffle=True
 54 | )
 55 |  
 56 | test_loader = T.utils.data.DataLoader(datasets.MNIST(
 57 |     './data', train=False, download=True,
 58 |     transform=transforms.Compose([
 59 |         transforms.ToTensor(),
 60 |     ])),
 61 |     batch_size=batch_size, shuffle=False
 62 | ) 
 63 | 
 64 | class Encoder(nn.Module):
 65 |     def __init__(self):
 66 |         super(Encoder, self).__init__()
 67 |         self.cnn = nn.Sequential(
 68 |             nn.Conv2d(1,  16, 1, stride=1),
 69 |             nn.BatchNorm2d(16),
 70 |             nn.ReLU(),
 71 |             nn.Conv2d(16, 32, 3, stride=2),
 72 |             nn.BatchNorm2d(32),
 73 |             nn.ReLU(),
 74 |             nn.Conv2d(32, 64, 3, stride=2),
 75 |             nn.BatchNorm2d(64),
 76 |             nn.ReLU(),
 77 |         )
 78 | 
 79 |     def forward(self, x):
 80 |         return self.cnn(x)
 81 | 
 82 | 
 83 | class Decoder(nn.Module):
 84 |     def __init__(self):
 85 |         super(Decoder, self).__init__()
 86 |         self.cnn = nn.Sequential(
 87 |             nn.ConvTranspose2d(64, 32, 2, stride=2),
 88 |             nn.BatchNorm2d(32),
 89 |             nn.ReLU(),
 90 |             nn.ConvTranspose2d(32, 16, 2, stride=2),
 91 |             nn.BatchNorm2d(16),
 92 |             nn.ReLU(),
 93 |             nn.ConvTranspose2d(16,  1, 3, ),
 94 |             nn.Sigmoid()
 95 |         )
 96 |     
 97 |     def forward(self, x):
 98 |         return self.cnn(x) #[B, 1, 26, 26]
 99 | 
class Memory(nn.Module):
    """Memory bank read with cosine-similarity attention and hard shrinkage.

    Args:
        dimention: length of each memory item (and of each query vector).
        capacity: number of memory items.
        lbd: shrinkage threshold; attention weights at or below it are zeroed.
    """

    def __init__(self, dimention, capacity=100, lbd=.02):
        super(Memory, self).__init__()
        self.cap = capacity
        self.dim = dimention
        self.lbd = lbd
        # Registered as a Parameter so the memory is part of state_dict() and
        # parameters(), and follows the module across devices.
        self.mem = nn.Parameter(T.rand((capacity, dimention)))
        self.cos_sim = nn.CosineSimilarity()
        self.softmax = nn.Softmax(1)

    def forward(self, z):
        """Return ``(z_hat, adressing_enery)`` for a batch of queries.

        Args:
            z: queries of shape [BATCH, dimention].

        Returns:
            z_hat: [BATCH, dimention] combinations of memory items.
            adressing_enery: ``-w_hat * log(w_hat + 1e-3)`` summed over the
                batch axis, one value per memory item.
        """
        #z should be : [BATCH, dimention]
        z = z.unsqueeze(1)
        #compute w with attention
        w = self.softmax(self.cos_sim(
            z.permute(0, 2, 1),
            self.mem.expand(z.shape[0], self.cap, self.dim).permute(0, 2, 1)
        ))
        #hard-shrinking of w
        t = w - self.lbd
        # zeros_like keeps the comparison tensor on w's device and dtype.
        w_hat = (T.max(t, T.zeros_like(w)) * w) / (abs(t) + 1e-15)
        print("average number of 0ed adresses", ((w_hat == 0).sum(1)).float().mean())
        # epsilon keeps the normalisation finite when every weight was zeroed
        w_hat = (w_hat + 1e-15) / (w_hat + 1e-15).sum(1).reshape(-1, 1)
        #compute the w_hat energy by request
        adressing_enery = (-w_hat * T.log(w_hat + 1e-3)).sum(0)
        #get z_hat from memory with the computed soft addresses w_hat
        z_hat = w_hat.mm(self.mem)
        return z_hat, adressing_enery
128 | 
129 | # Build the proposed model
class MemAE(nn.Module):
    """Autoencoder whose flattened latent code is replaced by a memory read-out."""

    def __init__(self, dimension=2304, capacity=100, lbd=.002):
        super().__init__()
        self.encoder = Encoder()
        self.decoder = Decoder()
        self.memory = Memory(dimention=dimension, capacity=capacity, lbd=lbd)

    def forward(self, x):
        """Return ``(reconstruction, adressing_enery)`` for the batch ``x``."""
        latent = self.encoder(x)
        latent_shape = latent.shape
        # The memory works on one flat vector per sample.
        flat_latent = latent.reshape(latent_shape[0], -1)
        retrieved, adressing_enery = self.memory(flat_latent)
        # Restore the encoder's feature-map shape before decoding.
        reconstruction = self.decoder(retrieved.reshape(latent_shape))
        return reconstruction, adressing_enery

    def parameters(self):
        """Yield encoder parameters, decoder parameters, then the memory tensor."""
        yield from self.encoder.parameters()
        yield from self.decoder.parameters()
        yield self.memory.mem
155 | 
# Train a classic ConvAE for future comparison
# (plain Encoder -> Decoder, trained for one pass over the "normal" loader)
classic_AE = nn.Sequential(Encoder(), Decoder())

optimizer = torch.optim.Adam(classic_AE.parameters())
loss_function = nn.BCELoss()

classic_AE.train()
for (x,) in tqdm(train_normals_loader):
    # Drop a one-pixel border: the target becomes 26x26, the size the
    # Decoder's own comment gives for its output.
    y = x[:, :, 1:-1, 1:-1]
    optimizer.zero_grad()
    yhat = classic_AE(x.view([x.shape[0], 1, 28, 28]))
    loss = loss_function(yhat, y)
    loss.backward()
    optimizer.step()
170 | 
171 | 
# Train the proposed anomaly detection autoencoder
anomdec_memae = MemAE(lbd=.01)

optimizer = torch.optim.Adam(anomdec_memae.parameters())
loss_function = nn.BCELoss()

anomdec_memae.train()
# Two passes over the "normal" loader.
for i in range(2):
    for (x,) in tqdm(train_normals_loader):
        # Drop a one-pixel border so the target is 26x26 like the decoder output.
        y = x[:, :, 1:-1, 1:-1]
        optimizer.zero_grad()
        yhat, energy = anomdec_memae(x.view([x.shape[0], 1, 28, 28]))
        # Reconstruction loss plus the addressing energy weighted by 0.002.
        loss = loss_function(yhat, y) + (.002 * energy).mean()
        loss.backward()
        optimizer.step()
        # Slowly raise the shrinkage threshold: +1e-5 per batch, capped at 0.01005.
        anomdec_memae.memory.lbd = min(anomdec_memae.memory.lbd + 1e-5, 0.01005)
        print(loss.item(), energy.mean().item())
190 | 
# Classify "9 or not 9" on the test set after training on 9s only; the
# threshold is chosen a posteriori from the test-set errors of the 9s.


def _reconstruction_errors(reconstruct, loader):
    """Return (per-image summed squared reconstruction errors, labels) as arrays.

    ``reconstruct`` maps an image batch to its 26x26 reconstruction, which is
    compared with the input cropped by one pixel on each side.
    """
    errors, targets = [], []
    with torch.no_grad():  # scoring only; no graph is needed
        for xx, yy in tqdm(loader):
            residual = reconstruct(xx) - xx[:, :, 1:-1, 1:-1]
            errors.extend((residual ** 2).sum(1).sum(1).sum(1).numpy())
            targets.extend(yy.numpy())
    return np.array(errors), np.array(targets)


# Evaluation mode: BatchNorm uses its running statistics instead of the
# statistics of each test batch, and those statistics are not updated.
classic_AE.eval()
anomdec_memae.eval()

# Classical reconstruction error with the normal AE (threshold at 1.5 std)
classic_recontruction, labels = _reconstruction_errors(classic_AE, test_loader)
is_normal = labels == 9

print(
    "classical mean training reconstruction error on normal : ", 
    classic_recontruction[is_normal].mean()
)
print(
    "classical mean training reconstruction error on abnormal : ", 
    classic_recontruction[~is_normal].mean()
)

naive_th = classic_recontruction[is_normal].mean() + 1.5 * classic_recontruction[is_normal].std()

print("classical AE f1 :",       f1_score(is_normal, classic_recontruction < naive_th))
print("classical AE acc:", accuracy_score(is_normal, classic_recontruction < naive_th))
# Values noted in the original script (recorded before the eval-mode change
# above, so a re-run may give different numbers):
#classical AE f1 : 0.1899810019
#classical AE acc: 0.1899

# Compare with the new method
memae_recontruction, labels = _reconstruction_errors(
    lambda batch: anomdec_memae(batch)[0], test_loader
)
is_normal = labels == 9

print(
    "anomdec_memae mean training reconstruction error on normal : ", 
    memae_recontruction[is_normal].mean()
)
print(
    "anomdec_memae mean training reconstruction error on abnormal : ", 
    memae_recontruction[~is_normal].mean()
)

naive_th = memae_recontruction[is_normal].mean() + 1.5 * memae_recontruction[is_normal].std()

print("memory AE f1 :",       f1_score(is_normal, memae_recontruction < naive_th))
print("memory AE acc:", accuracy_score(is_normal, memae_recontruction < naive_th))
# Values noted in the original script (recorded before the eval-mode change):
#memory AE f1 : 0.455628495016
#memory AE acc: 0.7761
242 | 


--------------------------------------------------------------------------------
/memae/readme.md:
--------------------------------------------------------------------------------
 1 | 
 2 | ## Memorizing Normality to Detect Anomaly: Memory-augmented Deep Autoencoder for Unsupervised Anomaly Detection.
 3 | 
 4 | 
 5 | ## Architecture
 6 | ![img](https://github.com/YeongHyeon/MemAE/blob/master/figures/memae.png)
 7 | 
 8 | 
 9 | ## Reference
10 | [MemAE](https://github.com/YeongHyeon/MemAE)
11 | 
12 | [1] Dong Gong et al. (2019). <a href="https://arxiv.org/abs/1904.02639">Memorizing Normality to Detect Anomaly: Memory-augmented Deep Autoencoder for Unsupervised Anomaly Detection</a>. arXiv preprint arXiv:1904.02639.
13 | 


--------------------------------------------------------------------------------
/projects.md:
--------------------------------------------------------------------------------
1 | [pyod](https://github.com/yzhao062/pyod)
2 | 
3 | [SUOD](https://github.com/yzhao062/SUOD)
4 | 
5 | [anomaly-detection-resources](https://github.com/yzhao062/anomaly-detection-resources)
6 | 
7 | 


--------------------------------------------------------------------------------
/records/README.md:
--------------------------------------------------------------------------------
  1 | # awesome anomaly detection
  2 | A curated list of awesome anomaly detection resources. Inspired by [`awesome-architecture-search`](https://github.com/sdukshis/awesome-ml) and [`awesome-automl`](https://github.com/hibayesian/awesome-automl-papers).  
  3 | 
  4 | *Last updated: 2020/02/27*
  5 | 
  6 | ## What is anomaly detection?
  7 | 
  8 | ![img](./imgs/anomaly_detection_example1.PNG)
  9 | 
 10 | 
 11 | Anomaly detection is a technique used to identify unusual patterns that do not conform to expected behavior, called outliers. Typically, this is treated as an unsupervised learning problem where the anomalous samples are not known a priori and it is assumed that the majority of the training dataset consists of “normal” data (here and elsewhere the term “normal” means *not anomalous* and is unrelated to the Gaussian distribution). [Lukas Ruff et al., 2018; Deep One-Class Classification]
 12 | 
 13 | In general, Anomaly detection is also called `Novelty Detection` or `Outlier Detection`, `Forgery Detection` and `Out-of-distribution Detection`.   
 14 | 
 15 | Each term has slightly different meanings. Mostly, on the assumption that you do not have unusual data, this problem is especially called `One Class Classification`, `One Class Segmentation`.  
 16 | 
 17 | ![img](./imgs/anomaly_detection_types.png)
 18 | 
 19 | 
 20 | `Novelty Detection` and `Outlier Detection` also have slightly different meanings. The figure above shows the difference between the two terms.
 21 | 
 22 | Also, there are two types of target data. (`time-series data`, and `image data`)  
 23 | In time-series data, the aim is to detect abnormal sections or frames in the input data (e.g., videos, signals, etc.).  
 24 | In image data, it is aimed to classify abnormal images or to segment abnormal regions, for example, defect in some manufacturing data.  
 25 | 
 26 | ## Survey Paper
 27 | - Deep Learning for Anomaly Detection: A Survey | Raghavendra Chalapathy, Sanjay Chawla  | **[arXiv' 19]** |[`[pdf]`](https://arxiv.org/pdf/1901.03407.pdf)
 28 | 
 29 | 
 30 | ## Table of Contents
 31 | 
 32 | - [Time-series anomaly detection](#time-series-anomaly-detection)
 33 | - [Image-level anomaly detection](#image-level-anomaly-detection)
 34 |   - [One Class (Anomaly) Classification target](#one-class-anomaly-classification-target)
 35 |   - [Out-Of-Distribution(OOD) Detection target](#out-of-distributionood-detction-target)
 36 |   - [Anomaly Segmentation target](#anomaly-segmentation-target)
 37 | 
 38 | 
 39 | ## Image-level anomaly detection
 40 | 
 41 | ### One Class (Anomaly) Classification target
 42 | - Estimating the Support of a High- Dimensional Distribution [**OC-SVM**]  | **[Journal of Neural Computation' 01]** | [`[pdf]`](http://users.cecs.anu.edu.au/~williams/papers/P132.pdf)
 43 | - A Survey of Recent Trends in One Class Classification  | **[AICS' 09]** |  [`[pdf]`](https://aran.library.nuigalway.ie/xmlui/bitstream/handle/10379/1472/camera_ready_occ_lnai.pdf?sequence=1)
 44 | - Anomaly detection using autoencoders with nonlinear dimensionality reduction  | **[MLSDA Workshop' 14]** | [`[link]`](https://dl.acm.org/citation.cfm?id=2689747)
 45 | - A review of novelty detection | **[Signal Processing' 14]** |  [`[link]`](https://www.sciencedirect.com/science/article/pii/S016516841300515X)
 46 | - Variational Autoencoder based Anomaly Detection using Reconstruction Probability |  **[SNU DMC Tech' 15]** | [`[pdf]`](http://dm.snu.ac.kr/static/docs/TR/SNUDM-TR-2015-03.pdf)
 47 | - High-dimensional and large-scale anomaly detection using a linear one-class SVM with deep learning | **[Pattern Recognition' 16]** | [`[link]`](https://dl.acm.org/citation.cfm?id=2952200)
 48 | - Transfer Representation-Learning for Anomaly Detection | **[ICML' 16]** | [`[pdf]`](https://pdfs.semanticscholar.org/c533/52a4239568cc915ad968aff51c49924a3072.pdf)
 49 | - Outlier Detection with Autoencoder Ensembles  | **[SDM' 17]** | [`[pdf]`](http://saketsathe.net/downloads/autoencode.pdf)
 50 | - Provable self-representation based outlier detection in a union of subspaces | **[CVPR' 17]** | [`[pdf]`](https://arxiv.org/pdf/1704.03925.pdf)
 51 | - [**ALOCC**]Adversarially Learned One-Class Classifier for Novelty Detection  | **[CVPR' 18]** |  [`[pdf]`](https://arxiv.org/pdf/1802.09088.pdf) [`[code]`](https://github.com/khalooei/ALOCC-CVPR2018)
 52 | - Learning Deep Features for One-Class Classification | **[arXiv' 18]** |   [`[pdf]`](https://arxiv.org/pdf/1801.05365.pdf) [`[code]`](https://github.com/PramuPerera/DeepOneClass)
 53 | - Efficient GAN-Based Anomaly Detection  | **[arXiv' 18]** |  [`[pdf]`](https://arxiv.org/pdf/1802.06222.pdf)
 54 | - Hierarchical Novelty Detection for Visual Object Recognition  | **[CVPR' 18]** | [`[pdf]`](https://arxiv.org/pdf/1804.00722.pdf)
 55 | - Deep One-Class Classification | **[ICML' 18]** | [`[pdf]`](http://data.bit.uni-bonn.de/publications/ICML2018.pdf)
 56 | - Reliably Decoding Autoencoders’ Latent Spaces for One-Class Learning Image Inspection Scenarios | **[OAGM Workshop' 18]** | [`[pdf]`](https://workshops.aapr.at/wp-content/uploads/Proceedings/2018/OAGM_2018_paper_19.pdf)
 57 | - q-Space Novelty Detection with Variational Autoencoders  | **[arXiv' 18]** |  [`[pdf]`](https://arxiv.org/pdf/1806.02997.pdf)
 58 | - GANomaly: Semi-Supervised Anomaly Detection via Adversarial Training | **[ACCV' 18]** |  [`[pdf]`](https://arxiv.org/pdf/1805.06725.pdf)
 59 | - Deep Anomaly Detection Using Geometric Transformations  | **[NIPS' 18]** |  [`[pdf]`](http://papers.nips.cc/paper/8183-deep-anomaly-detection-using-geometric-transformations.pdf)
 60 | - Generative Probabilistic Novelty Detection with Adversarial Autoencoders | **[NIPS' 18]** |  [`[pdf]`](http://papers.nips.cc/paper/7915-generative-probabilistic-novelty-detection-with-adversarial-autoencoders.pdf)
 61 | - A loss framework for calibrated anomaly detection | **[NIPS' 18]** |  [`[pdf]`](http://papers.nips.cc/paper/7422-a-loss-framework-for-calibrated-anomaly-detection.pdf)
 62 | - A Practical Algorithm for Distributed Clustering and Outlier Detection | **[NIPS' 18]** |  [`[pdf]`](http://papers.nips.cc/paper/7493-a-practical-algorithm-for-distributed-clustering-and-outlier-detection.pdf)
 63 | - Efficient Anomaly Detection via Matrix Sketching  | **[NIPS' 18]** |  [`[pdf]`](http://papers.nips.cc/paper/8030-efficient-anomaly-detection-via-matrix-sketching.pdf)
 64 | - Adversarially Learned Anomaly Detection  | **[IEEE ICDM' 18]** |  [`[pdf]`](https://arxiv.org/pdf/1812.02288.pdf)
 65 | - Anomaly Detection With Multiple-Hypotheses Predictions  | **[ICML' 19]** |  [`[pdf]`](https://arxiv.org/pdf/1810.13292v5.pdf)
 66 | - Exploring Deep Anomaly Detection Methods Based on Capsule Net  | **[ICMLW' 19]** |  [`[pdf]`](https://arxiv.org/pdf/1907.06312v1.pdf)
 67 | - Latent Space Autoregression for Novelty Detection | **[CVPR' 19]** |  [`[pdf]`](https://arxiv.org/pdf/1807.01653.pdf)
 68 | - OCGAN: One-Class Novelty Detection Using GANs With Constrained Latent Representations | **[CVPR' 19]** |  [`[pdf]`](https://arxiv.org/pdf/1903.08550.pdf)
 69 | - Unsupervised Learning of Anomaly Detection from Contaminated Image Data using Simultaneous Encoder Training | **[arXiv' 19]** |  [`[pdf]`](https://arxiv.org/pdf/1905.11034.pdf)
 70 | - Using Self-Supervised Learning Can Improve Model Robustness and Uncertainty | **[NeurIPS' 19]** |  [`[pdf]`](https://arxiv.org/abs/1906.12340) [`[code]`](https://github.com/hendrycks/ss-ood)
 71 | - Classification-Based Anomaly Detection for General Data | **[ICLR' 20]** |  [`[pdf]`](https://openreview.net/pdf?id=H1lK_lBtvS)
 72 | - Robust Subspace Recovery Layer for Unsupervised Anomaly Detection   | **[ICLR' 20]** |  [`[pdf]`](https://openreview.net/pdf?id=rylb3eBtwr)
 73 | - RaPP: Novelty Detection with Reconstruction along Projection Pathway   | **[ICLR' 20]** |  [`[pdf]`](https://openreview.net/pdf?id=HkgeGeBYDB)
 74 | - Novelty Detection Via Blurring  | **[ICLR' 20]** |  [`[pdf]`](https://openreview.net/pdf?id=ByeNra4FDB)
 75 | - Deep Semi-Supervised Anomaly Detection   | **[ICLR' 20]** |  [`[pdf]`](https://openreview.net/pdf?id=HkgH0TEYwH)
 76 | - Robust anomaly detection and backdoor attack detection via differential privacy   | **[ICLR' 20]** |  [`[pdf]`](https://openreview.net/pdf?id=SJx0q1rtvS)
 77 | 
 78 | 
 79 | ### Out-of-Distribution(OOD) Detection target
 80 | - A Baseline for Detecting Misclassified and Out-of-Distribution Examples in Neural Networks  | **[ICLR' 17]** | [`[pdf]`](https://arxiv.org/pdf/1610.02136.pdf)
 81 | - [**ODIN**] Enhancing The Reliability of Out-of-distribution Image Detection in Neural Networks  | **[ICLR' 18]** | [`[pdf]`](https://arxiv.org/pdf/1706.02690.pdf)
 82 | - Training Confidence-calibrated Classifiers for Detecting Out-of-Distribution Samples | **[ICLR' 18]** |  [`[pdf]`](https://arxiv.org/pdf/1711.09325.pdf)
 83 | - Learning Confidence for Out-of-Distribution Detection in Neural Networks | **[arXiv' 18]** |  [`[pdf]`](https://arxiv.org/pdf/1802.04865.pdf)
 84 | - Out-of-Distribution Detection using Multiple Semantic Label Representations | **[NIPS' 18]** |  [`[pdf]`](http://papers.nips.cc/paper/7967-out-of-distribution-detection-using-multiple-semantic-label-representations.pdf)
 85 | - A Simple Unified Framework for Detecting Out-of-Distribution Samples and Adversarial Attacks  | **[NIPS' 18]** |  [`[pdf]`](http://papers.nips.cc/paper/7947-a-simple-unified-framework-for-detecting-out-of-distribution-samples-and-adversarial-attacks.pdf)
 86 | - Deep Anomaly Detection with Outlier Exposure  | **[ICLR' 19]** |  [`[pdf]`](https://openreview.net/pdf?id=HyxCxhRcY7)
 87 | - Why ReLU networks yield high-confidence predictions far away from the training data and how to mitigate the problem  | **[CVPR' 19]** |  [`[pdf]`](https://arxiv.org/pdf/1812.05720.pdf)
 88 | - Outlier Exposure with Confidence Control for Out-of-Distribution Detection | **[arXiv' 19]** |  [`[pdf]`](https://arxiv.org/abs/1906.03509v2) [`[code]`](https://github.com/nazim1021/OOD-detection-using-OECC)
 89 | - Likelihood Ratios for Out-of-Distribution Detection | **[NeurIPS' 19]** |  [`[pdf]`](https://arxiv.org/pdf/1906.02845.pdf)
 90 | - Input Complexity and Out-of-distribution Detection with Likelihood-based Generative Models | **[ICLR' 20]** |  [`[pdf]`](https://openreview.net/pdf?id=SyxIWpVYvr)
 91 | 
 92 | 
 93 | ### One Class (Anomaly) Segmentation target
 94 | - Anomaly Detection and Localization in Crowded Scenes  | **[TPAMI' 14]** | [`[pdf]`](http://www.svcl.ucsd.edu/publications/journal/2013/pami.anomaly/pami_anomaly.pdf)
 95 | - Novelty detection in images by sparse representations  | **[IEEE Symposium on IES' 14]** | [`[link]`](https://ieeexplore.ieee.org/document/7008985/)
 96 | - Detecting anomalous structures by convolutional sparse models | **[IJCNN' 15]** | [`[pdf]`](http://www.cs.tut.fi/~foi/papers/IJCNN2015-Carrera-Detecting_Anomalous_Structures.pdf)
 97 | - Real-Time Anomaly Detection and Localization in Crowded Scenes | **[CVPR Workshop' 15]** | [`[pdf]`](https://arxiv.org/pdf/1511.06936.pdf)
 98 | - Learning Deep Representations of Appearance and Motion for Anomalous Event Detection  | **[BMVC' 15]** | [`[pdf]`](https://arxiv.org/pdf/1510.01553.pdf)
 99 | - Scale-invariant anomaly detection with multiscale group-sparse models | **[IEEE ICIP' 16]** | [`[link]`](https://ieeexplore.ieee.org/document/7533089/)
100 | - [**AnoGAN**] Unsupervised Anomaly Detection with Generative Adversarial Networks to Guide Marker Discovery  | **[IPMI' 17]** | [`[pdf]`](https://arxiv.org/pdf/1703.05921.pdf) 
101 | - Deep-Anomaly: Fully Convolutional Neural Network for Fast Anomaly Detection in Crowded Scenes | **[Journal of Computer Vision and Image Understanding' 17]** | [`[pdf]`](https://arxiv.org/pdf/1609.00866.pdf)
102 | - Anomaly Detection using a Convolutional Winner-Take-All Autoencoder | **[BMVC' 17]** |  [`[pdf]`](http://eprints.whiterose.ac.uk/121891/1/BMVC2017.pdf)
103 | - Anomaly Detection in Nanofibrous Materials by CNN-Based Self-Similarity  | **[Sensors' 17]** | [`[pdf]`](http://www.mdpi.com/1424-8220/18/1/209/pdf)
104 | - Defect Detection in SEM Images of Nanofibrous Materials | **[IEEE Trans. on Industrial Informatics' 17]** | [`[pdf]`](http://home.deib.polimi.it/boracchi/docs/2017_Anomaly_Detection_SEM.pdf)
105 | - Abnormal event detection in videos using generative adversarial nets  |  **[ICIP' 17]** | [`[link]`](https://ieeexplore.ieee.org/document/8296547/)
106 | - An overview of deep learning based methods for unsupervised and semi-supervised anomaly detection in videos  | **[arXiv' 18]** |  [`[pdf]`](https://arxiv.org/pdf/1801.03149.pdf)
107 | - Improving Unsupervised Defect Segmentation by Applying Structural Similarity to Autoencoders  | **[arXiv' 18]** | [`[pdf]`](https://arxiv.org/pdf/1807.02011.pdf)
108 | - Satellite Image Forgery Detection and Localization Using GAN and One-Class Classifier  | **[IS&T EI' 18]** | [`[pdf]`](https://arxiv.org/pdf/1802.04881.pdf)
109 | - Deep Autoencoding Models for Unsupervised Anomaly Segmentation in Brain MR Images  | **[arXiv' 18]** | [`[pdf]`](https://arxiv.org/pdf/1804.04488.pdf)
110 | - AVID: Adversarial Visual Irregularity Detection  | **[arXiv' 18]** |[`[pdf]`](https://arxiv.org/pdf/1805.09521.pdf)
111 | - MVTec AD -- A Comprehensive Real-World Dataset for Unsupervised Anomaly Detection | **[CVPR' 19]** |  [`[pdf]`](https://www.mvtec.com/fileadmin/Redaktion/mvtec.com/company/research/mvtec_ad.pdf)
112 | - Exploiting Epistemic Uncertainty of Anatomy Segmentation for Anomaly Detection in Retinal OCT | **[IEEE TMI' 19]** |  [`[pdf]`](https://arxiv.org/pdf/1905.12806v1.pdf)
113 | - Uninformed Students: Student-Teacher Anomaly Detection with Discriminative Latent Embeddings | **[arXiv' 19]** |  [`[pdf]`](https://arxiv.org/pdf/1911.02357.pdf)
114 | - Attention Guided Anomaly Detection and Localization in Images | **[arXiv' 19]** |  [`[pdf]`](https://arxiv.org/pdf/1911.08616v1.pdf)
115 | 
116 | 


--------------------------------------------------------------------------------
/records/difficulty.md:
--------------------------------------------------------------------------------
 1 | 异常检测在图像领域困难点
 2 | 
 3 | 1、维度灾难：图像维度高，传统机器学习领域方法无法有效应对维度灾难问题。
 4 | 
 5 | 2、特征表征：图像特征包含较高的语义信息，在无监督信息下无法有效地提取，同时还得保持特征空间一致性。
 6 | 
 7 | 3、理论匮乏：目前针对图像领域，缺乏有效手段界定Anomaly Score。
 8 | 
 9 | 
10 | 思考点：
11 | 1、目前纯无监督学习，无法做到异常检测。参见[Anomaly Detection in Images](http://arxiv.org/pdf/1905.13147v1.pdf)
12 | 
13 | 
14 | 数据集：
15 | ![image](imgs/MVTec.png)
16 | 参见[MVTec AD--A Comprehensive Real-World Dataset for Unsupervised Anomaly Detection](http://openaccess.thecvf.com/content_CVPR_2019/papers/Bergmann_MVTec_AD_--_A_Comprehensive_Real-World_Dataset_for_Unsupervised_Anomaly_CVPR_2019_paper.pdf)
17 | 
18 | 有效解决方案：
19 | 
20 | 1、基于 student–teacher learning with Discriminative Latent Embeddings方式。参见[Uninformed Students: Student-Teacher Anomaly Detection with Discriminative Latent Embeddings](http://arxiv.org/pdf/1911.02357v1.pdf)
21 | ![image](imgs/image001.png)
22 | ![image](imgs/image002.png)
23 | 
24 | 采用了度量学习方式，同时基于student–teacher网络在特征空间上对每个feature元素做密集回归的方式，学习异常分布。
25 | 
26 | 2、基于迭代的能量优化模型 参见[Iterative energy-based projection on a normal data manifold for anomaly localization](https://openreview.net/pdf?id=HJx81ySKwr)
27 | ![image](imgs/image003.png)
28 | 
29 | ![image](imgs/image004.png)
30 | ![image](imgs/image005.jpg)
31 | ![image](imgs/image006.jpg)
32 | 
33 | ![image](imgs/image007.png)
34 | 
35 | 以梯度迭代优化的思想，构建能量优化函数，循环迭代，恢复正常流形空间。借鉴了图像修复的思想。
36 | 
37 | 3、基于Memory-augmented思想 构建Autoencoder。使得恢复图像是由正常样本embedding组合而成，避免了恢复出异常图像的可能性。参见[Memorizing Normality to Detect Anomaly: Memory-augmented Deep Autoencoder for Unsupervised Anomaly Detection](http://arxiv.org/pdf/1904.02639v1.pdf)
38 | ![image](imgs/image008.png)
39 | 
40 | 文中提到Autoencoder以及VAE并不能有效地将异常图像恢复为正常图像，因此用存储器模块来增强自动编码器，并开发了一种称为存储器增强自动编码器的改进的自动编码器，即MemAE。MemAE首先从编码器获取编码，然后将其用作查询以检索用于重建的最相关的存储器项。在训练阶段，更新存储器内容并鼓励它们表示正常数据的原型元素。在测试阶段，学习的存储器将被固定，并且从正常数据的一些选定的存储器记录中获得重建。因此，重建将倾向于接近正常样本。
41 | 
42 | 相似的工作 参见[Memory Augmented Generative Adversarial Networks for Anomaly Detection](http://arxiv.org/pdf/2002.02669v1.pdf)
43 | 
44 | [History-based Anomaly Detector: an Adversarial Approach to Anomaly Detection](http://arxiv.org/pdf/1912.11843v1.pdf)
45 | 
46 | 
47 | 4、尝试基于MMD学习异常scores，比较硬核。参见[Anomaly scores for generative models](http://arxiv.org/pdf/1905.11890v1.pdf)
48 | 
49 | 5、尝试基于图像视觉思想解决问题，可以从ChangeDetection+self supervised learning+transfer learning考虑。
50 | 
51 | ![image](imgs/7.jpg)
52 | ![image](imgs/BP202190822100682_3_3.jpg)
53 | 
54 | 6、基于图像修复方法进行
55 | 
56 | ![image](imgs/image009.jpeg)
57 | 
58 | * 预先使用正常图像训练异常检测模型。
59 | * 另外，用普通图像训练部分卷积。
60 | * 遮罩原始图像（蒙版图像）。
61 | * 将其输入到部分卷积中，以获得与蒙版部分互补的图像（预测图像）。
62 | * 对所获取的图像执行异常检测以获得异常得分（Anomaly Score）。
63 | * 最后，将获得的异常分数填入热图（heatmap）中对应的遮罩部分。
64 | * 移动遮罩并重复该过程。
65 | 
66 | ![image](imgs/image010.png)
67 | ![image](imgs/image011.png)
68 | 


--------------------------------------------------------------------------------
/records/imgs/7.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ForrestPi/AnomalyDetection/77208ff9ba86efa937d10d8aa3f20c9fbefb2d91/records/imgs/7.jpg


--------------------------------------------------------------------------------
/records/imgs/BP202190822100682_3_3.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ForrestPi/AnomalyDetection/77208ff9ba86efa937d10d8aa3f20c9fbefb2d91/records/imgs/BP202190822100682_3_3.jpg


--------------------------------------------------------------------------------
/records/imgs/MVTec.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ForrestPi/AnomalyDetection/77208ff9ba86efa937d10d8aa3f20c9fbefb2d91/records/imgs/MVTec.png


--------------------------------------------------------------------------------
/records/imgs/anomaly_detection_example1.PNG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ForrestPi/AnomalyDetection/77208ff9ba86efa937d10d8aa3f20c9fbefb2d91/records/imgs/anomaly_detection_example1.PNG


--------------------------------------------------------------------------------
/records/imgs/anomaly_detection_types.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ForrestPi/AnomalyDetection/77208ff9ba86efa937d10d8aa3f20c9fbefb2d91/records/imgs/anomaly_detection_types.png


--------------------------------------------------------------------------------
/records/imgs/image001.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ForrestPi/AnomalyDetection/77208ff9ba86efa937d10d8aa3f20c9fbefb2d91/records/imgs/image001.png


--------------------------------------------------------------------------------
/records/imgs/image002.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ForrestPi/AnomalyDetection/77208ff9ba86efa937d10d8aa3f20c9fbefb2d91/records/imgs/image002.png


--------------------------------------------------------------------------------
/records/imgs/image003.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ForrestPi/AnomalyDetection/77208ff9ba86efa937d10d8aa3f20c9fbefb2d91/records/imgs/image003.png


--------------------------------------------------------------------------------
/records/imgs/image004.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ForrestPi/AnomalyDetection/77208ff9ba86efa937d10d8aa3f20c9fbefb2d91/records/imgs/image004.png


--------------------------------------------------------------------------------
/records/imgs/image005.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ForrestPi/AnomalyDetection/77208ff9ba86efa937d10d8aa3f20c9fbefb2d91/records/imgs/image005.jpg


--------------------------------------------------------------------------------
/records/imgs/image006.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ForrestPi/AnomalyDetection/77208ff9ba86efa937d10d8aa3f20c9fbefb2d91/records/imgs/image006.jpg


--------------------------------------------------------------------------------
/records/imgs/image007.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ForrestPi/AnomalyDetection/77208ff9ba86efa937d10d8aa3f20c9fbefb2d91/records/imgs/image007.png


--------------------------------------------------------------------------------
/records/imgs/image008.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ForrestPi/AnomalyDetection/77208ff9ba86efa937d10d8aa3f20c9fbefb2d91/records/imgs/image008.png


--------------------------------------------------------------------------------
/records/imgs/image009.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ForrestPi/AnomalyDetection/77208ff9ba86efa937d10d8aa3f20c9fbefb2d91/records/imgs/image009.jpeg


--------------------------------------------------------------------------------
/records/imgs/image010.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ForrestPi/AnomalyDetection/77208ff9ba86efa937d10d8aa3f20c9fbefb2d91/records/imgs/image010.png


--------------------------------------------------------------------------------
/records/imgs/image011.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ForrestPi/AnomalyDetection/77208ff9ba86efa937d10d8aa3f20c9fbefb2d91/records/imgs/image011.png


--------------------------------------------------------------------------------
/resources.md:
--------------------------------------------------------------------------------
  1 | # Awesome Anomaly Detection
  2 | A list of Papers on anomaly detection.
  3 | You are welcome to open an issue or submit a pull request if you think an important paper is not included in this repo.
  4 | The papers are organized into classical methods, deep learning methods, applications, and surveys.
  5 | 
  6 | 
  7 | ## Classical Method
  8 | - [Isolation Forest](https://cs.nju.edu.cn/zhouzh/zhouzh.files/publication/icdm08b.pdf) - ICDM 2008.
  9 | 
 10 | - [LOF: Identifying Density-Based Local Outliers](http://www.dbs.ifi.lmu.de/Publikationen/Papers/LOF.pdf) - SIGMOD 2000.
 11 | 
 12 | - [Extended Isolation Forest](http://matias-ck.com/files/papers/Extended_Isolation_Forest.pdf)
 13 | 
 14 | - [Support Vector Method for Novelty Detection](https://papers.nips.cc/paper/1723-support-vector-method-for-novelty-detection.pdf) - NIPS 2000
 15 | 
 16 | ### One-Class Classification
 17 | 
 18 | - [One-Class SVMs for Document Classification](http://www.jmlr.org/papers/volume2/manevitz01a/manevitz01a.pdf) - JMLR 2001. 
 19 | 
 20 | - [Support Vector Data Description](http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.100.1425&rep=rep1&type=pdf) 
 21 | 
 22 | - [Can I Trust My One-Class Classification?](http://www.ipb.uni-bonn.de/pdfs/Mack2014Can.pdf)
 23 | 
 24 | - [Efficient Anomaly Detection via Matrix Sketching](https://arxiv.org/pdf/1804.03065.pdf) - NIPS 2018
 25 | 
 26 | ### PCA-based
 27 | 
 28 | - [robust deep and inductive anomaly detection](https://arxiv.org/abs/1704.06743) - ECML PKDD 2017
 29 | 
 30 | - [A loss framework for calibrated anomaly detection](https://papers.nips.cc/paper/7422-a-loss-framework-for-calibrated-anomaly-detection.pdf) - NIPS 2018
 31 | 
 32 | 
 33 | ### Clustering
 34 | 
 35 | - [A Practical Algorithm for Distributed Clustering and Outlier Detection](https://arxiv.org/pdf/1805.09495.pdf) - NIPS 2018
 36 | 
 37 | ### Correlation
 38 | 
 39 | - [Detecting Multiple Periods and Periodic Patterns in Event Time Sequences](http://chaozhang.org/papers/cikm17a.pdf) - CIKM 2017.
 40 | 
 41 | ### Ranking
 42 | 
 43 | - [ranking causal anomalies via temporal and dynamical analysis on vanishing correlations](https://www.kdd.org/kdd2016/papers/files/rfp0445-chengAemb.pdf) - KDD 2016.
 44 | 
 45 | ## Deep Learning Method
 46 | 
 47 | ### Generative Methods
 48 | - [Variational Autoencoder based Anomaly Detection using Reconstruction Probability](http://dm.snu.ac.kr/static/docs/TR/SNUDM-TR-2015-03.pdf)  
 49 | 
 50 | #### Auto-encoder
 51 | 
 52 | - [Learning sparse representation with variational auto-encoder for anomaly detection](https://ieeexplore.ieee.org/document/8386760/)
 53 | 
 54 | - [Anomaly Detection with Robust Deep Autoencoders](http://dl.acm.org/authorize?N33358) - KDD 2017.
 55 | 
 56 | - [DEEP AUTOENCODING GAUSSIAN MIXTURE MODEL FOR UNSUPERVISED ANOMALY DETECTION](https://www.cs.ucsb.edu/~bzong/doc/iclr18-dagmm.pdf) - ICLR 2018.
 57 | 
 58 | - [Generative Probabilistic Novelty Detection with Adversarial Autoencoders](https://papers.nips.cc/paper/7915-generative-probabilistic-novelty-detection-with-adversarial-autoencoders.pdf) - NIPS 2018
 59 | #### Variational Auto-encoder
 60 | 
 61 | - [Multidimensional Time Series Anomaly Detection: A GRU-based Gaussian Mixture Variational Autoencoder Approach](http://proceedings.mlr.press/v95/guo18a/guo18a.pdf) - ACML 2018
 62 | 
 63 | - [A Multimodel Anomaly Detector for Robot-Assisted Feeding Using an LSTM-based Variational Autoencoder](https://arxiv.org/pdf/1711.00614.pdf) - IEEE Robotics and Automation Letters 2018. 
 64 | 
 65 | #### GAN based
 66 | 
 67 | - [Unsupervised Anomaly Detection with Generative Adversarial Networks to Guide Marker Discovery](https://arxiv.org/pdf/1703.05921.pdf) - IPMI 2017.
 68 | 
 69 | - [Efficient-GAN-Based Anomaly Detection](https://github.com/houssamzenati/Efficient-GAN-Anomaly-Detection) - ICLR Workshop 2018.
 70 | 
 71 | - [Anomaly detection with generative adversarial networks](https://openreview.net/pdf?id=S1EfylZ0Z) - Rejected by ICLR 2018, but used as a baseline method in a recently published NIPS paper.
 72 | 
 73 | ### Hyperspherical Learning
 74 | 
 75 | - [Anomaly Detection in Dynamic Networks using Multi-view Time-Series Hypersphere Learning](https://dl.acm.org/citation.cfm?id=3132964) - CIKM 2017.
 76 | 
 77 | - [Deep into Hypersphere: Robust and Unsupervised Anomaly Discovery in Dynamic Networks](https://www.ijcai.org/proceedings/2018/0378.pdf) - IJCAI 2018.
 78 | 
 79 | ### One-Class Classification
 80 | 
 81 | - [High-dimensional and large-scale anomaly detection using a linear one-class SVM with deep learning](https://www.sciencedirect.com/science/article/abs/pii/S0031320316300267) - Pattern Recognition 2018.
 82 | 
 83 | - [Optimal single-class classification strategies](https://papers.nips.cc/paper/2987-optimal-single-class-classification-strategies.pdf) - NIPS 2007
 84 | 
 85 | - [Deep One-Class Classification](http://proceedings.mlr.press/v80/ruff18a/ruff18a.pdf) - ICML 2018.
 86 | 
 87 | ### Energy-based
 88 | 
 89 | - [Deep structured energy based models for anomaly detection](https://arxiv.org/pdf/1605.07717.pdf) - ICML 2016
 90 | 
 91 | ### Time series
 92 | 
 93 | - [A Generalized Student-t Based Approach to Mixed-Type Anomaly Detection](http://www.nvc.cs.vt.edu/~ctlu/Publication/2013/AAAI-Lu-2013.pdf) - AAAI 2013
 94 | 
 95 | - [Stochastic Online Anomaly Analysis for Streaming Time Series](https://www.ijcai.org/proceedings/2017/0445.pdf) - IJCAI 2017
 96 | 
 97 | - [Long short term memory networks for anomaly detection in time series](https://www.elen.ucl.ac.be/Proceedings/esann/esannpdf/es2015-56.pdf)
 98 | 
 99 | - [LSTM-based Encoder-Decoder for Multi-sensor Anomaly Detection](https://arxiv.org/pdf/1607.00148.pdf) - ICML 2016 Workshop.
100 | 
101 | ### Interpretation
102 | 
103 | - [Contextual Outlier Interpretation](https://www.ijcai.org/proceedings/2018/0341.pdf) - IJCAI 2018
104 | 
105 | ### Evaluation Metrics
106 | 
107 | - [Precision and Recall for Time Series](http://papers.nips.cc/paper/7462-precision-and-recall-for-time-series.pdf) - NIPS 2018
108 | 
109 | ### Geometric transformation
110 | 
111 | - [Deep Anomaly Detection Using Geometric Transformations](https://arxiv.org/pdf/1805.10917.pdf) - NIPS 2018
112 | 
113 | 
114 | ### FeedBack
115 | - [Incorporating Feedback into Tree-based Anomaly Detection](https://arxiv.org/abs/1708.09441) - KDD 2017 Workshop on Interactive Data Exploration and Analytics.
116 | 
117 | - [Feedback-Guided Anomaly Discovery via Online Optimization](http://web.engr.oregonstate.edu/~afern/papers/kdd18-siddiqui.pdf) - KDD 2018.
118 | 
119 | ## Anomaly Detection Applications
120 | 
121 | ### KPI
122 | - [Unsupervised Anomaly Detection via Variational Auto-Encoder for Seasonal KPIs in Web Applications](https://arxiv.org/pdf/1802.03903) - WWW 2018.
123 | ### Log
124 | 
125 | - [DeepLog: Anomaly Detection and Diagnosis from System Logs through Deep Learning](https://acmccs.github.io/papers/p1285-duA.pdf) - CCS 2017. 
126 | 
127 | - [Mining Invariants from Logs for System Problem Detection](https://www.usenix.org/legacy/event/atc10/tech/slides/lou.pdf) - USENIX 2010
128 | 
129 | 
130 | ## Survey
131 | 
132 | - [Anomaly detection in dynamic networks: a survey](https://onlinelibrary.wiley.com/doi/pdf/10.1002/wics.1347)
133 | 
134 | - [Anomaly Detection : A Survey](http://cucis.ece.northwestern.edu/projects/DMS/publications/AnomalyDetection.pdf)
135 | 
136 | - [A Survey of Recent Trends in One Class Classification](https://link.springer.com/chapter/10.1007/978-3-642-17080-5_21) 
137 | 
138 | - [A survey on unsupervised outlier detection in high‐dimensional numerical data](https://onlinelibrary.wiley.com/doi/abs/10.1002/sam.11161)
139 | 
140 | 
141 | 
142 | 
143 | 
144 | 
145 | 
146 | 
147 | 
148 | 
149 | 


--------------------------------------------------------------------------------