├── .gitignore
├── LICENSE
├── README.md
├── data_loader.py
├── data_loader_test.py
├── discrete_model.py
├── dsr_model.py
├── loss.py
├── perlin.py
├── requirements.txt
├── test_dsr.py
└── train_dsr.py


/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | 
  6 | # C extensions
  7 | *.so
  8 | 
  9 | # Distribution / packaging
 10 | .Python
 11 | build/
 12 | develop-eggs/
 13 | dist/
 14 | downloads/
 15 | eggs/
 16 | .eggs/
 17 | lib/
 18 | lib64/
 19 | parts/
 20 | sdist/
 21 | var/
 22 | wheels/
 23 | pip-wheel-metadata/
 24 | share/python-wheels/
 25 | *.egg-info/
 26 | .installed.cfg
 27 | *.egg
 28 | MANIFEST
 29 | 
 30 | # PyInstaller
 31 | #  Usually these files are written by a python script from a template
 32 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 33 | *.manifest
 34 | *.spec
 35 | 
 36 | # Installer logs
 37 | pip-log.txt
 38 | pip-delete-this-directory.txt
 39 | 
 40 | # Unit test / coverage reports
 41 | htmlcov/
 42 | .tox/
 43 | .nox/
 44 | .coverage
 45 | .coverage.*
 46 | .cache
 47 | nosetests.xml
 48 | coverage.xml
 49 | *.cover
 50 | *.py,cover
 51 | .hypothesis/
 52 | .pytest_cache/
 53 | 
 54 | # Translations
 55 | *.mo
 56 | *.pot
 57 | 
 58 | # Django stuff:
 59 | *.log
 60 | local_settings.py
 61 | db.sqlite3
 62 | db.sqlite3-journal
 63 | 
 64 | # Flask stuff:
 65 | instance/
 66 | .webassets-cache
 67 | 
 68 | # Scrapy stuff:
 69 | .scrapy
 70 | 
 71 | # Sphinx documentation
 72 | docs/_build/
 73 | 
 74 | # PyBuilder
 75 | target/
 76 | 
 77 | # Jupyter Notebook
 78 | .ipynb_checkpoints
 79 | 
 80 | # IPython
 81 | profile_default/
 82 | ipython_config.py
 83 | 
 84 | # pyenv
 85 | .python-version
 86 | 
 87 | # pipenv
 88 | #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
 89 | #   However, in case of collaboration, if having platform-specific dependencies or dependencies
 90 | #   having no cross-platform support, pipenv may install dependencies that don't work, or not
 91 | #   install all needed dependencies.
 92 | #Pipfile.lock
 93 | 
 94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
 95 | __pypackages__/
 96 | 
 97 | # Celery stuff
 98 | celerybeat-schedule
 99 | celerybeat.pid
100 | 
101 | # SageMath parsed files
102 | *.sage.py
103 | 
104 | # Environments
105 | .env
106 | .venv
107 | env/
108 | venv/
109 | ENV/
110 | env.bak/
111 | venv.bak/
112 | 
113 | # Spyder project settings
114 | .spyderproject
115 | .spyproject
116 | 
117 | # Rope project settings
118 | .ropeproject
119 | 
120 | # mkdocs documentation
121 | /site
122 | 
123 | # mypy
124 | .mypy_cache/
125 | .dmypy.json
126 | dmypy.json
127 | 
128 | # Pyre type checker
129 | .pyre/
130 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 |                                  Apache License
  2 |                            Version 2.0, January 2004
  3 |                         http://www.apache.org/licenses/
  4 | 
  5 |    TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
  6 | 
  7 |    1. Definitions.
  8 | 
  9 |       "License" shall mean the terms and conditions for use, reproduction,
 10 |       and distribution as defined by Sections 1 through 9 of this document.
 11 | 
 12 |       "Licensor" shall mean the copyright owner or entity authorized by
 13 |       the copyright owner that is granting the License.
 14 | 
 15 |       "Legal Entity" shall mean the union of the acting entity and all
 16 |       other entities that control, are controlled by, or are under common
 17 |       control with that entity. For the purposes of this definition,
 18 |       "control" means (i) the power, direct or indirect, to cause the
 19 |       direction or management of such entity, whether by contract or
 20 |       otherwise, or (ii) ownership of fifty percent (50%) or more of the
 21 |       outstanding shares, or (iii) beneficial ownership of such entity.
 22 | 
 23 |       "You" (or "Your") shall mean an individual or Legal Entity
 24 |       exercising permissions granted by this License.
 25 | 
 26 |       "Source" form shall mean the preferred form for making modifications,
 27 |       including but not limited to software source code, documentation
 28 |       source, and configuration files.
 29 | 
 30 |       "Object" form shall mean any form resulting from mechanical
 31 |       transformation or translation of a Source form, including but
 32 |       not limited to compiled object code, generated documentation,
 33 |       and conversions to other media types.
 34 | 
 35 |       "Work" shall mean the work of authorship, whether in Source or
 36 |       Object form, made available under the License, as indicated by a
 37 |       copyright notice that is included in or attached to the work
 38 |       (an example is provided in the Appendix below).
 39 | 
 40 |       "Derivative Works" shall mean any work, whether in Source or Object
 41 |       form, that is based on (or derived from) the Work and for which the
 42 |       editorial revisions, annotations, elaborations, or other modifications
 43 |       represent, as a whole, an original work of authorship. For the purposes
 44 |       of this License, Derivative Works shall not include works that remain
 45 |       separable from, or merely link (or bind by name) to the interfaces of,
 46 |       the Work and Derivative Works thereof.
 47 | 
 48 |       "Contribution" shall mean any work of authorship, including
 49 |       the original version of the Work and any modifications or additions
 50 |       to that Work or Derivative Works thereof, that is intentionally
 51 |       submitted to Licensor for inclusion in the Work by the copyright owner
 52 |       or by an individual or Legal Entity authorized to submit on behalf of
 53 |       the copyright owner. For the purposes of this definition, "submitted"
 54 |       means any form of electronic, verbal, or written communication sent
 55 |       to the Licensor or its representatives, including but not limited to
 56 |       communication on electronic mailing lists, source code control systems,
 57 |       and issue tracking systems that are managed by, or on behalf of, the
 58 |       Licensor for the purpose of discussing and improving the Work, but
 59 |       excluding communication that is conspicuously marked or otherwise
 60 |       designated in writing by the copyright owner as "Not a Contribution."
 61 | 
 62 |       "Contributor" shall mean Licensor and any individual or Legal Entity
 63 |       on behalf of whom a Contribution has been received by Licensor and
 64 |       subsequently incorporated within the Work.
 65 | 
 66 |    2. Grant of Copyright License. Subject to the terms and conditions of
 67 |       this License, each Contributor hereby grants to You a perpetual,
 68 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 69 |       copyright license to reproduce, prepare Derivative Works of,
 70 |       publicly display, publicly perform, sublicense, and distribute the
 71 |       Work and such Derivative Works in Source or Object form.
 72 | 
 73 |    3. Grant of Patent License. Subject to the terms and conditions of
 74 |       this License, each Contributor hereby grants to You a perpetual,
 75 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 76 |       (except as stated in this section) patent license to make, have made,
 77 |       use, offer to sell, sell, import, and otherwise transfer the Work,
 78 |       where such license applies only to those patent claims licensable
 79 |       by such Contributor that are necessarily infringed by their
 80 |       Contribution(s) alone or by combination of their Contribution(s)
 81 |       with the Work to which such Contribution(s) was submitted. If You
 82 |       institute patent litigation against any entity (including a
 83 |       cross-claim or counterclaim in a lawsuit) alleging that the Work
 84 |       or a Contribution incorporated within the Work constitutes direct
 85 |       or contributory patent infringement, then any patent licenses
 86 |       granted to You under this License for that Work shall terminate
 87 |       as of the date such litigation is filed.
 88 | 
 89 |    4. Redistribution. You may reproduce and distribute copies of the
 90 |       Work or Derivative Works thereof in any medium, with or without
 91 |       modifications, and in Source or Object form, provided that You
 92 |       meet the following conditions:
 93 | 
 94 |       (a) You must give any other recipients of the Work or
 95 |           Derivative Works a copy of this License; and
 96 | 
 97 |       (b) You must cause any modified files to carry prominent notices
 98 |           stating that You changed the files; and
 99 | 
100 |       (c) You must retain, in the Source form of any Derivative Works
101 |           that You distribute, all copyright, patent, trademark, and
102 |           attribution notices from the Source form of the Work,
103 |           excluding those notices that do not pertain to any part of
104 |           the Derivative Works; and
105 | 
106 |       (d) If the Work includes a "NOTICE" text file as part of its
107 |           distribution, then any Derivative Works that You distribute must
108 |           include a readable copy of the attribution notices contained
109 |           within such NOTICE file, excluding those notices that do not
110 |           pertain to any part of the Derivative Works, in at least one
111 |           of the following places: within a NOTICE text file distributed
112 |           as part of the Derivative Works; within the Source form or
113 |           documentation, if provided along with the Derivative Works; or,
114 |           within a display generated by the Derivative Works, if and
115 |           wherever such third-party notices normally appear. The contents
116 |           of the NOTICE file are for informational purposes only and
117 |           do not modify the License. You may add Your own attribution
118 |           notices within Derivative Works that You distribute, alongside
119 |           or as an addendum to the NOTICE text from the Work, provided
120 |           that such additional attribution notices cannot be construed
121 |           as modifying the License.
122 | 
123 |       You may add Your own copyright statement to Your modifications and
124 |       may provide additional or different license terms and conditions
125 |       for use, reproduction, or distribution of Your modifications, or
126 |       for any such Derivative Works as a whole, provided Your use,
127 |       reproduction, and distribution of the Work otherwise complies with
128 |       the conditions stated in this License.
129 | 
130 |    5. Submission of Contributions. Unless You explicitly state otherwise,
131 |       any Contribution intentionally submitted for inclusion in the Work
132 |       by You to the Licensor shall be under the terms and conditions of
133 |       this License, without any additional terms or conditions.
134 |       Notwithstanding the above, nothing herein shall supersede or modify
135 |       the terms of any separate license agreement you may have executed
136 |       with Licensor regarding such Contributions.
137 | 
138 |    6. Trademarks. This License does not grant permission to use the trade
139 |       names, trademarks, service marks, or product names of the Licensor,
140 |       except as required for reasonable and customary use in describing the
141 |       origin of the Work and reproducing the content of the NOTICE file.
142 | 
143 |    7. Disclaimer of Warranty. Unless required by applicable law or
144 |       agreed to in writing, Licensor provides the Work (and each
145 |       Contributor provides its Contributions) on an "AS IS" BASIS,
146 |       WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 |       implied, including, without limitation, any warranties or conditions
148 |       of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 |       PARTICULAR PURPOSE. You are solely responsible for determining the
150 |       appropriateness of using or redistributing the Work and assume any
151 |       risks associated with Your exercise of permissions under this License.
152 | 
153 |    8. Limitation of Liability. In no event and under no legal theory,
154 |       whether in tort (including negligence), contract, or otherwise,
155 |       unless required by applicable law (such as deliberate and grossly
156 |       negligent acts) or agreed to in writing, shall any Contributor be
157 |       liable to You for damages, including any direct, indirect, special,
158 |       incidental, or consequential damages of any character arising as a
159 |       result of this License or out of the use or inability to use the
160 |       Work (including but not limited to damages for loss of goodwill,
161 |       work stoppage, computer failure or malfunction, or any and all
162 |       other commercial damages or losses), even if such Contributor
163 |       has been advised of the possibility of such damages.
164 | 
165 |    9. Accepting Warranty or Additional Liability. While redistributing
166 |       the Work or Derivative Works thereof, You may choose to offer,
167 |       and charge a fee for, acceptance of support, warranty, indemnity,
168 |       or other liability obligations and/or rights consistent with this
169 |       License. However, in accepting such obligations, You may act only
170 |       on Your own behalf and on Your sole responsibility, not on behalf
171 |       of any other Contributor, and only if You agree to indemnify,
172 |       defend, and hold each Contributor harmless for any liability
173 |       incurred by, or claims asserted against, such Contributor by reason
174 |       of your accepting any such warranty or additional liability.
175 | 
176 |    END OF TERMS AND CONDITIONS
177 | 
178 |    APPENDIX: How to apply the Apache License to your work.
179 | 
180 |       To apply the Apache License to your work, attach the following
181 |       boilerplate notice, with the fields enclosed by brackets "[]"
182 |       replaced with your own identifying information. (Don't include
183 |       the brackets!)  The text should be enclosed in the appropriate
184 |       comment syntax for the file format. We also recommend that a
185 |       file or class name and description of purpose be included on the
186 |       same "printed page" as the copyright notice for easier
187 |       identification within third-party archives.
188 | 
189 |    Copyright [yyyy] [name of copyright owner]
190 | 
191 |    Licensed under the Apache License, Version 2.0 (the "License");
192 |    you may not use this file except in compliance with the License.
193 |    You may obtain a copy of the License at
194 | 
195 |        http://www.apache.org/licenses/LICENSE-2.0
196 | 
197 |    Unless required by applicable law or agreed to in writing, software
198 |    distributed under the License is distributed on an "AS IS" BASIS,
199 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 |    See the License for the specific language governing permissions and
201 |    limitations under the License.
202 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | ## DSR - A dual subspace re-projection network for surface anomaly detection
 2 | 
 3 | This repository contains the implementation of **DSR** as proposed in the paper **_DSR -- A dual subspace re-projection network for surface anomaly detection_**
 4 | 
 5 | ### Requirements
 6 | You can create a conda environment with: 
 7 | `conda create --name <env> --file requirements.txt`
 8 | 
 9 | But the main packages used are:
10 | 
11 | PyTorch 1.11, opencv-python, scikit-learn, numpy, imgaug
12 | 
13 | 
14 | ### DSR evaluation on MVTec
15 | #### Downloading the MVTec benchmark
16 | Set up the MVTec AD benchmark by downloading it from: <https://www.mvtec.com/company/research/datasets/mvtec-ad>,
17 | and extracting it. For training and evaluation pass the base directory of the extracted files (for example ./data/mvtec/) 
18 | as the **--data_path** argument.
19 | 
20 | #### Download the pretrained models
21 | Download the pretrained models and extract the zip so that the checkpoints folder will be located in the base directory of this repository.
22 | 
23 | Download link: <https://drive.google.com/file/d/15plhikrUjYCcx23JVxxBKb-HBwKAb8UK/view?usp=sharing>
24 | 
25 | 
26 | #### Running the evaluation
27 | ```shell
28 | #BASE_PATH -- the base directory of mvtec
29 | #i -- the gpu id used for evaluation
30 | python test_dsr.py $i $BASE_PATH DSR
31 | ```
32 | 
33 | ### Training DSR
34 | ```shell
35 | #BASE_PATH -- the base directory of mvtec
36 | #OUT_PATH -- where the trained models will be saved
37 | #i -- the index of the object class in the obj_batch list in train_dsr.py
38 | python train_dsr.py --gpu_id 0 --obj_id $i --lr 0.0002 --bs 8 --epochs 100 --data_path $BASE_PATH --out_path $OUT_PATH
39 | ```
40 | 


--------------------------------------------------------------------------------
/data_loader.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | from torch.utils.data import Dataset, DataLoader
  3 | import torch
  4 | import cv2
  5 | import glob
  6 | import imgaug.augmenters as iaa
  7 | from perlin import rand_perlin_2d_np
  8 | 
  9 | class TrainImageOnlyDataset(Dataset):
 10 | 
 11 |     def __init__(self, root_dir, resize_shape=None, ):
 12 |         self.root_dir = root_dir
 13 |         self.resize_shape=resize_shape
 14 | 
 15 |         self.images_f = sorted(glob.glob(root_dir+"/*.png"))
 16 |         self.images = np.zeros((len(self.images_f),self.resize_shape[0],self.resize_shape[1],3))
 17 | 
 18 |         for i,img_path in enumerate(self.images_f):
 19 |             img = cv2.imread(img_path)
 20 |             img = cv2.resize(img, dsize=(self.resize_shape[1], self.resize_shape[0]))
 21 |             self.images[i]=img
 22 | 
 23 |     def __len__(self):
 24 |         # arbitrary number- each iteration is sampled in __getitem__
 25 |         return 8000
 26 | 
 27 | 
 28 |     def transform_image(self, image):
 29 |         image = image / 255.0
 30 |         image = np.array(image).reshape((image.shape[0], image.shape[1], 3)).astype(np.float32)
 31 |         image = np.transpose(image, (2, 0, 1))
 32 |         return image
 33 | 
 34 | 
 35 |     def __getitem__(self, idx):
 36 |         new_idx = torch.randint(0, len(self.images), (1,)).numpy()[0]
 37 |         image = self.transform_image(self.images[new_idx])
 38 |         sample = {'image': image, 'idx': new_idx}
 39 |         return sample
 40 | 
 41 | 
 42 | class TrainWholeImageDataset(Dataset):
 43 | 
 44 |     def __init__(self, root_dir, resize_shape=None, perlin_augment=False):
 45 |         self.root_dir = root_dir
 46 |         self.perlin_augment = perlin_augment
 47 | 
 48 |         self.resize_shape=resize_shape
 49 | 
 50 |         self.images_f = sorted(glob.glob(root_dir+"/*.png"))
 51 |         self.images = np.zeros((len(self.images_f),self.resize_shape[0],self.resize_shape[1],3))
 52 | 
 53 |         for i,img_path in enumerate(self.images_f):
 54 |             img = cv2.imread(img_path)
 55 |             img = cv2.resize(img, dsize=(self.resize_shape[1], self.resize_shape[0]))
 56 |             self.images[i]=img
 57 | 
 58 | 
 59 |         self.orig_augment = iaa.Sequential([
 60 |                       iaa.Affine(rotate=(-90, 90))
 61 |                       ])
 62 | 
 63 |         self.rot = iaa.Sequential([iaa.Affine(rotate=(-90, 90))])
 64 | 
 65 |     def __len__(self):
 66 |         # arbitrary number- each iteration is sampled in __getitem__
 67 |         return 8000
 68 | 
 69 | 
 70 |     def transform_image(self, image):
 71 |         if self.perlin_augment:
 72 |             do_aug_orig = torch.rand(1).numpy()[0] > 0.6
 73 |             if do_aug_orig:
 74 |                 image = self.orig_augment(image=image)
 75 |         image = image / 255.0
 76 |         image = np.array(image).reshape((image.shape[0], image.shape[1], 3)).astype(np.float32)
 77 |         image = np.transpose(image, (2, 0, 1))
 78 |         return image
 79 | 
 80 | 
 81 |     def __getitem__(self, idx):
 82 |         if torch.is_tensor(idx):
 83 |             idx = idx.tolist()
 84 | 
 85 |         new_idx = torch.randint(0, len(self.images), (1,)).numpy()[0]
 86 |         image = self.transform_image(self.images[new_idx])
 87 |         has_anomaly = np.array([0], dtype=np.float32)
 88 | 
 89 |         min_perlin_scale = 0
 90 |         perlin_scale = 6
 91 |         perlin_scalex = 2 ** (torch.randint(min_perlin_scale, perlin_scale, (1,)).numpy()[0])
 92 |         perlin_scaley = 2 ** (torch.randint(min_perlin_scale, perlin_scale, (1,)).numpy()[0])
 93 |         threshold = 0.5
 94 |         perlin_noise_np = rand_perlin_2d_np((self.resize_shape[0], self.resize_shape[1]),
 95 |                                             (perlin_scalex, perlin_scaley))
 96 |         perlin_noise_np = self.rot(image=perlin_noise_np)
 97 |         perlin_thr = np.where(perlin_noise_np > threshold, np.ones_like(perlin_noise_np),
 98 |                               np.zeros_like(perlin_noise_np))
 99 |         perlin_thr = torch.from_numpy(perlin_thr)
100 |         perlin_thr = perlin_thr.unsqueeze(0)
101 |         no_anomaly = torch.rand(1).numpy()[0] > 0.5
102 |         if no_anomaly:
103 |             perlin_thr = perlin_thr * 0
104 | 
105 |         sample = {'image': image, 'mask': perlin_thr, 'is_normal': has_anomaly, 'idx': new_idx}
106 | 
107 |         return sample
108 | 
class MVTecImageAnomTrainDataset(Dataset):
    """Serve random training images with synthetic colour-patch anomalies.

    Every ``*.png`` file directly inside ``root_dir`` is read with OpenCV,
    resized and cached as uint8 when the dataset is constructed.  The index
    passed to ``__getitem__`` is ignored: each access draws one cached image
    at random with torch's RNG.
    """

    def __init__(self, root_dir, resize_shape=None):
        """Load and cache all images found in ``root_dir``.

        Args:
            root_dir: Directory containing the ``*.png`` training images.
            resize_shape: ``(height, width)`` every image is resized to.
                Required; the ``None`` default is kept only so the signature
                stays unchanged.

        Raises:
            ValueError: If ``resize_shape`` is ``None``.
            FileNotFoundError: If ``root_dir`` contains no ``*.png`` file.
            OSError: If an image file cannot be read by OpenCV.
        """
        if resize_shape is None:
            raise ValueError("resize_shape must be given as (height, width)")

        self.root_dir = root_dir
        self.resize_shape = resize_shape

        self.images_f = sorted(glob.glob(root_dir+"/*.png"))
        if not self.images_f:
            # Fail here instead of on the first random draw in __getitem__.
            raise FileNotFoundError("no *.png images found in " + str(root_dir))

        height, width = resize_shape[0], resize_shape[1]
        self.images = np.zeros((len(self.images_f), height, width, 3), dtype=np.uint8)

        for i, img_path in enumerate(self.images_f):
            img = cv2.imread(img_path)
            if img is None:
                raise OSError("could not read image: " + img_path)
            # cv2.resize takes dsize as (width, height).
            img = cv2.resize(img, dsize=(width, height))
            self.images[i] = img.astype(np.uint8)

        # Not referenced by any method of this class; kept as a public attribute.
        self.augmenters = [iaa.GammaContrast((0.5,2.0),per_channel=True),
                      iaa.MultiplyAndAddToBrightness(mul=(0.8,1.2),add=(-30,30)),
                      iaa.pillike.EnhanceSharpness(),
                      iaa.AddToHueAndSaturation((-50,50),per_channel=True),
                      iaa.Solarize(0.5, threshold=(32,128)),
                      iaa.Posterize(),
                      iaa.Invert(),
                      iaa.pillike.Autocontrast(),
                      iaa.pillike.Equalize(),
                      iaa.Affine(rotate=(-45, 45))
                      ]

        # Rotation used for both the Perlin noise map and the input image.
        self.rot = iaa.Sequential([iaa.Affine(rotate=(-90, 90))])

    def __len__(self):
        # arbitrary number- each iteration is sampled in __getitem__
        return 8000

    def augment_image(self, image):
        """Blend a uniformly coloured patch, shaped by Perlin noise, into ``image``.

        Args:
            image: HxWx3 float image, as prepared by ``transform_image``.

        Returns:
            ``(image, mask, has_anomaly)``.  A uniform draw above 0.5 returns
            the untouched image with an all-zero mask and ``[0.]``.  Otherwise
            the blended image and the thresholded noise mask are returned,
            with ``has_anomaly`` equal to ``[1.]`` unless the mask is empty.
        """
        perlin_scale = 6
        min_perlin_scale = 0

        # Draw the colour from torch's RNG like every other draw in this
        # class: torch seeds its RNG per DataLoader worker, NumPy's global
        # RNG is not reseeded that way.  float64 matches np.random.rand.
        chosen_color = torch.rand(1, 1, 3, dtype=torch.float64).numpy()
        img_augmented = np.ones_like(image) * chosen_color * 255

        # Noise scale per axis is a power of two with exponent in [0, 6).
        perlin_scalex = 2 ** (torch.randint(min_perlin_scale, perlin_scale, (1,)).numpy()[0])
        perlin_scaley = 2 ** (torch.randint(min_perlin_scale, perlin_scale, (1,)).numpy()[0])

        perlin_noise = rand_perlin_2d_np((self.resize_shape[0], self.resize_shape[1]), (perlin_scalex, perlin_scaley))
        perlin_noise = self.rot(image=perlin_noise)
        threshold = 0.5
        perlin_thr = np.where(perlin_noise > threshold, np.ones_like(perlin_noise), np.zeros_like(perlin_noise))
        perlin_thr = np.expand_dims(perlin_thr, axis=2)

        # Colour patch restricted to the mask, scaled back by 255.
        img_thr = img_augmented.astype(np.float32) * perlin_thr / 255.0

        # beta in [0, 0.8): share of the original image kept inside the mask.
        beta = torch.rand(1).numpy()[0] * 0.8

        augmented_image = image * (1 - perlin_thr) + (1 - beta) * img_thr + beta * image * (
            perlin_thr)

        no_anomaly = torch.rand(1).numpy()[0]
        if no_anomaly > 0.5:
            image = image.astype(np.float32)
            return image, np.zeros_like(perlin_thr, dtype=np.float32), np.array([0.0],dtype=np.float32)

        augmented_image = augmented_image.astype(np.float32)
        msk = (perlin_thr).astype(np.float32)
        augmented_image = msk * augmented_image + (1-msk)*image
        # An empty mask means nothing was actually pasted.
        has_anomaly = 0.0 if np.sum(msk) == 0 else 1.0
        return augmented_image, msk, np.array([has_anomaly],dtype=np.float32)

    def transform_image(self, image):
        """Prepare one cached image and its synthetically corrupted version.

        The image is rotated when a uniform draw exceeds 0.7, converted to
        float32 and divided by 255, then passed to ``augment_image``.

        Returns:
            ``(image, augmented_image, anomaly_mask, has_anomaly)`` with the
            three arrays transposed to channel-first order.
        """
        do_aug_orig = torch.rand(1).numpy()[0] > 0.7
        if do_aug_orig:
            image = self.rot(image=image)

        image = np.array(image).reshape((image.shape[0], image.shape[1], image.shape[2])).astype(np.float32) / 255.0
        augmented_image, anomaly_mask, has_anomaly = self.augment_image(image)
        augmented_image = np.transpose(augmented_image, (2, 0, 1))
        image = np.transpose(image, (2, 0, 1))
        anomaly_mask = np.transpose(anomaly_mask, (2, 0, 1))
        return image, augmented_image, anomaly_mask, has_anomaly

    def __getitem__(self, idx):
        """Return a training sample built from a randomly drawn cached image.

        ``idx`` is ignored; the returned ``'idx'`` is the index that was drawn.
        """
        idx = torch.randint(0, len(self.images), (1,)).item()
        image, augmented_image, anomaly_mask, has_anomaly = self.transform_image(self.images[idx])
        sample = {'image': image, "anomaly_mask": anomaly_mask, 'augmented_image': augmented_image, 'has_anomaly': has_anomaly, 'idx': idx}

        return sample


--------------------------------------------------------------------------------
/data_loader_test.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | from torch.utils.data import Dataset, DataLoader
 3 | import torch
 4 | import cv2
 5 | import glob
 6 | import imgaug.augmenters as iaa
 7 | from perlin import rand_perlin_2d_np
 8 | import os
 9 | 
class TestMVTecDataset(Dataset):
    """Test images with their ground-truth masks.

    Images are every ``*.png`` one directory level below ``root_dir``.  The
    name of that directory decides the label: ``good`` means no anomaly, any
    other name is treated as an anomaly type with a mask under
    ``../../ground_truth/<type>/`` relative to the image's directory.
    """

    def __init__(self, root_dir, resize_shape=None):
        """Collect the image paths; nothing is read from disk yet.

        Args:
            root_dir: Directory whose sub-directories hold the test images.
            resize_shape: Optional ``(height, width)`` to resize images and
                masks to; ``None`` keeps the size on disk.
        """
        self.root_dir = root_dir

        self.images = sorted(glob.glob(root_dir+"/*/*.png"))
        self.resize_shape = resize_shape

    def __len__(self):
        return len(self.images)

    @staticmethod
    def _mask_path(img_path):
        """Return the ground-truth mask path for ``img_path``, or None for 'good' images."""
        dir_path, file_name = os.path.split(img_path)
        # good, crack, scratch, etc.
        base_dir = os.path.basename(dir_path)
        if base_dir == 'good':
            return None
        # splitext strips only the final extension, so names that contain
        # additional dots keep their full stem.
        stem = os.path.splitext(file_name)[0]
        return os.path.join(dir_path, '../../ground_truth/', base_dir, stem + "_mask.png")

    def transform_image(self, image_path, mask_path):
        """Read an image and its mask and convert both to float32 CxHxW.

        Args:
            image_path: Path of the colour image.
            mask_path: Path of the grayscale mask, or ``None`` to use an
                all-zero mask of the image's size.

        Returns:
            ``(image, mask)``, both divided by 255; ``image`` has three
            channels and ``mask`` one.

        Raises:
            OSError: If the image or the mask cannot be read by OpenCV.
        """
        image = cv2.imread(image_path, cv2.IMREAD_COLOR)
        if image is None:
            raise OSError("could not read image: " + str(image_path))

        if mask_path is not None:
            mask = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)
            if mask is None:
                raise OSError("could not read mask: " + str(mask_path))
        else:
            mask = np.zeros((image.shape[0],image.shape[1]))

        if self.resize_shape is not None:
            # resize_shape is (h, w); cv2.resize takes dsize as (w, h).
            dsize = (self.resize_shape[1], self.resize_shape[0])
            image = cv2.resize(image, dsize=dsize)
            mask = cv2.resize(mask, dsize=dsize)

        image = image / 255.0
        mask = mask / 255.0

        image = np.array(image).reshape((image.shape[0], image.shape[1], 3)).astype(np.float32)
        mask = np.array(mask).reshape((mask.shape[0], mask.shape[1], 1)).astype(np.float32)

        image = np.transpose(image, (2, 0, 1))
        mask = np.transpose(mask, (2, 0, 1))
        return image, mask

    def __getitem__(self, idx):
        """Return ``{'image', 'has_anomaly', 'mask', 'idx'}`` for image ``idx``.

        ``has_anomaly`` is ``[0.]`` for images in a ``good`` directory and
        ``[1.]`` otherwise.
        """
        if torch.is_tensor(idx):
            idx = idx.tolist()

        img_path = self.images[idx]
        mask_path = self._mask_path(img_path)
        image, mask = self.transform_image(img_path, mask_path)
        label = 0 if mask_path is None else 1
        has_anomaly = np.array([label], dtype=np.float32)

        sample = {'image': image, 'has_anomaly': has_anomaly,'mask': mask, 'idx': idx}

        return sample
66 | 


--------------------------------------------------------------------------------
/discrete_model.py:
--------------------------------------------------------------------------------
  1 | import torch.nn as nn
  2 | import torch
  3 | import torch.nn.functional as F
  4 | 
  5 | 
  6 | class VectorQuantizerEMA(nn.Module):
  7 |     # Source for the VectorQuantizerEMA module: https://github.com/zalandoresearch/pytorch-vq-vae
  8 |     def __init__(self, num_embeddings, embedding_dim, commitment_cost, decay, epsilon=1e-5):
  9 |         super(VectorQuantizerEMA, self).__init__()
 10 | 
 11 |         self._embedding_dim = embedding_dim
 12 |         self._num_embeddings = num_embeddings
 13 | 
 14 |         self._embedding = nn.Embedding(self._num_embeddings, self._embedding_dim)
 15 |         self._embedding.weight.data.normal_()
 16 |         self._commitment_cost = commitment_cost
 17 | 
 18 |         self.register_buffer('_ema_cluster_size', torch.zeros(num_embeddings))
 19 |         self._ema_w = nn.Parameter(torch.Tensor(num_embeddings, self._embedding_dim))
 20 |         self._ema_w.data.normal_()
 21 | 
 22 |         self._decay = decay
 23 |         self._epsilon = epsilon
 24 | 
 25 |     def get_quantized(self, inputs):
 26 |         inputs = inputs.permute(0, 2, 3, 1).contiguous()
 27 |         input_shape = inputs.shape
 28 | 
 29 |         # Flatten input
 30 |         flat_input = inputs.view(-1, self._embedding_dim)
 31 | 
 32 |         # Calculate distances
 33 |         distances = (torch.sum(flat_input ** 2, dim=1, keepdim=True)
 34 |                      + torch.sum(self._embedding.weight ** 2, dim=1)
 35 |                      - 2 * torch.matmul(flat_input, self._embedding.weight.t()))
 36 | 
 37 |         # Encoding
 38 |         encoding_indices = torch.argmin(distances, dim=1).unsqueeze(1)
 39 |         encodings = torch.zeros(encoding_indices.shape[0], self._num_embeddings, device=inputs.device)
 40 |         encodings.scatter_(1, encoding_indices, 1)
 41 | 
 42 |         # Quantize and unflatten
 43 |         quantized = torch.matmul(encodings, self._embedding.weight).view(input_shape)
 44 |         quantized = inputs + (quantized - inputs).detach()
 45 | 
 46 |         return quantized.permute(0, 3, 1, 2).contiguous()
 47 | 
 48 |     def forward(self, inputs):
 49 |         # convert inputs from BCHW -> BHWC
 50 |         inputs = inputs.permute(0, 2, 3, 1).contiguous()
 51 |         input_shape = inputs.shape
 52 | 
 53 |         # Flatten input
 54 |         flat_input = inputs.view(-1, self._embedding_dim)
 55 | 
 56 |         # Calculate distances
 57 |         distances = (torch.sum(flat_input ** 2, dim=1, keepdim=True)
 58 |                      + torch.sum(self._embedding.weight ** 2, dim=1)
 59 |                      - 2 * torch.matmul(flat_input, self._embedding.weight.t()))
 60 | 
 61 |         # Encoding
 62 |         encoding_indices = torch.argmin(distances, dim=1).unsqueeze(1)
 63 |         encodings = torch.zeros(encoding_indices.shape[0], self._num_embeddings, device=inputs.device)
 64 |         encodings.scatter_(1, encoding_indices, 1)
 65 | 
 66 |         # Quantize and unflatten
 67 |         quantized = torch.matmul(encodings, self._embedding.weight).view(input_shape)
 68 | 
 69 |         # Use EMA to update the embedding vectors
 70 |         if self.training:
 71 |             self._ema_cluster_size = self._ema_cluster_size * self._decay + \
 72 |                                      (1 - self._decay) * torch.sum(encodings, 0)
 73 | 
 74 |             # Laplace smoothing of the cluster size
 75 |             n = torch.sum(self._ema_cluster_size.data)
 76 |             self._ema_cluster_size = (
 77 |                     (self._ema_cluster_size + self._epsilon)
 78 |                     / (n + self._num_embeddings * self._epsilon) * n)
 79 | 
 80 |             dw = torch.matmul(encodings.t(), flat_input)
 81 |             self._ema_w = nn.Parameter(self._ema_w * self._decay + (1 - self._decay) * dw)
 82 | 
 83 |             self._embedding.weight = nn.Parameter(self._ema_w / self._ema_cluster_size.unsqueeze(1))
 84 | 
 85 |         # Loss
 86 |         e_latent_loss = F.mse_loss(quantized.detach(), inputs)
 87 |         loss = self._commitment_cost * e_latent_loss
 88 | 
 89 |         # Straight Through Estimator
 90 |         quantized = inputs + (quantized - inputs).detach()
 91 |         avg_probs = torch.mean(encodings, dim=0)
 92 |         perplexity = torch.exp(-torch.sum(avg_probs * torch.log(avg_probs + 1e-10)))
 93 | 
 94 |         # convert quantized from BHWC -> BCHW
 95 |         return loss, quantized.permute(0, 3, 1, 2).contiguous(), perplexity, encodings
 96 | 
 97 | 
 98 | 
 99 | class Residual(nn.Module):
100 |     def __init__(self, in_channels, num_hiddens, num_residual_hiddens):
101 |         super(Residual, self).__init__()
102 |         self._block = nn.Sequential(
103 |             nn.ReLU(True),
104 |             nn.Conv2d(in_channels=in_channels,
105 |                       out_channels=num_residual_hiddens,
106 |                       kernel_size=3, stride=1, padding=1, bias=False),
107 |             nn.ReLU(True),
108 |             nn.Conv2d(in_channels=num_residual_hiddens,
109 |                       out_channels=num_hiddens,
110 |                       kernel_size=1, stride=1, bias=False)
111 |         )
112 | 
113 |     def forward(self, x):
114 |         return x + self._block(x)
115 | 
116 | 
class ResidualStack(nn.Module):
    """Chain of ``num_residual_layers`` ``Residual`` blocks followed by a ReLU.

    :param in_channels: input channels of every block.
    :param num_hiddens: output channels of every block.
    :param num_residual_layers: number of blocks in the chain.
    :param num_residual_hiddens: inner channel count of every block.
    """

    def __init__(self, in_channels, num_hiddens, num_residual_layers, num_residual_hiddens):
        super().__init__()
        self._num_residual_layers = num_residual_layers
        blocks = [Residual(in_channels, num_hiddens, num_residual_hiddens)
                  for _ in range(num_residual_layers)]
        self._layers = nn.ModuleList(blocks)

    def forward(self, x):
        for block in self._layers:
            x = block(x)
        return F.relu(x)
128 | 
129 | 
class EncoderBot(nn.Module):
    """Bottom encoder: two 4x4 stride-2 convolutions, a 3x3 convolution and a residual stack.

    :param in_channels: channels of the input.
    :param num_hiddens: channels of the output; the first convolution
        produces ``num_hiddens // 2``.
    :param num_residual_layers: number of blocks in the residual stack.
    :param num_residual_hiddens: inner channel count of the residual blocks.
    """

    def __init__(self, in_channels, num_hiddens, num_residual_layers, num_residual_hiddens):
        super().__init__()
        half = num_hiddens // 2

        self._conv_1 = nn.Conv2d(in_channels, half, kernel_size=4, stride=2, padding=1)
        self._conv_2 = nn.Conv2d(half, num_hiddens, kernel_size=4, stride=2, padding=1)
        self._conv_3 = nn.Conv2d(num_hiddens, num_hiddens, kernel_size=3, stride=1, padding=1)
        self._residual_stack = ResidualStack(in_channels=num_hiddens,
                                             num_hiddens=num_hiddens,
                                             num_residual_layers=num_residual_layers,
                                             num_residual_hiddens=num_residual_hiddens)

    def forward(self, inputs):
        hidden = F.relu(self._conv_1(inputs))
        hidden = F.relu(self._conv_2(hidden))
        # No activation between the last convolution and the residual stack.
        hidden = self._conv_3(hidden)
        return self._residual_stack(hidden)
160 | 
161 | 
class EncoderTop(nn.Module):
    """Top encoder: one 4x4 stride-2 convolution, a 3x3 convolution and a residual stack.

    :param in_channels: channels of the input.
    :param num_hiddens: channels of both convolutions and of the output.
    :param num_residual_layers: number of blocks in the residual stack.
    :param num_residual_hiddens: inner channel count of the residual blocks.
    """

    def __init__(self, in_channels, num_hiddens, num_residual_layers, num_residual_hiddens):
        super().__init__()

        self._conv_1 = nn.Conv2d(in_channels, num_hiddens, kernel_size=4, stride=2, padding=1)
        self._conv_2 = nn.Conv2d(num_hiddens, num_hiddens, kernel_size=3, stride=1, padding=1)
        self._residual_stack = ResidualStack(in_channels=num_hiddens,
                                             num_hiddens=num_hiddens,
                                             num_residual_layers=num_residual_layers,
                                             num_residual_hiddens=num_residual_hiddens)

    def forward(self, inputs):
        hidden = F.relu(self._conv_1(inputs))
        hidden = F.relu(self._conv_2(hidden))
        return self._residual_stack(hidden)
188 | 
189 | 
class DecoderBot(nn.Module):
    """Decoder: 3x3 convolution, residual stack and two 4x4 stride-2 transposed convolutions.

    The last transposed convolution always produces 3 channels.

    :param in_channels: channels of the input.
    :param num_hiddens: channels used by the convolution and the residual
        stack; the first transposed convolution produces ``num_hiddens // 2``.
    :param num_residual_layers: number of blocks in the residual stack.
    :param num_residual_hiddens: inner channel count of the residual blocks.
    """

    def __init__(self, in_channels, num_hiddens, num_residual_layers, num_residual_hiddens):
        super().__init__()
        half = num_hiddens // 2

        self._conv_1 = nn.Conv2d(in_channels, num_hiddens, kernel_size=3, stride=1, padding=1)
        self._residual_stack = ResidualStack(in_channels=num_hiddens,
                                             num_hiddens=num_hiddens,
                                             num_residual_layers=num_residual_layers,
                                             num_residual_hiddens=num_residual_hiddens)
        self._conv_trans_1 = nn.ConvTranspose2d(num_hiddens, half,
                                                kernel_size=4, stride=2, padding=1)
        self._conv_trans_2 = nn.ConvTranspose2d(half, 3,
                                                kernel_size=4, stride=2, padding=1)

    def forward(self, inputs):
        hidden = self._residual_stack(self._conv_1(inputs))
        hidden = F.relu(self._conv_trans_1(hidden))
        # The output layer is left without an activation.
        return self._conv_trans_2(hidden)
223 | 
class DiscreteLatentModel(nn.Module):
    """Two-level quantized autoencoder with a bottom and a top codebook.

    :param num_hiddens: channel width of the encoders and the decoder.
    :param num_residual_layers: residual blocks per residual stack.
    :param num_residual_hiddens: inner channel count of the residual blocks.
    :param num_embeddings: number of codes in each of the two codebooks.
    :param embedding_dim: length of each code vector.
    :param commitment_cost: commitment-loss factor handed to both quantizers.
    :param decay: EMA decay handed to both quantizers.
    :param test: stored as ``self.test``; not read anywhere in this class.
    """

    def __init__(self, num_hiddens, num_residual_layers, num_residual_hiddens, num_embeddings, embedding_dim,
                 commitment_cost, decay=0, test=False):
        super().__init__()
        self.test = test

        # The top encoder consumes the bottom encoder's output, hence
        # ``num_hiddens`` input channels; the bottom encoder takes 3 channels.
        self._encoder_t = EncoderTop(num_hiddens, num_hiddens,
                                     num_residual_layers, num_residual_hiddens)
        self._encoder_b = EncoderBot(3, num_hiddens,
                                     num_residual_layers, num_residual_hiddens)

        # 1x1 projections into the embedding space ahead of each quantizer.
        self._pre_vq_conv_bot = nn.Conv2d(num_hiddens + embedding_dim, embedding_dim,
                                          kernel_size=1, stride=1)
        self._pre_vq_conv_top = nn.Conv2d(num_hiddens, embedding_dim,
                                          kernel_size=1, stride=1)

        self._vq_vae_top = VectorQuantizerEMA(num_embeddings, embedding_dim,
                                              commitment_cost, decay)
        self._vq_vae_bot = VectorQuantizerEMA(num_embeddings, embedding_dim,
                                              commitment_cost, decay)

        # The decoder receives both quantized maps concatenated along channels.
        self._decoder_b = DecoderBot(embedding_dim * 2, num_hiddens,
                                     num_residual_layers, num_residual_hiddens)

        self.upsample_t = nn.ConvTranspose2d(embedding_dim, embedding_dim, 4,
                                             stride=2, padding=1)

    def forward(self, x):
        """Encode, quantize at both levels and reconstruct ``x``.

        :return: ``(loss_b, loss_t, recon_fin, quantized_t, quantized_b)``:
            the bottom and top quantizer losses, the reconstruction, and the
            top and bottom quantized feature maps.
        """
        # Encoder Hi
        enc_b = self._encoder_b(x)

        # Encoder Lo -- F_Lo
        zt = self._pre_vq_conv_top(self._encoder_t(enc_b))

        # Quantize F_Lo with K_Lo; perplexity and encodings are not used here.
        loss_t, quantized_t, _, _ = self._vq_vae_top(zt)

        # Upsample Q_Lo
        up_quantized_t = self.upsample_t(quantized_t)

        # F_Hi: Encoder_Hi output concatenated with the upsampled Q_Lo, then projected.
        zb = self._pre_vq_conv_bot(torch.cat((enc_b, up_quantized_t), dim=1))

        # Quantize F_Hi with K_Hi
        loss_b, quantized_b, _, _ = self._vq_vae_bot(zb)

        # Decode the concatenation of Q_Lo (upsampled) and Q_Hi.
        recon_fin = self._decoder_b(torch.cat((up_quantized_t, quantized_b), dim=1))

        return loss_b, loss_t, recon_fin, quantized_t, quantized_b


--------------------------------------------------------------------------------
/dsr_model.py:
--------------------------------------------------------------------------------
  1 | import torch.nn as nn
  2 | import torch
  3 | import torch.nn.functional as F
  4 | 
  5 | class SubspaceRestrictionModule(nn.Module):
  6 |     def __init__(self, embedding_size=64):
  7 |         super(SubspaceRestrictionModule, self).__init__()
  8 | 
  9 |         base_width = embedding_size
 10 |         self.unet = SubspaceRestrictionNetwork(in_channels=base_width, out_channels=base_width, base_width=embedding_size)
 11 | 
 12 |     def forward(self, x, quantization):
 13 |         x = self.unet(x)
 14 |         loss_b, quantized_b, perplexity_b, encodings_b = quantization(x)
 15 |         return x, quantized_b, loss_b
 16 | 
 17 | class SubspaceRestrictionNetwork(nn.Module):
 18 |     def __init__(self, in_channels=64, out_channels=64, base_width=64):
 19 |         super().__init__()
 20 |         self.base_width = base_width
 21 |         self.encoder = FeatureEncoder(in_channels, self.base_width)
 22 |         self.decoder = FeatureDecoder(self.base_width, out_channels=out_channels)
 23 | 
 24 |     def forward(self, x):
 25 |         b1, b2, b3 = self.encoder(x)
 26 |         output = self.decoder(b1, b2, b3)
 27 |         return output
 28 | 
 29 | class FeatureEncoder(nn.Module):
 30 |     def __init__(self, in_channels, base_width):
 31 |         super().__init__()
 32 |         self.block1 = nn.Sequential(
 33 |             nn.Conv2d(in_channels, base_width, kernel_size=3, padding=1),
 34 |             nn.InstanceNorm2d(base_width),
 35 |             nn.ReLU(inplace=True),
 36 |             nn.Conv2d(base_width, base_width, kernel_size=3, padding=1),
 37 |             nn.InstanceNorm2d(base_width),
 38 |             nn.ReLU(inplace=True))
 39 |         self.mp1 = nn.Sequential(nn.MaxPool2d(2))
 40 |         self.block2 = nn.Sequential(
 41 |             nn.Conv2d(base_width, base_width * 2, kernel_size=3, padding=1),
 42 |             nn.InstanceNorm2d(base_width * 2),
 43 |             nn.ReLU(inplace=True),
 44 |             nn.Conv2d(base_width * 2, base_width * 2, kernel_size=3, padding=1),
 45 |             nn.InstanceNorm2d(base_width * 2),
 46 |             nn.ReLU(inplace=True))
 47 |         self.mp2 = nn.Sequential(nn.MaxPool2d(2))
 48 |         self.block3 = nn.Sequential(
 49 |             nn.Conv2d(base_width * 2, base_width * 4, kernel_size=3, padding=1),
 50 |             nn.InstanceNorm2d(base_width * 4),
 51 |             nn.ReLU(inplace=True),
 52 |             nn.Conv2d(base_width * 4, base_width * 4, kernel_size=3, padding=1),
 53 |             nn.InstanceNorm2d(base_width * 4),
 54 |             nn.ReLU(inplace=True))
 55 | 
 56 |     def forward(self, x):
 57 |         b1 = self.block1(x)
 58 |         mp1 = self.mp1(b1)
 59 |         b2 = self.block2(mp1)
 60 |         mp2 = self.mp2(b2)
 61 |         b3 = self.block3(mp2)
 62 |         return b1, b2, b3
 63 | 
 64 | class FeatureDecoder(nn.Module):
 65 |     def __init__(self, base_width, out_channels=1):
 66 |         super().__init__()
 67 | 
 68 |         self.up2 = nn.Sequential(nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True),
 69 |                                  nn.Conv2d(base_width * 4, base_width * 2, kernel_size=3, padding=1),
 70 |                                  nn.InstanceNorm2d(base_width * 2),
 71 |                                  nn.ReLU(inplace=True))
 72 | 
 73 |         self.db2 = nn.Sequential(
 74 |             nn.Conv2d(base_width * 2, base_width * 2, kernel_size=3, padding=1),
 75 |             nn.InstanceNorm2d(base_width * 2),
 76 |             nn.ReLU(inplace=True),
 77 |             nn.Conv2d(base_width * 2, base_width * 2, kernel_size=3, padding=1),
 78 |             nn.InstanceNorm2d(base_width * 2),
 79 |             nn.ReLU(inplace=True)
 80 |         )
 81 | 
 82 |         self.up3 = nn.Sequential(nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True),
 83 |                                  nn.Conv2d(base_width * 2, base_width, kernel_size=3, padding=1),
 84 |                                  nn.InstanceNorm2d(base_width),
 85 |                                  nn.ReLU(inplace=True))
 86 |         self.db3 = nn.Sequential(
 87 |             nn.Conv2d(base_width, base_width, kernel_size=3, padding=1),
 88 |             nn.InstanceNorm2d(base_width),
 89 |             nn.ReLU(inplace=True),
 90 |             nn.Conv2d(base_width, base_width, kernel_size=3, padding=1),
 91 |             nn.InstanceNorm2d(base_width),
 92 |             nn.ReLU(inplace=True)
 93 |         )
 94 | 
 95 |         self.fin_out = nn.Sequential(nn.Conv2d(base_width, out_channels, kernel_size=3, padding=1))
 96 | 
 97 |     def forward(self, b1, b2, b3):
 98 |         up2 = self.up2(b3)
 99 |         db2 = self.db2(up2)
100 | 
101 |         up3 = self.up3(db2)
102 |         db3 = self.db3(up3)
103 | 
104 |         out = self.fin_out(db3)
105 |         return out
106 | 
class Residual(nn.Module):
    """Residual block: ReLU -> 3x3 conv -> ReLU -> 1x1 conv, added to its input.

    :param in_channels: channels of the incoming feature map.
    :param num_hiddens: channels produced by the block; the skip addition
        needs them to be compatible with ``in_channels``.
    :param num_residual_hiddens: channels between the two convolutions.
    """

    def __init__(self, in_channels, num_hiddens, num_residual_hiddens):
        super().__init__()
        squeeze = nn.Conv2d(in_channels, num_residual_hiddens,
                            kernel_size=3, stride=1, padding=1, bias=False)
        expand = nn.Conv2d(num_residual_hiddens, num_hiddens,
                           kernel_size=1, stride=1, bias=False)
        self._block = nn.Sequential(nn.ReLU(True), squeeze, nn.ReLU(True), expand)

    def forward(self, x):
        # NOTE: the leading ReLU is in-place, so ``x`` itself is rectified by
        # the call below and the skip connection adds the rectified tensor.
        branch = self._block(x)
        return x + branch
123 | 
124 | 
class ResidualStack(nn.Module):
    """Chain of ``num_residual_layers`` ``Residual`` blocks followed by a ReLU.

    :param in_channels: input channels of every block.
    :param num_hiddens: output channels of every block.
    :param num_residual_layers: number of blocks in the chain.
    :param num_residual_hiddens: inner channel count of every block.
    """

    def __init__(self, in_channels, num_hiddens, num_residual_layers, num_residual_hiddens):
        super().__init__()
        self._num_residual_layers = num_residual_layers
        blocks = [Residual(in_channels, num_hiddens, num_residual_hiddens)
                  for _ in range(num_residual_layers)]
        self._layers = nn.ModuleList(blocks)

    def forward(self, x):
        for block in self._layers:
            x = block(x)
        return F.relu(x)
136 | 
137 | 
class ImageReconstructionNetwork(nn.Module):
    """Convolutional encoder/decoder that maps a feature map to a 3-channel output.

    The input passes through two conv stages, each followed by 2x max-pooling,
    and a 1x1 projection to 64 channels. Two stride-2 transposed convolutions
    then undo the pooling, and a decoder head (3x3 convolution, residual
    stack, two stride-2 transposed convolutions) produces 3 channels.

    :param in_channels: channels of the input.
    :param num_hiddens: channel width of the decoder head.
    :param num_residual_layers: number of blocks in the residual stack.
    :param num_residual_hiddens: inner channel count of the residual blocks.
    """

    @staticmethod
    def _widening_pair(channels_in, channels_out):
        """3x3 conv keeping ``channels_in``, then 3x3 conv to ``channels_out``; each with InstanceNorm and ReLU."""
        return nn.Sequential(
            nn.Conv2d(channels_in, channels_in, kernel_size=3, padding=1),
            nn.InstanceNorm2d(channels_in),
            nn.ReLU(inplace=True),
            nn.Conv2d(channels_in, channels_out, kernel_size=3, padding=1),
            nn.InstanceNorm2d(channels_out),
            nn.ReLU(inplace=True))

    def __init__(self, in_channels, num_hiddens, num_residual_layers, num_residual_hiddens):
        super().__init__()
        latent = 64  # fixed width of the bottleneck and of the two up-blocks

        self.block1 = self._widening_pair(in_channels, in_channels * 2)
        self.mp1 = nn.Sequential(nn.MaxPool2d(2))
        self.block2 = self._widening_pair(in_channels * 2, in_channels * 4)
        self.mp2 = nn.Sequential(nn.MaxPool2d(2))

        self.pre_vq_conv = nn.Conv2d(in_channels * 4, latent, kernel_size=1, stride=1)

        self.upblock1 = nn.ConvTranspose2d(latent, latent, kernel_size=4, stride=2, padding=1)
        self.upblock2 = nn.ConvTranspose2d(latent, latent, kernel_size=4, stride=2, padding=1)

        self._conv_1 = nn.Conv2d(latent, num_hiddens, kernel_size=3, stride=1, padding=1)
        self._residual_stack = ResidualStack(in_channels=num_hiddens,
                                             num_hiddens=num_hiddens,
                                             num_residual_layers=num_residual_layers,
                                             num_residual_hiddens=num_residual_hiddens)
        self._conv_trans_1 = nn.ConvTranspose2d(num_hiddens, num_hiddens // 2,
                                                kernel_size=4, stride=2, padding=1)
        self._conv_trans_2 = nn.ConvTranspose2d(num_hiddens // 2, 3,
                                                kernel_size=4, stride=2, padding=1)

    def forward(self, inputs):
        # Contracting path down to the 64-channel bottleneck.
        hidden = self.mp1(self.block1(inputs))
        hidden = self.mp2(self.block2(hidden))
        hidden = self.pre_vq_conv(hidden)

        # Undo the two poolings.
        hidden = F.relu(self.upblock1(hidden))
        hidden = F.relu(self.upblock2(hidden))

        # Decoder head; the final layer has no activation.
        hidden = self._residual_stack(self._conv_1(hidden))
        hidden = F.relu(self._conv_trans_1(hidden))
        return self._conv_trans_2(hidden)
217 | 
218 | 
219 | 
220 | 
class UnetEncoder(nn.Module):
    """Four-stage convolutional encoder with 2x max-pooling between stages.

    Stage widths are ``base_width``, ``2x``, ``4x`` and ``4x`` ``base_width``.

    :param in_channels: channels of the input.
    :param base_width: channels of the first stage.
    """

    @staticmethod
    def _conv_pair(channels_in, channels_out):
        """Two (3x3 conv -> InstanceNorm -> ReLU) steps ending at ``channels_out``."""
        return nn.Sequential(
            nn.Conv2d(channels_in, channels_out, kernel_size=3, padding=1),
            nn.InstanceNorm2d(channels_out),
            nn.ReLU(inplace=True),
            nn.Conv2d(channels_out, channels_out, kernel_size=3, padding=1),
            nn.InstanceNorm2d(channels_out),
            nn.ReLU(inplace=True))

    def __init__(self, in_channels, base_width):
        super().__init__()
        self.block1 = self._conv_pair(in_channels, base_width)
        self.mp1 = nn.Sequential(nn.MaxPool2d(2))
        self.block2 = self._conv_pair(base_width, base_width * 2)
        self.mp2 = nn.Sequential(nn.MaxPool2d(2))
        self.block3 = self._conv_pair(base_width * 2, base_width * 4)
        self.mp3 = nn.Sequential(nn.MaxPool2d(2))
        self.block4 = self._conv_pair(base_width * 4, base_width * 4)

    def forward(self, x):
        """Return the outputs of the four stages, finest resolution first."""
        b1 = self.block1(x)
        b2 = self.block2(self.mp1(b1))
        b3 = self.block3(self.mp2(b2))
        b4 = self.block4(self.mp3(b3))
        return b1, b2, b3, b4
266 | 
267 | 
class UnetDecoder(nn.Module):
    """U-Net decoder: three upsampling steps, each merged with an encoder stage.

    :param base_width: channel width of the finest encoder stage.
    :param out_channels: channels of the output.
    """

    @staticmethod
    def _upsample(channels_in, channels_out):
        """2x bilinear upsampling followed by 3x3 conv -> InstanceNorm -> ReLU."""
        return nn.Sequential(
            nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True),
            nn.Conv2d(channels_in, channels_out, kernel_size=3, padding=1),
            nn.InstanceNorm2d(channels_out),
            nn.ReLU(inplace=True))

    @staticmethod
    def _merge(channels_in, channels_out):
        """Two (3x3 conv -> InstanceNorm -> ReLU) steps applied to a concatenated map."""
        return nn.Sequential(
            nn.Conv2d(channels_in, channels_out, kernel_size=3, padding=1),
            nn.InstanceNorm2d(channels_out),
            nn.ReLU(inplace=True),
            nn.Conv2d(channels_out, channels_out, kernel_size=3, padding=1),
            nn.InstanceNorm2d(channels_out),
            nn.ReLU(inplace=True))

    def __init__(self, base_width, out_channels=1):
        super().__init__()
        # Each merge block sees the upsampled map concatenated with a skip
        # input of the same width, hence twice the channels on its input.
        self.up1 = self._upsample(base_width * 4, base_width * 4)
        self.db1 = self._merge(base_width * (4 + 4), base_width * 4)

        self.up2 = self._upsample(base_width * 4, base_width * 2)
        self.db2 = self._merge(base_width * (2 + 2), base_width * 2)

        self.up3 = self._upsample(base_width * 2, base_width)
        self.db3 = self._merge(base_width * (1 + 1), base_width)

        self.fin_out = nn.Sequential(nn.Conv2d(base_width, out_channels, kernel_size=3, padding=1))

    def forward(self, b1, b2, b3, b4):
        """Decode ``b4`` using ``b3``, ``b2`` and ``b1`` as skip connections, in that order."""
        hidden = self.db1(torch.cat((self.up1(b4), b3), dim=1))
        hidden = self.db2(torch.cat((self.up2(hidden), b2), dim=1))
        hidden = self.db3(torch.cat((self.up3(hidden), b1), dim=1))
        return self.fin_out(hidden)
332 | 
333 | 
334 | 
class UnetModel(nn.Module):
    """U-Net assembled from ``UnetEncoder`` and ``UnetDecoder``.

    :param in_channels: channels of the input.
    :param out_channels: channels of the output.
    :param base_width: channel width of the finest stage.
    """

    def __init__(self, in_channels=64, out_channels=64, base_width=64):
        super().__init__()
        self.encoder = UnetEncoder(in_channels, base_width)
        self.decoder = UnetDecoder(base_width, out_channels=out_channels)

    def forward(self, x):
        stages = self.encoder(x)
        return self.decoder(*stages)
345 | 
class AnomalyDetectionModule(nn.Module):
    """U-Net over two channel-concatenated images, producing a 2-channel map.

    :param embedding_size: accepted but not used by this class.
    """

    def __init__(self, embedding_size=64):
        super().__init__()
        # 6 input channels: presumably two 3-channel images -- confirm with callers.
        self.unet = UnetModel(in_channels=6, out_channels=2, base_width=64)

    def forward(self, image_real, image_anomaly):
        stacked = torch.cat((image_real, image_anomaly), dim=1)
        return self.unet(stacked)
354 | 
355 | 
class UpsamplingModule(nn.Module):
    """U-Net over two images and a segmentation map, concatenated along channels.

    Produces a 2-channel map.

    :param embedding_size: accepted but not used by this class.
    """

    def __init__(self, embedding_size=64):
        super().__init__()
        # 8 input channels: presumably 3 + 3 image channels plus a 2-channel
        # segmentation map -- confirm with callers.
        self.unet = UnetModel(in_channels=8, out_channels=2, base_width=64)

    def forward(self, image_real, image_anomaly, segmentation_map):
        stacked = torch.cat((image_real, image_anomaly, segmentation_map), dim=1)
        return self.unet(stacked)
365 | 


--------------------------------------------------------------------------------
/loss.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | import torch.nn as nn
 3 | 
 4 | class FocalLoss(nn.Module):
 5 |     """
 6 |     copy from: https://github.com/Hsuxu/Loss_ToolBox-PyTorch/blob/master/FocalLoss/FocalLoss.py
 7 |     This is a implementation of Focal Loss with smooth label cross entropy supported which is proposed in
 8 |     'Focal Loss for Dense Object Detection. (https://arxiv.org/abs/1708.02002)'
 9 |         Focal_Loss= -1*alpha*(1-pt)*log(pt)
10 |     :param num_class:
11 |     :param alpha: (tensor) 3D or 4D the scalar factor for this criterion
12 |     :param gamma: (float,double) gamma > 0 reduces the relative loss for well-classified examples (p>0.5) putting more
13 |                     focus on hard misclassified example
14 |     :param smooth: (float,double) smooth value when cross entropy
15 |     :param balance_index: (int) balance class index, should be specific when alpha is float
16 |     :param size_average: (bool, optional) By default, the losses are averaged over each loss element in the batch.
17 |     """
18 | 
19 |     def __init__(self, apply_nonlin=None, alpha=None, gamma=2, balance_index=0, smooth=1e-5, size_average=True):
20 |         super(FocalLoss, self).__init__()
21 |         self.apply_nonlin = apply_nonlin
22 |         self.alpha = alpha
23 |         self.gamma = gamma
24 |         self.balance_index = balance_index
25 |         self.smooth = smooth
26 |         self.size_average = size_average
27 | 
28 |         if self.smooth is not None:
29 |             if self.smooth < 0 or self.smooth > 1.0:
30 |                 raise ValueError('smooth value should be in [0,1]')
31 | 
32 |     def forward(self, logit, target):
33 |         if self.apply_nonlin is not None:
34 |             logit = self.apply_nonlin(logit)
35 |         num_class = logit.shape[1]
36 | 
37 |         if logit.dim() > 2:
38 |             # N,C,d1,d2 -> N,C,m (m=d1*d2*...)
39 |             logit = logit.view(logit.size(0), logit.size(1), -1)
40 |             logit = logit.permute(0, 2, 1).contiguous()
41 |             logit = logit.view(-1, logit.size(-1))
42 |         target = torch.squeeze(target, 1)
43 |         target = target.view(-1, 1)
44 |         # print(logit.shape, target.shape)
45 |         #
46 |         alpha = self.alpha
47 | 
48 |         if alpha is None:
49 |             alpha = torch.ones(num_class, 1)
50 |         elif isinstance(alpha, (list, np.ndarray)):
51 |             assert len(alpha) == num_class
52 |             alpha = torch.FloatTensor(alpha).view(num_class, 1)
53 |             alpha = alpha / alpha.sum()
54 |         elif isinstance(alpha, float):
55 |             alpha = torch.ones(num_class, 1)
56 |             alpha = alpha * (1 - self.alpha)
57 |             alpha[self.balance_index] = self.alpha
58 | 
59 |         else:
60 |             raise TypeError('Not support alpha type')
61 | 
62 |         if alpha.device != logit.device:
63 |             alpha = alpha.to(logit.device)
64 | 
65 |         idx = target.cpu().long()
66 | 
67 |         one_hot_key = torch.FloatTensor(target.size(0), num_class).zero_()
68 |         one_hot_key = one_hot_key.scatter_(1, idx, 1)
69 |         if one_hot_key.device != logit.device:
70 |             one_hot_key = one_hot_key.to(logit.device)
71 | 
72 |         if self.smooth:
73 |             one_hot_key = torch.clamp(
74 |                 one_hot_key, self.smooth / (num_class - 1), 1.0 - self.smooth)
75 |         pt = (one_hot_key * logit).sum(1) + self.smooth
76 |         logpt = pt.log()
77 | 
78 |         gamma = self.gamma
79 | 
80 |         alpha = alpha[idx]
81 |         alpha = torch.squeeze(alpha)
82 |         loss = -1 * alpha * torch.pow((1 - pt), gamma) * logpt
83 | 
84 |         if self.size_average:
85 |             loss = loss.mean()
86 |         else:
87 |             loss = loss.sum()
88 |         return loss
89 | 


--------------------------------------------------------------------------------
/perlin.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import math
  3 | import numpy as np
  4 | from scipy.interpolate import interp2d
  5 | 
  6 | import cv2
  7 | def lerp_np(x,y,w):
  8 |     fin_out = (y-x)*w + x
  9 |     return fin_out
 10 | 
 11 | def generate_fractal_noise_2d(shape, res, octaves=1, persistence=0.5):
 12 |     noise = np.zeros(shape)
 13 |     frequency = 1
 14 |     amplitude = 1
 15 |     for _ in range(octaves):
 16 |         noise += amplitude * generate_perlin_noise_2d(shape, (frequency*res[0], frequency*res[1]))
 17 |         frequency *= 2
 18 |         amplitude *= persistence
 19 |     return noise
 20 | 
 21 | 
 22 | def generate_perlin_noise_2d(shape, res):
 23 |     def f(t):
 24 |         return 6 * t ** 5 - 15 * t ** 4 + 10 * t ** 3
 25 | 
 26 |     delta = (res[0] / shape[0], res[1] / shape[1])
 27 |     d = (shape[0] // res[0], shape[1] // res[1])
 28 |     grid = np.mgrid[0:res[0]:delta[0], 0:res[1]:delta[1]].transpose(1, 2, 0) % 1
 29 |     # Gradients
 30 |     angles = 2 * np.pi * np.random.rand(res[0] + 1, res[1] + 1)
 31 |     gradients = np.dstack((np.cos(angles), np.sin(angles)))
 32 |     g00 = gradients[0:-1, 0:-1].repeat(d[0], 0).repeat(d[1], 1)
 33 |     g10 = gradients[1:, 0:-1].repeat(d[0], 0).repeat(d[1], 1)
 34 |     g01 = gradients[0:-1, 1:].repeat(d[0], 0).repeat(d[1], 1)
 35 |     g11 = gradients[1:, 1:].repeat(d[0], 0).repeat(d[1], 1)
 36 |     # Ramps
 37 |     n00 = np.sum(grid * g00, 2)
 38 |     n10 = np.sum(np.dstack((grid[:, :, 0] - 1, grid[:, :, 1])) * g10, 2)
 39 |     n01 = np.sum(np.dstack((grid[:, :, 0], grid[:, :, 1] - 1)) * g01, 2)
 40 |     n11 = np.sum(np.dstack((grid[:, :, 0] - 1, grid[:, :, 1] - 1)) * g11, 2)
 41 |     # Interpolation
 42 |     t = f(grid)
 43 |     n0 = n00 * (1 - t[:, :, 0]) + t[:, :, 0] * n10
 44 |     n1 = n01 * (1 - t[:, :, 0]) + t[:, :, 0] * n11
 45 |     return np.sqrt(2) * ((1 - t[:, :, 1]) * n0 + t[:, :, 1] * n1)
 46 | 
 47 | 
 48 | def rand_perlin_2d_np(shape, res, fade=lambda t: 6 * t ** 5 - 15 * t ** 4 + 10 * t ** 3):
 49 |     delta = (res[0] / shape[0], res[1] / shape[1])
 50 |     d = (shape[0] // res[0], shape[1] // res[1])
 51 |     grid = np.mgrid[0:res[0]:delta[0], 0:res[1]:delta[1]].transpose(1, 2, 0) % 1
 52 | 
 53 |     angles = 2 * math.pi * np.random.rand(res[0] + 1, res[1] + 1)
 54 |     gradients = np.stack((np.cos(angles), np.sin(angles)), axis=-1)
 55 |     tt = np.repeat(np.repeat(gradients,d[0],axis=0),d[1],axis=1)
 56 | 
 57 |     tile_grads = lambda slice1, slice2: np.repeat(np.repeat(gradients[slice1[0]:slice1[1], slice2[0]:slice2[1]],d[0],axis=0),d[1],axis=1)
 58 |     dot = lambda grad, shift: (
 59 |                 np.stack((grid[:shape[0], :shape[1], 0] + shift[0], grid[:shape[0], :shape[1], 1] + shift[1]),
 60 |                             axis=-1) * grad[:shape[0], :shape[1]]).sum(axis=-1)
 61 | 
 62 |     n00 = dot(tile_grads([0, -1], [0, -1]), [0, 0])
 63 |     n10 = dot(tile_grads([1, None], [0, -1]), [-1, 0])
 64 |     n01 = dot(tile_grads([0, -1], [1, None]), [0, -1])
 65 |     n11 = dot(tile_grads([1, None], [1, None]), [-1, -1])
 66 |     t = fade(grid[:shape[0], :shape[1]])
 67 |     return math.sqrt(2) * lerp_np(lerp_np(n00, n10, t[..., 0]), lerp_np(n01, n11, t[..., 0]), t[..., 1])
 68 | 
 69 | 
 70 | def rand_perlin_2d(shape, res, fade=lambda t: 6 * t ** 5 - 15 * t ** 4 + 10 * t ** 3):
 71 |     delta = (res[0] / shape[0], res[1] / shape[1])
 72 |     d = (shape[0] // res[0], shape[1] // res[1])
 73 | 
 74 |     grid = torch.stack(torch.meshgrid(torch.arange(0, res[0], delta[0]), torch.arange(0, res[1], delta[1])), dim=-1) % 1
 75 |     angles = 2 * math.pi * torch.rand(res[0] + 1, res[1] + 1)
 76 |     gradients = torch.stack((torch.cos(angles), torch.sin(angles)), dim=-1)
 77 | 
 78 |     tile_grads = lambda slice1, slice2: gradients[slice1[0]:slice1[1], slice2[0]:slice2[1]].repeat_interleave(d[0],
 79 |                                                                                                               0).repeat_interleave(
 80 |         d[1], 1)
 81 |     dot = lambda grad, shift: (
 82 |                 torch.stack((grid[:shape[0], :shape[1], 0] + shift[0], grid[:shape[0], :shape[1], 1] + shift[1]),
 83 |                             dim=-1) * grad[:shape[0], :shape[1]]).sum(dim=-1)
 84 | 
 85 |     n00 = dot(tile_grads([0, -1], [0, -1]), [0, 0])
 86 | 
 87 |     n10 = dot(tile_grads([1, None], [0, -1]), [-1, 0])
 88 |     n01 = dot(tile_grads([0, -1], [1, None]), [0, -1])
 89 |     n11 = dot(tile_grads([1, None], [1, None]), [-1, -1])
 90 |     t = fade(grid[:shape[0], :shape[1]])
 91 |     return math.sqrt(2) * torch.lerp(torch.lerp(n00, n10, t[..., 0]), torch.lerp(n01, n11, t[..., 0]), t[..., 1])
 92 | 
 93 | 
 94 | def rand_perlin_2d_octaves(shape, res, octaves=1, persistence=0.5):
 95 |     noise = torch.zeros(shape)
 96 |     frequency = 1
 97 |     amplitude = 1
 98 |     for _ in range(octaves):
 99 |         noise += amplitude * rand_perlin_2d(shape, (frequency * res[0], frequency * res[1]))
100 |         frequency *= 2
101 |         amplitude *= persistence
102 |     return noise


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
  1 | # This file may be used to create an environment using:
  2 | # $ conda create --name <env> --file <this file>
  3 | # platform: linux-64
  4 | _libgcc_mutex=0.1=main
  5 | _openmp_mutex=5.1=1_gnu
  6 | absl-py=1.0.0=pypi_0
  7 | aiohttp=3.8.1=pypi_0
  8 | aiosignal=1.2.0=pypi_0
  9 | async-timeout=4.0.2=pypi_0
 10 | attrs=21.4.0=pypi_0
 11 | beautifulsoup4=4.11.1=pypi_0
 12 | blas=1.0=mkl
 13 | brotlipy=0.7.0=py310h7f8727e_1002
 14 | bzip2=1.0.8=h7b6447c_0
 15 | ca-certificates=2022.4.26=h06a4308_0
 16 | cachetools=5.1.0=pypi_0
 17 | certifi=2020.6.20=pyhd3eb1b0_3
 18 | cffi=1.15.0=py310hd667e15_1
 19 | charset-normalizer=2.0.4=pyhd3eb1b0_0
 20 | chex=0.1.3=pypi_0
 21 | click=8.1.3=pypi_0
 22 | cryptography=37.0.1=py310h9ce1e76_0
 23 | cudatoolkit=10.2.89=hfd86e86_1
 24 | cycler=0.11.0=pypi_0
 25 | dm-tree=0.1.7=pypi_0
 26 | docker-pycreds=0.4.0=pypi_0
 27 | efficientnet-pytorch=0.7.1=pypi_0
 28 | faiss-gpu=1.7.2=pypi_0
 29 | ffmpeg=4.3=hf484d3e_0
 30 | filelock=3.7.1=pypi_0
 31 | flatbuffers=2.0=pypi_0
 32 | flax=0.4.2=pypi_0
 33 | fonttools=4.33.3=pypi_0
 34 | freetype=2.11.0=h70c0345_0
 35 | frozenlist=1.3.0=pypi_0
 36 | fsspec=2022.5.0=pypi_0
 37 | gdown=4.5.1=pypi_0
 38 | giflib=5.2.1=h7b6447c_0
 39 | gitdb=4.0.9=pypi_0
 40 | gitpython=3.1.27=pypi_0
 41 | gmp=6.2.1=h2531618_2
 42 | gnutls=3.6.15=he1e5248_0
 43 | google-auth=2.6.6=pypi_0
 44 | google-auth-oauthlib=0.4.6=pypi_0
 45 | grpcio=1.46.3=pypi_0
 46 | idna=3.3=pyhd3eb1b0_0
 47 | imageio=2.19.2=pypi_0
 48 | imgaug=0.4.0=pypi_0
 49 | intel-openmp=2021.4.0=h06a4308_3561
 50 | jax=0.3.13=pypi_0
 51 | jaxlib=0.3.10+cuda11.cudnn82=pypi_0
 52 | joblib=1.1.0=pypi_0
 53 | jpeg=9e=h7f8727e_0
 54 | kiwisolver=1.4.2=pypi_0
 55 | lame=3.100=h7b6447c_0
 56 | lcms2=2.12=h3be6417_0
 57 | ld_impl_linux-64=2.38=h1181459_1
 58 | libffi=3.3=he6710b0_2
 59 | libgcc-ng=11.2.0=h1234567_0
 60 | libgomp=11.2.0=h1234567_0
 61 | libiconv=1.16=h7f8727e_2
 62 | libidn2=2.3.2=h7f8727e_0
 63 | libpng=1.6.37=hbc83047_0
 64 | libstdcxx-ng=11.2.0=h1234567_0
 65 | libtasn1=4.16.0=h27cfd23_0
 66 | libtiff=4.2.0=h85742a9_0
 67 | libunistring=0.9.10=h27cfd23_0
 68 | libuuid=1.0.3=h7f8727e_2
 69 | libuv=1.40.0=h7b6447c_0
 70 | libwebp=1.2.2=h55f646e_0
 71 | libwebp-base=1.2.2=h7f8727e_0
 72 | lz4-c=1.9.3=h295c915_1
 73 | markdown=3.3.7=pypi_0
 74 | matplotlib=3.5.2=pypi_0
 75 | mkl=2021.4.0=h06a4308_640
 76 | mkl-service=2.4.0=py310h7f8727e_0
 77 | mkl_fft=1.3.1=py310hd6ae3a3_0
 78 | mkl_random=1.2.2=py310h00e6091_0
 79 | msgpack=1.0.4=pypi_0
 80 | multidict=6.0.2=pypi_0
 81 | ncurses=6.3=h7f8727e_2
 82 | nettle=3.7.3=hbbd107a_1
 83 | networkx=2.8.1=pypi_0
 84 | numpy=1.22.3=py310hfa59a62_0
 85 | numpy-base=1.22.3=py310h9585f30_0
 86 | oauthlib=3.2.0=pypi_0
 87 | opencv-python=4.5.5.64=pypi_0
 88 | openh264=2.1.1=h4ff587b_0
 89 | openssl=1.1.1o=h7f8727e_0
 90 | opt-einsum=3.3.0=pypi_0
 91 | optax=0.1.2=pypi_0
 92 | packaging=21.3=pypi_0
 93 | pathtools=0.1.2=pypi_0
 94 | pillow=9.0.1=py310h22f2fdc_0
 95 | pip=21.2.4=py310h06a4308_0
 96 | promise=2.3=pypi_0
 97 | protobuf=3.20.1=pypi_0
 98 | psutil=5.9.1=pypi_0
 99 | pyasn1=0.4.8=pypi_0
100 | pyasn1-modules=0.2.8=pypi_0
101 | pycparser=2.21=pyhd3eb1b0_0
102 | pydeprecate=0.3.2=pypi_0
103 | pyopenssl=22.0.0=pyhd3eb1b0_0
104 | pyparsing=3.0.9=pypi_0
105 | pysocks=1.7.1=py310h06a4308_0
106 | python=3.10.4=h12debd9_0
107 | python-dateutil=2.8.2=pypi_0
108 | pytorch=1.11.0=py3.10_cuda10.2_cudnn7.6.5_0
109 | pytorch-lightning=1.6.3=pypi_0
110 | pytorch-mutex=1.0=cuda
111 | pywavelets=1.3.0=pypi_0
112 | pyyaml=6.0=pypi_0
113 | readline=8.1.2=h7f8727e_1
114 | requests=2.27.1=pyhd3eb1b0_0
115 | requests-oauthlib=1.3.1=pypi_0
116 | rsa=4.8=pypi_0
117 | scikit-image=0.19.2=pypi_0
118 | scikit-learn=1.1.1=pypi_0
119 | scipy=1.8.1=pypi_0
120 | sentry-sdk=1.6.0=pypi_0
121 | setproctitle=1.2.3=pypi_0
122 | setuptools=61.2.0=py310h06a4308_0
123 | shapely=1.8.2=pypi_0
124 | shortuuid=1.0.9=pypi_0
125 | six=1.16.0=pyhd3eb1b0_1
126 | sklearn=0.0=pypi_0
127 | smmap=5.0.0=pypi_0
128 | soupsieve=2.3.2.post1=pypi_0
129 | sqlite=3.38.3=hc218d9a_0
130 | tensorboard=2.9.0=pypi_0
131 | tensorboard-data-server=0.6.1=pypi_0
132 | tensorboard-plugin-wit=1.8.1=pypi_0
133 | threadpoolctl=3.1.0=pypi_0
134 | tifffile=2022.5.4=pypi_0
135 | timm=0.6.2.dev0=pypi_0
136 | tk=8.6.11=h1ccaba5_1
137 | toolz=0.11.2=pypi_0
138 | torchaudio=0.11.0=py310_cu102
139 | torchmetrics=0.8.2=pypi_0
140 | torchvision=0.12.0=py310_cu102
141 | tqdm=4.64.0=pypi_0
142 | typing_extensions=4.1.1=pyh06a4308_0
143 | tzdata=2022a=hda174b7_0
144 | urllib3=1.26.9=py310h06a4308_0
145 | wandb=0.12.19=pypi_0
146 | werkzeug=2.1.2=pypi_0
147 | wheel=0.37.1=pyhd3eb1b0_0
148 | xz=5.2.5=h7f8727e_1
149 | yarl=1.7.2=pypi_0
150 | zlib=1.2.12=h7f8727e_2
151 | zstd=1.4.9=haebb681_0
152 | 


--------------------------------------------------------------------------------
/test_dsr.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | from torch.utils.data import DataLoader
  3 | import os
  4 | import numpy as np
  5 | from dsr_model import SubspaceRestrictionModule, ImageReconstructionNetwork, AnomalyDetectionModule, UpsamplingModule
  6 | from discrete_model import DiscreteLatentModel
  7 | import sys
  8 | from sklearn.metrics import roc_auc_score, average_precision_score
  9 | from data_loader_test import TestMVTecDataset
 10 | import cv2
 11 | 
 12 | 
 13 | def crop_image(image, img_dim):
 14 |     b,c,h,w = image.shape
 15 |     hdif = max(0,h - img_dim) // 2
 16 |     wdif = max(0,w - img_dim) // 2
 17 |     image_cropped = image[:,:,hdif:-hdif,wdif:-wdif]
 18 |     return image_cropped
 19 | 
 20 | def evaluate_model(model, model_normal, model_normal_top, model_decode, decoder_seg, model_upsample, obj_name, mvtec_path, cnt_total):
 21 |     img_dim = 256
 22 |     dataset = TestMVTecDataset(mvtec_path + obj_name + "/test/", resize_shape=[img_dim,img_dim])
 23 | 
 24 |     dataloader = DataLoader(dataset, batch_size=1,
 25 |                             shuffle=False, num_workers=0)
 26 | 
 27 | 
 28 |     img_dim = 224
 29 |     total_pixel_scores = np.zeros((img_dim * img_dim * 500))
 30 |     total_gt_pixel_scores = np.zeros((img_dim * img_dim * 500))
 31 |     mask_cnt = 0
 32 | 
 33 |     total_gt = []
 34 |     total_score = []
 35 |     iter = cnt_total
 36 | 
 37 |     for i_batch, sample_batched in enumerate(dataloader):
 38 | 
 39 |         gray_batch = sample_batched["image"].cuda()
 40 | 
 41 |         is_normal = sample_batched["has_anomaly"].detach().numpy()[0,0]
 42 |         total_gt.append(is_normal)
 43 |         true_mask = sample_batched["mask"]
 44 |         true_mask = crop_image(true_mask, img_dim)
 45 |         true_mask_cv = true_mask.detach().numpy()[0, :, :, :].transpose((1, 2, 0))
 46 | 
 47 |         loss_b, loss_t, data_recon, embeddings_t, embeddings = model(gray_batch)
 48 |         embeddings = embeddings.detach()
 49 |         embeddings_t = embeddings_t.detach()
 50 | 
 51 |         embedder = model._vq_vae_bot
 52 |         embedder_top = model._vq_vae_top
 53 | 
 54 |         anomaly_embedding_copy = embeddings.clone()
 55 |         anomaly_embedding_top_copy = embeddings_t.clone()
 56 |         recon_feat, recon_embeddings, _ = model_normal(anomaly_embedding_copy, embedder)
 57 |         recon_feat_top, recon_embeddings_top, loss_b_top = model_normal_top(anomaly_embedding_top_copy,
 58 |                                                                             embedder_top)
 59 | 
 60 |         up_quantized_recon_t = model.upsample_t(recon_embeddings_top)
 61 |         quant_join = torch.cat((up_quantized_recon_t, recon_embeddings), dim=1)
 62 |         recon_image_recon = model_decode(quant_join)
 63 | 
 64 |         up_quantized_embedding_t = model.upsample_t(embeddings_t)
 65 |         quant_join_real = torch.cat((up_quantized_embedding_t, embeddings), dim=1)
 66 |         recon_image = model._decoder_b(quant_join_real)
 67 |         out_mask = decoder_seg(recon_image_recon.detach(),
 68 |                                recon_image.detach())
 69 |         out_mask_sm = torch.softmax(out_mask, dim=1)
 70 | 
 71 |         upsampled_mask = model_upsample(recon_image_recon.detach(), recon_image.detach(), out_mask_sm)
 72 |         out_mask_sm_up = torch.softmax(upsampled_mask, dim=1)
 73 |         out_mask_sm_up = crop_image(out_mask_sm_up, img_dim)
 74 | 
 75 |         iter += 1
 76 | 
 77 | 
 78 |         out_mask_cv = out_mask_sm_up[0,1,:,:].detach().cpu().numpy()
 79 | 
 80 |         out_mask_averaged = torch.nn.functional.avg_pool2d(out_mask_sm[:,1:,:,:], 21, stride=1,
 81 |                                                            padding=21 // 2).cpu().detach().numpy()
 82 |         image_score = np.max(out_mask_averaged)
 83 | 
 84 |         total_score.append(image_score)
 85 | 
 86 |         flat_true_mask = true_mask_cv.flatten()
 87 |         flat_out_mask = out_mask_cv.flatten()
 88 |         total_pixel_scores[mask_cnt * img_dim * img_dim:(mask_cnt + 1) * img_dim * img_dim] = flat_out_mask
 89 |         total_gt_pixel_scores[mask_cnt * img_dim * img_dim:(mask_cnt + 1) * img_dim * img_dim] = flat_true_mask
 90 |         mask_cnt += 1
 91 | 
 92 |     total_score = np.array(total_score)
 93 |     total_gt = np.array(total_gt)
 94 |     auroc = roc_auc_score(total_gt, total_score)
 95 | 
 96 |     total_gt_pixel_scores = total_gt_pixel_scores.astype(np.uint8)
 97 |     total_gt_pixel_scores = total_gt_pixel_scores[:img_dim * img_dim * mask_cnt]
 98 |     total_pixel_scores = total_pixel_scores[:img_dim * img_dim * mask_cnt]
 99 |     auroc_pixel = roc_auc_score(total_gt_pixel_scores, total_pixel_scores)
100 |     ap_pixel = average_precision_score(total_gt_pixel_scores, total_pixel_scores)
101 |     ap = average_precision_score(total_gt, total_score)
102 |     print(obj_name+" AUC Image: "+str(auroc)+",  AUC Pixel: "+str(auroc_pixel)+", AP Pixel:"+str(ap_pixel)+", AP :"+str(ap))
103 | 
104 |     return auroc, auroc_pixel, ap_pixel, ap, iter
105 | 
def train_on_device(obj_names, mvtec_path, run_basename):
    """Load the checkpoints of every object class, evaluate them and print mean metrics.

    For each name in ``obj_names`` the pretrained VQ model and the per-object modules
    are read from ``./checkpoints/`` (file names are built from ``run_basename`` and
    the object name), moved to the current CUDA device and passed to ``evaluate_model``.

    :param obj_names: iterable of object category names
    :param mvtec_path: dataset root, forwarded to ``evaluate_model``
    :param run_basename: prefix of the per-object checkpoint files
    """
    auroc_list = []
    auroc_pixel_list = []
    ap_pixel_list = []
    ap_list = []
    cnt_total = 0
    for obj_name in obj_names:
        run_name_pre = 'vq_model_pretrained_128_4096'

        run_name = run_basename+'_'

        num_hiddens = 128
        num_residual_hiddens = 64
        num_residual_layers = 2
        embedding_dim = 128
        num_embeddings = 4096
        commitment_cost = 0.25
        decay = 0.99
        model_vq = DiscreteLatentModel(num_hiddens, num_residual_layers, num_residual_hiddens,
                      num_embeddings, embedding_dim,
                      commitment_cost, decay)
        model_vq.cuda()
        model_vq.load_state_dict(
            torch.load("./checkpoints/" + run_name_pre + ".pckl", map_location='cuda:0'))
        model_vq.eval()

        sub_res_hi_module = SubspaceRestrictionModule(embedding_size=embedding_dim)
        sub_res_hi_module.load_state_dict(
            torch.load("./checkpoints/" + run_name + "subspace_restriction_hi_"+obj_name+".pckl", map_location='cuda:0'))
        sub_res_hi_module.cuda()
        sub_res_hi_module.eval()

        sub_res_lo_module = SubspaceRestrictionModule(embedding_size=embedding_dim)
        sub_res_lo_module.load_state_dict(
            torch.load("./checkpoints/" + run_name + "subspace_restriction_lo_"+obj_name+".pckl", map_location='cuda:0'))
        sub_res_lo_module.cuda()
        sub_res_lo_module.eval()

        anom_det_module = AnomalyDetectionModule(embedding_size=embedding_dim)
        anom_det_module.load_state_dict(
            torch.load("./checkpoints/" + run_name + "anomaly_det_module_"+obj_name+".pckl", map_location='cuda:0'))
        anom_det_module.cuda()
        anom_det_module.eval()

        upsample_module = UpsamplingModule(embedding_size=embedding_dim)
        upsample_module.load_state_dict(
            torch.load("./checkpoints/" + run_name + "upsample_module_"+obj_name+".pckl", map_location='cuda:0'))
        upsample_module.cuda()
        upsample_module.eval()

        image_recon_module = ImageReconstructionNetwork(embedding_dim * 2,
                   num_hiddens,
                   num_residual_layers,
                   num_residual_hiddens)
        # strict=False: keys that do not match between checkpoint and module are ignored.
        image_recon_module.load_state_dict(
            torch.load("./checkpoints/" + run_name + "image_recon_module_"+obj_name+".pckl", map_location='cuda:0'), strict=False)
        image_recon_module.cuda()
        image_recon_module.eval()

        with torch.no_grad():
            auroc, auroc_pixel, ap_pixel, ap, cnt = evaluate_model(model_vq, sub_res_hi_module, sub_res_lo_module, image_recon_module, anom_det_module, upsample_module, obj_name, mvtec_path, cnt_total)
            # evaluate_model returns the running total (cnt_total + images seen), so it
            # replaces the counter; adding it would count earlier images twice.
            cnt_total = cnt
            ap_list.append(ap)
            auroc_list.append(auroc)
            auroc_pixel_list.append(auroc_pixel)
            ap_pixel_list.append(ap_pixel)

    print(run_basename)
    auroc_mean = np.mean(auroc_list)
    auroc_pixel_mean = np.mean(auroc_pixel_list)
    print("Detection AUROC: "+str(auroc_mean))
    print("Localization AUROC: "+str(auroc_pixel_mean))
    print("Localization AP: "+str(np.mean(ap_pixel_list)))
184 | 
185 | 
if __name__=="__main__":
    # Command line: <gpu_id> <mvtec_path> <run_basename>
    obj_names = ['capsule', 'bottle', 'grid', 'leather', 'pill', 'tile', 'transistor', 'zipper', 'cable', 'carpet',
                 'hazelnut', 'metal_nut', 'screw', 'toothbrush', 'wood']

    # Run the whole evaluation with the requested GPU as the current CUDA device.
    with torch.cuda.device(int(sys.argv[1])):
        train_on_device(obj_names, sys.argv[2], sys.argv[3])
194 | 
195 | 


--------------------------------------------------------------------------------
/train_dsr.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import torch.nn.functional as F
  3 | from data_loader import TrainWholeImageDataset, MVTecImageAnomTrainDataset
  4 | from torch.utils.data import DataLoader
  5 | from torch import optim
  6 | import os
  7 | import numpy as np
  8 | from dsr_model import SubspaceRestrictionModule, ImageReconstructionNetwork, AnomalyDetectionModule, UpsamplingModule
  9 | from discrete_model import DiscreteLatentModel
 10 | import sys
 11 | from loss import FocalLoss
 12 | from sklearn.metrics import roc_auc_score, average_precision_score
 13 | from data_loader_test import TestMVTecDataset
 14 | import time
 15 | 
 16 | 
 17 | def weights_init(m):
 18 |     classname = m.__class__.__name__
 19 |     if classname.find('Conv') != -1:
 20 |         m.weight.data.normal_(0.0, 0.02)
 21 |     elif classname.find('BatchNorm') != -1:
 22 |         m.weight.data.normal_(1.0, 0.02)
 23 |         m.bias.data.fill_(0)
 24 | 
 25 | def generate_fake_anomalies_joined(features,embeddings, memory_torch_original, mask, diversity=1.0, strength=None):
 26 |     random_embeddings = torch.zeros((embeddings.shape[0],embeddings.shape[2]*embeddings.shape[3], memory_torch_original.shape[1]))
 27 |     inputs = features.permute(0, 2, 3, 1).contiguous()
 28 | 
 29 |     for k in range(embeddings.shape[0]):
 30 |         memory_torch = memory_torch_original
 31 |         flat_input = inputs[k].view(-1, memory_torch.shape[1])
 32 | 
 33 |         distances_b = (torch.sum(flat_input ** 2, dim=1, keepdim=True)
 34 |                      + torch.sum(memory_torch ** 2, dim=1)
 35 |                      - 2 * torch.matmul(flat_input, memory_torch.t()))
 36 | 
 37 |         percentage_vectors = strength[k]
 38 |         topk = max(1, min(int(percentage_vectors * memory_torch.shape[0]) + 1, memory_torch.shape[0] - 1))
 39 |         values, topk_indices = torch.topk(distances_b, topk, dim=1, largest=False)
 40 |         topk_indices = topk_indices[:, int(memory_torch.shape[0] * 0.05):]
 41 |         topk = topk_indices.shape[1]
 42 | 
 43 |         random_indices_hik = torch.randint(topk, size=(topk_indices.shape[0],))
 44 |         random_indices_t = topk_indices[torch.arange(random_indices_hik.shape[0]),random_indices_hik]
 45 |         random_embeddings[k] = memory_torch[random_indices_t,:]
 46 |     random_embeddings = random_embeddings.reshape((random_embeddings.shape[0],embeddings.shape[2],embeddings.shape[3],random_embeddings.shape[2]))
 47 |     random_embeddings_tensor = random_embeddings.permute(0,3,1,2).cuda()
 48 | 
 49 |     down_ratio_y = int(mask.shape[2]/embeddings.shape[2])
 50 |     down_ratio_x = int(mask.shape[3]/embeddings.shape[3])
 51 |     anomaly_mask = torch.nn.functional.max_pool2d(mask, (down_ratio_y, down_ratio_x)).float()
 52 | 
 53 |     anomaly_embedding = anomaly_mask * random_embeddings_tensor + (1.0 - anomaly_mask) * embeddings
 54 | 
 55 |     return anomaly_embedding
 56 | 
 57 | 
 58 | def train_upsampling_module(model, sub_res_model_hi, sub_res_model_lo, model_decode, decoder_seg, obj_name, mvtec_path, out_path, lr, batch_size, epochs, anom_par):
 59 |     run_name = 'dsr_' + str(lr) + '_' + str(epochs) + '_bs' + str(batch_size) + "_anom_par" + str(anom_par) + "_"
 60 | 
 61 |     embedding_dim = 128
 62 |     model.eval()
 63 |     sub_res_model_hi.eval()
 64 |     sub_res_model_lo.eval()
 65 |     decoder_seg.eval()
 66 |     model_decode.eval()
 67 | 
 68 |     model_upsample = UpsamplingModule(embedding_size=embedding_dim)
 69 |     model_upsample.cuda()
 70 |     model_upsample.train()
 71 | 
 72 | 
 73 |     optimizer = torch.optim.Adam([
 74 |                                   {"params": model_upsample.parameters(), "lr": lr}
 75 |                                  ])
 76 | 
 77 |     scheduler = optim.lr_scheduler.MultiStepLR(optimizer,[int(epochs*0.80), int(epochs*0.90)],gamma=0.2, last_epoch=-1)
 78 | 
 79 |     loss_focal = FocalLoss()
 80 | 
 81 |     dataset = MVTecImageAnomTrainDataset(mvtec_path + obj_name + "/train/good/", resize_shape=[256, 256])
 82 |     dataloader = DataLoader(dataset, batch_size=batch_size,
 83 |                             shuffle=True, num_workers=12)
 84 | 
 85 |     n_iter = 0.0
 86 | 
 87 |     segment_loss_avg = 0.0
 88 | 
 89 |     for epoch in range(epochs//2):
 90 |         start_time = time.time()
 91 |         for i_batch, sample_batched in enumerate(dataloader):
 92 | 
 93 |             input_image_aug = sample_batched["augmented_image"].cuda()
 94 |             anomaly_mask = sample_batched["anomaly_mask"].cuda()
 95 | 
 96 |             optimizer.zero_grad()
 97 | 
 98 |             loss_b, loss_t, data_recon, embeddings_t, embeddings = model(input_image_aug)
 99 | 
100 |             data_recon = data_recon.detach()
101 |             embeddings = embeddings.detach()
102 |             embeddings_t = embeddings_t.detach()
103 | 
104 |             embedder = model._vq_vae_bot
105 |             embedder_top = model._vq_vae_top
106 | 
107 |             anomaly_embedding_copy = embeddings.clone()
108 |             anomaly_embedding_top_copy = embeddings_t.clone()
109 |             recon_feat, recon_embeddings, _ = sub_res_model_hi(anomaly_embedding_copy, embedder)
110 |             recon_feat_top, recon_embeddings_top, loss_b_top = sub_res_model_lo(anomaly_embedding_top_copy,
111 |                                                                                 embedder_top)
112 | 
113 |             up_quantized_recon_t = model.upsample_t(recon_embeddings_top)
114 |             quant_join = torch.cat((up_quantized_recon_t, recon_embeddings), dim=1)
115 |             recon_image_recon = model_decode(quant_join)
116 | 
117 |             ################################################
118 |             up_quantized_embedding_t = model.upsample_t(embeddings_t)
119 |             quant_join_real = torch.cat((up_quantized_embedding_t, embeddings), dim=1)
120 |             recon_image = model._decoder_b(quant_join_real)
121 | 
122 |             out_mask = decoder_seg(recon_image_recon, recon_image)
123 |             out_mask_sm = torch.softmax(out_mask, dim=1)
124 |             refined_mask = model_upsample(recon_image_recon, recon_image, out_mask_sm)
125 |             refined_mask_sm = torch.softmax(refined_mask, dim=1)
126 | 
127 |             segment_loss = loss_focal(refined_mask_sm, anomaly_mask)
128 | 
129 |             loss = segment_loss
130 |             loss.backward()
131 |             optimizer.step()
132 | 
133 |             segment_loss_avg = segment_loss_avg * 0.95 + 0.05 * segment_loss.item()
134 | 
135 |             n_iter +=1
136 | 
137 |         scheduler.step()
138 | 
139 |         if epoch % 5 == 0:
140 |             torch.save(model_upsample.state_dict(), out_path+"checkpoints/"+run_name+"_upsample.pckl")
141 | 
142 | 
143 | 
144 | 
145 | def train_on_device(obj_names, mvtec_path, out_path, lr, batch_size, epochs):
146 |     run_name_pre = 'vq_model_pretrained_128_4096'
147 |     num_hiddens = 128
148 |     num_residual_hiddens = 64
149 |     num_residual_layers = 2
150 |     embedding_dim = 128
151 |     num_embeddings = 4096
152 |     commitment_cost = 0.25
153 |     decay = 0.99
154 |     anom_par = 0.2
155 | 
156 |     # Load the pretrained discrete latent model used.
157 |     model = DiscreteLatentModel(num_hiddens, num_residual_layers, num_residual_hiddens, num_embeddings, embedding_dim,
158 |                                 commitment_cost, decay)
159 |     model.cuda()
160 |     model.load_state_dict(torch.load("./checkpoints/" + run_name_pre + ".pckl", map_location='cuda:0'))
161 |     model.eval()
162 | 
163 |     # Modules using the codebooks K_hi and K_lo for feature quantization
164 |     embedder_hi = model._vq_vae_bot
165 |     embedder_lo = model._vq_vae_top
166 | 
167 |     for obj_name in obj_names:
168 |         run_name = 'dsr_'+str(lr)+'_'+str(epochs)+'_bs'+str(batch_size)+"_anom_par"+str(anom_par)+"_"
169 | 
170 |         # Define the subspace restriction modules - Encoder decoder networks
171 |         sub_res_model_lo = SubspaceRestrictionModule(embedding_size=embedding_dim)
172 |         sub_res_model_hi = SubspaceRestrictionModule(embedding_size=embedding_dim)
173 |         sub_res_model_lo.cuda()
174 |         sub_res_model_hi.cuda()
175 | 
176 |         # Define the anomaly detection module - UNet-based network
177 |         decoder_seg = AnomalyDetectionModule(embedding_size=embedding_dim)
178 |         decoder_seg.cuda()
179 |         decoder_seg.apply(weights_init)
180 | 
181 | 
182 |         # Image reconstruction network reconstructs the image from discrete features.
183 |         # It is trained for a specific object
184 |         model_decode = ImageReconstructionNetwork(embedding_dim * 2,
185 |                    num_hiddens,
186 |                    num_residual_layers,
187 |                    num_residual_hiddens)
188 |         model_decode.cuda()
189 |         model_decode.apply(weights_init)
190 | 
191 | 
192 |         optimizer = torch.optim.Adam([
193 |                                       {"params": sub_res_model_lo.parameters(), "lr": lr},
194 |                                       {"params": sub_res_model_hi.parameters(), "lr": lr},
195 |                                       {"params": model_decode.parameters(), "lr": lr},
196 |                                       {"params": decoder_seg.parameters(), "lr": lr}])
197 | 
198 |         scheduler = optim.lr_scheduler.MultiStepLR(optimizer,[int(epochs*0.80)],gamma=0.1, last_epoch=-1)
199 | 
200 |         loss_focal = FocalLoss()
201 | 
202 |         dataset = TrainWholeImageDataset(mvtec_path + obj_name + "/train/good/", resize_shape=[256, 256], perlin_augment=True)
203 |         dataloader = DataLoader(dataset, batch_size=batch_size,
204 |                                 shuffle=True, num_workers=8)
205 | 
206 |         n_train = len(dataset)
207 | 
208 |         n_iter = 0.0
209 |         start_time = time.time()
210 |         for epoch in range(epochs):
211 |             print("Epoch ", epoch)
212 |             for i_batch, sample_batched in enumerate(dataloader):
213 | 
214 |                 in_image = sample_batched["image"].cuda()
215 |                 anomaly_mask = sample_batched["mask"].cuda()
216 |                 optimizer.zero_grad()
217 | 
218 |                 with torch.no_grad():
219 |                     anomaly_strength_lo = (torch.rand(in_image.shape[0]) * (1.0-anom_par) + anom_par).cuda()
220 |                     anomaly_strength_hi = (torch.rand(in_image.shape[0]) * (1.0-anom_par) + anom_par).cuda()
221 | 
222 |                     # Extract features from the discrete model
223 |                     enc_b = model._encoder_b(in_image)
224 |                     enc_t = model._encoder_t(enc_b)
225 |                     zt = model._pre_vq_conv_top(enc_t)
226 | 
227 |                     # Quantize the extracted features
228 |                     loss_t, quantized_t, perplexity_t, encodings_t = embedder_lo(zt)
229 | 
230 |                     # Generate feature-based anomalies on F_lo
231 |                     anomaly_embedding_lo = generate_fake_anomalies_joined(zt, quantized_t,
232 |                                                                            embedder_lo._embedding.weight,
233 |                                                                            anomaly_mask, strength=anomaly_strength_lo)
234 | 
235 |                     # Upsample the extracted quantized features and the quantized features augmented with anomalies
236 |                     up_quantized_t = model.upsample_t(anomaly_embedding_lo)
237 |                     up_quantized_t_real = model.upsample_t(quantized_t)
238 |                     feat = torch.cat((enc_b, up_quantized_t), dim=1)
239 |                     feat_real = torch.cat((enc_b, up_quantized_t_real), dim=1)
240 |                     zb = model._pre_vq_conv_bot(feat)
241 |                     zb_real = model._pre_vq_conv_bot(feat_real)
242 |                     # Quantize the upsampled features - F_hi
243 |                     loss_b, quantized_b, perplexity_b, encodings_b = embedder_hi(zb)
244 |                     loss_b, quantized_b_real, perplexity_b, encodings_b = embedder_hi(zb_real)
245 | 
246 |                     # Generate feature-based anomalies on F_hi
247 |                     anomaly_embedding = generate_fake_anomalies_joined(zb, quantized_b,
248 |                                                                           embedder_hi._embedding.weight, anomaly_mask
249 |                                                                          , strength=anomaly_strength_hi)
250 | 
251 |                     use_both = torch.randint(0, 2,(in_image.shape[0],1,1,1)).cuda().float()
252 |                     use_lo = torch.randint(0, 2,(in_image.shape[0],1,1,1)).cuda().float()
253 |                     use_hi = (1 - use_lo)
254 |                     anomaly_embedding_hi_usebot = generate_fake_anomalies_joined(zb_real,
255 |                                                                          quantized_b_real,
256 |                                                                          embedder_hi._embedding.weight,
257 |                                                                          anomaly_mask, strength=anomaly_strength_hi)
258 |                     anomaly_embedding_lo_usebot = quantized_t
259 |                     anomaly_embedding_hi_usetop = quantized_b_real
260 |                     anomaly_embedding_lo_usetop = anomaly_embedding_lo
261 |                     anomaly_embedding_hi_not_both =  use_hi * anomaly_embedding_hi_usebot + use_lo * anomaly_embedding_hi_usetop
262 |                     anomaly_embedding_lo_not_both =  use_hi * anomaly_embedding_lo_usebot + use_lo * anomaly_embedding_lo_usetop
263 |                     anomaly_embedding_hi = (anomaly_embedding * use_both + anomaly_embedding_hi_not_both * (1.0 - use_both)).detach().clone()
264 |                     anomaly_embedding_lo = (anomaly_embedding_lo * use_both + anomaly_embedding_lo_not_both * (1.0 - use_both)).detach().clone()
265 | 
266 |                     anomaly_embedding_hi_copy = anomaly_embedding_hi.clone()
267 |                     anomaly_embedding_lo_copy = anomaly_embedding_lo.clone()
268 | 
269 |                 # Restore the features to normality with the Subspace restriction modules
270 |                 recon_feat_hi, recon_embeddings_hi, loss_b = sub_res_model_hi(anomaly_embedding_hi_copy, embedder_hi)
271 |                 recon_feat_lo, recon_embeddings_lo, loss_b_lo = sub_res_model_lo(anomaly_embedding_lo_copy, embedder_lo)
272 | 
273 |                 # Reconstruct the image from the anomalous features with the general appearance decoder
274 |                 up_quantized_anomaly_t = model.upsample_t(anomaly_embedding_lo)
275 |                 quant_join_anomaly = torch.cat((up_quantized_anomaly_t, anomaly_embedding_hi), dim=1)
276 |                 recon_image_general = model._decoder_b(quant_join_anomaly)
277 | 
278 | 
279 |                 # Reconstruct the image from the reconstructed features
280 |                 # with the object-specific image reconstruction module
281 |                 up_quantized_recon_t = model.upsample_t(recon_embeddings_lo)
282 |                 quant_join = torch.cat((up_quantized_recon_t, recon_embeddings_hi), dim=1)
283 |                 recon_image_recon = model_decode(quant_join)
284 | 
285 |                 # Generate the anomaly segmentation map
286 |                 out_mask = decoder_seg(recon_image_recon.detach(),recon_image_general.detach())
287 |                 out_mask_sm = torch.softmax(out_mask, dim=1)
288 | 
289 |                 # Calculate losses
290 |                 loss_feat_hi = torch.nn.functional.mse_loss(recon_feat_hi, quantized_b_real.detach())
291 |                 loss_feat_lo = torch.nn.functional.mse_loss(recon_feat_lo, quantized_t.detach())
292 |                 loss_l2_recon_img = torch.nn.functional.mse_loss(in_image, recon_image_recon)
293 |                 total_recon_loss = loss_feat_lo + loss_feat_hi + loss_l2_recon_img*10
294 | 
295 | 
296 |                 # Resize the ground truth anomaly map to closely match the augmented features
297 |                 down_ratio_x_hi = int(anomaly_mask.shape[3] / quantized_b.shape[3])
298 |                 anomaly_mask_hi = torch.nn.functional.max_pool2d(anomaly_mask,
299 |                                                                   (down_ratio_x_hi, down_ratio_x_hi)).float()
300 |                 anomaly_mask_hi = torch.nn.functional.interpolate(anomaly_mask_hi, scale_factor=down_ratio_x_hi)
301 |                 down_ratio_x_lo = int(anomaly_mask.shape[3] / quantized_t.shape[3])
302 |                 anomaly_mask_lo = torch.nn.functional.max_pool2d(anomaly_mask,
303 |                                                                   (down_ratio_x_lo, down_ratio_x_lo)).float()
304 |                 anomaly_mask_lo = torch.nn.functional.interpolate(anomaly_mask_lo, scale_factor=down_ratio_x_lo)
305 |                 anomaly_mask = anomaly_mask_lo * use_both + (
306 |                             anomaly_mask_lo * use_lo + anomaly_mask_hi * use_hi) * (1.0 - use_both)
307 | 
308 | 
309 |                 # Calculate the segmentation loss with GT mask generated at low resolution.
310 |                 segment_loss = loss_focal(out_mask_sm, anomaly_mask)
311 | 
312 |                 loss = segment_loss + total_recon_loss
313 |                 loss.backward()
314 |                 optimizer.step()
315 | 
316 |                 if i_batch == 0:
317 |                     print("Loss Focal: ", segment_loss.item())
318 |                     print("Loss Recon: ", total_recon_loss.item())
319 | 
320 |                 n_iter +=1
321 | 
322 |             scheduler.step()
323 | 
324 |             if (epoch+1) % 5 == 0:
325 |                 # Save models
326 |                 torch.save(decoder_seg.state_dict(), out_path+"checkpoints/"+run_name+"anomaly_det_module_"+obj_name+".pckl")
327 |                 torch.save(sub_res_model_lo.state_dict(), out_path+"checkpoints/"+run_name+"subspace_restriction_lo_"+obj_name+".pckl")
328 |                 torch.save(sub_res_model_hi.state_dict(), out_path+"checkpoints/"+run_name+"subspace_restriction_hi_"+obj_name+".pckl")
329 |                 torch.save(model_decode.state_dict(), out_path+"checkpoints/"+run_name+"image_recon_module_"+obj_name+".pckl")
330 | 
331 |     return model, sub_res_model_hi, sub_res_model_lo, model_decode, decoder_seg
332 | 
if __name__ == "__main__":
    # Script entry point: parse the command line, pick one object class and run
    # the two training stages (train_on_device, then train_upsampling_module)
    # on the selected GPU.
    import argparse
    import os

    # Use: python train_dsr.py --gpu_id 0 --obj_id $i --lr 0.0002 --bs 8 --epochs 100 --data_path $BASE_PATH --out_path $OUT_PATH
    # BASE_PATH -- the base directory of mvtec
    # OUT_PATH -- where the trained models will be saved
    # i -- the index of the object class in the obj_batch list
    obj_batch = [['capsule'],
                 ['bottle'],
                 ['carpet'],
                 ['leather'],
                 ['pill'],
                 ['transistor'],
                 ['tile'],
                 ['cable'],
                 ['zipper'],
                 ['toothbrush'],
                 ['metal_nut'],
                 ['hazelnut'],
                 ['screw'],
                 ['grid'],
                 ['wood']
                 ]

    # Human-readable "index=name" listing shown in --help for --obj_id.
    obj_id_listing = ", ".join(
        "{}={}".format(idx, names[0]) for idx, names in enumerate(obj_batch))

    parser = argparse.ArgumentParser(
        description="Run train_on_device followed by train_upsampling_module "
                    "for one object class.")
    # Restricting the choices rejects out-of-range indices with a usage error
    # and prevents negative indices from silently wrapping around to a
    # different object class.
    parser.add_argument('--obj_id', type=int, required=True,
                        choices=range(len(obj_batch)), metavar='OBJ_ID',
                        help="index of the object class in the obj_batch list: "
                             + obj_id_listing)
    parser.add_argument('--bs', type=int, required=True,
                        help="batch size")
    parser.add_argument('--lr', type=float, required=True,
                        help="learning rate")
    parser.add_argument('--epochs', type=int, required=True,
                        help="number of training epochs")
    parser.add_argument('--gpu_id', type=int, required=True,
                        help="index of the CUDA device to train on")
    parser.add_argument('--data_path', type=str, required=True,
                        help="the base directory of mvtec")
    parser.add_argument('--out_path', type=str, required=True,
                        help="where the trained models will be saved")

    args = parser.parse_args()

    # The training code in this file builds file paths by plain string
    # concatenation (e.g. out_path + "checkpoints/"), so make sure both
    # directories end with a path separator. os.path.join(p, "") is a no-op
    # when the separator is already present and leaves an empty path empty.
    data_path = os.path.join(args.data_path, "")
    out_path = os.path.join(args.out_path, "")

    # Each obj_batch entry is a one-element list of object names; it is passed
    # on unchanged, exactly as before.
    obj_names = obj_batch[args.obj_id]

    with torch.cuda.device(args.gpu_id):
        model, sub_res_model_hi, sub_res_model_lo, model_decode, decoder_seg = train_on_device(
            obj_names, data_path, out_path, args.lr, args.bs, args.epochs)
        train_upsampling_module(model, sub_res_model_hi, sub_res_model_lo, model_decode, decoder_seg,
                                obj_names, data_path, out_path, args.lr, args.bs, args.epochs)
372 | 
373 | 


--------------------------------------------------------------------------------