174 | Click for high resolution examples
175 |
176 |
177 |
178 |
179 |
180 | Normal - Empty university corridors (on different floors)
181 |
182 |
183 |
184 |
185 |
186 | Box - Cardboard boxes placed in front of or near the robot
187 |
188 |
189 |
190 |
191 |
192 | Cable - Various cables laid on the floor around and in front of the robot
193 |
194 |
195 |
196 |
197 |
198 | Debris - Various debris
199 |
200 |
201 |
202 |
203 |
204 | Defects - Defects of the robot
205 |
206 |
207 |
208 |
209 |
210 | Door - Doors standing open where they should be closed
211 |
212 |
213 |
214 |
215 |
216 | Human - Human presence
217 |
218 |
219 |
220 |
221 |
222 | Clutter - Chairs, tables and furniture moved around the corridor
223 |
224 |
225 |
226 |
227 |
228 | Foam - Foam placed on the floor
229 |
230 |
231 |
232 |
233 |
234 | Sawdust - Sawdust placed on the floor
235 |
236 |
237 |
238 |
239 |
240 | Cellophane - Cellophane foil stretched between walls
241 |
242 |
243 |
244 |
245 |
246 | Floor - Fake flooring different from the original floor
247 |
248 |
249 |
250 |
251 |
252 | Screws - Small screws and bolts placed in front of the robot
253 |
254 |
255 |
256 |
257 |
258 | Water - Water puddle in front of the robot
259 |
260 |
261 |
262 |
263 |
264 | Cones - Multiple orange cones placed in the corridor
265 |
266 |
267 |
268 |
269 |
270 | Hanging cables - Cables hanging from the ceiling
271 |
272 |
273 |
274 |
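The per-class evaluation in the scripts below maps each anomaly class to a name through combined_labels_to_names in utils/variables_util.py, which is not shown here. A minimal sketch of such a mapping, using the class names listed above with hypothetical integer IDs; the only property the scripts actually rely on is that label 0 means normal:

# Hypothetical label-id -> class-name mapping for the categories above.
# The real IDs live in utils/variables_util.py (combined_labels_to_names) and may differ;
# the code below only assumes that 0 denotes the normal class.
combined_labels_to_names_sketch = {
    0: "normal",
    1: "box",
    2: "cable",
    3: "debris",
    4: "defects",
    5: "door",
    6: "human",
    7: "clutter",
    8: "foam",
    9: "sawdust",
    10: "cellophane",
    11: "floor",
    12: "screws",
    13: "water",
    14: "cones",
    15: "hanging_cables",
}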
--------------------------------------------------------------------------------
/code/Latest/WRN/train_oe_wrn.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | from collections import defaultdict
3 | from datetime import datetime
4 | from typing import List
5 |
6 | from models.wrn_oe.wrn import WideResNet
7 | from utils.variables_util import combined_labels_to_names
8 | from utils.metrics_util import compute_pr_aucs_single_loss, compute_roc_aucs_single_loss
9 | import pandas as pd
10 | import torch
11 | import torch.backends.cudnn as cudnn
12 | import torchvision.transforms as trn
13 | from rich.console import Console
14 | from tqdm import tqdm
15 |
16 | from models.wrn_oe.wrn_model_functions import set_wrn_class_parameters, \
17 | cosine_annealing, train_wrn, val_wrn, best_epoch_saver, test_wrn, oe_wrn_artifact_saver
18 | from oe_wrn_dataset import OEWRNImagesDataset, OEWRNImagesTestset, \
19 | OEWRNImagesOutlierset
20 | from utils.check_create_folder import check_create_folder
21 |
22 | console = Console()
23 |
24 |
25 | def main():
26 |     parser = argparse.ArgumentParser(description='Trains a WRN with OE',
27 | formatter_class=argparse.ArgumentDefaultsHelpFormatter)
28 | parser.add_argument('--model', '-m', type=str, default='wrn',
29 | choices=['allconv', 'wrn'], help='Choose architecture.')
30 | # parser.add_argument('--calibration', '-c', action='store_true',
31 | # help='Train a model to be used for calibration. This holds out some data for validation.')
32 | # Optimization options
33 | parser.add_argument('--epochs', '-e', type=int, default=100, help='Number of epochs to train.')
34 | parser.add_argument('--learning_rate', '-lr', type=float, default=0.1, help='The initial learning rate.')
35 | parser.add_argument('--batch_size', '-b', type=int, default=128, help='Batch size.')
36 |     parser.add_argument('--oe_batch_size', type=int, default=256, help='Outlier exposure batch size.')
37 | parser.add_argument('--test_bs', type=int, default=200)
38 | parser.add_argument('--momentum', type=float, default=0.9, help='Momentum.')
39 | parser.add_argument('--decay', '-d', type=float, default=0.0005, help='Weight decay (L2 penalty).')
40 | # WRN Architecture
41 | parser.add_argument('--layers', default=16, type=int, help='total number of layers')
42 | parser.add_argument('--widen-factor', default=4, type=int, help='widen factor')
43 | parser.add_argument('--droprate', default=0.3, type=float, help='dropout probability')
44 | # Acceleration
45 | parser.add_argument('--ngpu', type=int, default=1, help='0 = CPU.')
46 | # parser.add_argument('--prefetch', type=int, default=4, help='Pre-fetching threads.')
47 | parser.add_argument('--workers', type=int, default=4, help='Pre-fetching threads.')
48 | # IDSIA params
49 | parser.add_argument('--gpu_number',
50 | '-g', type=int, default=0)
51 | parser.add_argument('--root_path',
52 | '-r', type=str, default=".")
53 | param = parser.parse_args()
54 |
55 | mean = [0.485, 0.456, 0.406]
56 | std = [0.229, 0.224, 0.225]
57 | train_transform = trn.Compose([trn.RandomHorizontalFlip(), trn.RandomCrop(64, padding=8),
58 | trn.Normalize(mean, std)])
59 | test_transform = trn.Compose([trn.Normalize(mean, std)])
60 |
61 | num_classes = 3 # the classes are the envs: -1, 1long, 1short
62 |
63 | console.log(f'Using the following params:{param}')
64 | time_string = datetime.now().strftime('%Y-%m-%d-%H-%M-%S')
65 |
66 | if param.gpu_number == -1:
67 | cuda_gpu = f"cpu"
68 | else:
69 | cuda_gpu = f"cuda:{param.gpu_number}"
70 | # (
71 | # folder_prefix,
72 | # train_set_path,
73 | # outliers_set_path,
74 | # validation_set_path,
75 | # test_set_path,
76 | # test_set_labels_csv,
77 | # outliers_set_labels_csv,
78 | # train_set_labels_csv,
79 | # validation_set_labels_csv
80 | # ) = oe_wrn_model_paths_init(param)
81 | root_path = param.root_path
82 |
83 | train_set_path = f"{root_path}/data/train_set"
84 | outliers_set_path = f"{root_path}/data/outlier_set"
85 | validation_set_path = f"{root_path}/data/validation_set"
86 | test_set_path = f"{root_path}/data/test_set"
87 | test_set_labels_csv = f"{root_path}/data/metadata/frames_labels.csv"
88 | outliers_set_labels_csv = f"{root_path}/data/metadata/outliers_frames_labels.csv"
89 | train_set_labels_csv = f"{root_path}/data/metadata/train_frames_envs.csv"
90 | validation_set_labels_csv = f"{root_path}/data/metadata/validation_frames_envs.csv"
91 |
92 | model_key = f"WRN_{time_string}"
93 | save_folder = f"{root_path}/data/wrn/saves/{model_key}"
94 | check_create_folder(save_folder)
95 |
96 | device = torch.device(cuda_gpu if torch.cuda.is_available() else "cpu")
97 | print(device)
98 | (
99 | batch_size,
100 | epochs,
101 | lr,
102 | ml_flow_run_id,
103 | workers,
104 | momentum,
105 | decay,
106 | layers,
107 | widen_factor,
108 | droprate,
109 | outlier_batch_size,
110 | analysis_number,
111 | split_date,
112 | subset_size
113 | ) = set_wrn_class_parameters(
114 | param
115 | )
116 |
117 | model = WideResNet(layers, num_classes, widen_factor, dropRate=droprate)
118 | model.to(device)
119 | cudnn.benchmark = True # fire on all cylinders
120 |
121 | # DATA INIT
122 | train_set = OEWRNImagesDataset(train_set_path, train_transform, train_set_labels_csv)
123 | train_loader_in = torch.utils.data.DataLoader(
124 | train_set,
125 | batch_size=batch_size, shuffle=True,
126 | num_workers=workers, pin_memory=True)
127 | outliers_set_required_size = outlier_batch_size * len(train_loader_in)
128 | outliers_set = OEWRNImagesOutlierset(
129 | files_path=outliers_set_path,
130 | label_csv_path=outliers_set_labels_csv,
131 | transform=test_transform,
132 | required_dataset_size=outliers_set_required_size,
133 | )
134 | train_loader_out = torch.utils.data.DataLoader(
135 | outliers_set,
136 | batch_size=outlier_batch_size, shuffle=True,
137 | num_workers=workers, pin_memory=True)
138 | val_set = OEWRNImagesDataset(validation_set_path, test_transform, validation_set_labels_csv)
139 | val_loader = torch.utils.data.DataLoader(
140 | val_set,
141 | batch_size=batch_size, shuffle=False,
142 | num_workers=workers, pin_memory=True)
143 | test_set = OEWRNImagesTestset(test_set_path, label_csv_path=test_set_labels_csv, transform=test_transform)
144 | test_loader = torch.utils.data.DataLoader(
145 | test_set,
146 | batch_size=1, shuffle=False,
147 | num_workers=workers, pin_memory=True)
148 | print('Beginning Training\n')
149 | optimizer = torch.optim.SGD(model.parameters(), lr, momentum=momentum, weight_decay=decay, nesterov=True)
150 | scheduler = torch.optim.lr_scheduler.LambdaLR(
151 | optimizer,
152 | lr_lambda=lambda step: cosine_annealing(
153 | step,
154 | epochs * len(train_loader_in),
155 | 1, # since lr_lambda computes multiplicative factor
156 | 1e-6 / lr))
157 | # Main loop
158 | etqdm = tqdm(range(epochs), total=epochs, postfix="Training")
159 | metrics = defaultdict(list)
160 | best_loss = None
161 | best_epoch = 0
162 | best_model_path = None
163 | for epoch in etqdm:
164 | model, train_loss = train_wrn(model, train_loader_out, train_loader_in, scheduler, optimizer, device=device)
165 | metrics[f'train_loss'].append(train_loss)
166 | val_loss = val_wrn(model, val_loader, device)
167 | metrics[f'val_loss'].append(val_loss)
168 | if best_loss is None:
169 | best_epoch, best_loss, best_model_path = best_epoch_saver(epoch, model, param,
170 | save_folder, val_loss, model_key)
171 |         elif val_loss < best_loss:
172 | best_epoch, best_loss, best_model_path = best_epoch_saver(epoch, model, param,
173 | save_folder, val_loss, model_key)
174 |
175 | etqdm.set_description(
176 | f"Train loss: {train_loss:.3f} | Val loss: {val_loss:.3f} | best model @ epoch {best_epoch}")
177 |
178 | console.log("Training Completed, Testing Started")
179 |
180 |     df_dict = test_wrn(best_model_path, model, test_loader, device)
181 | test_set_df = pd.DataFrame.from_dict(df_dict)
182 | test_set_df["label"] = pd.to_numeric(test_set_df["label"])
183 |
184 | auc_computation_and_logging(test_set_df)
185 | list_of_labels_in_test_set = list(set(test_set.labels))
186 |
187 | # csv row building
188 | metrics_dict = {}
189 | for k in list_of_labels_in_test_set:
190 | v = combined_labels_to_names[k]
191 | class_metrics_dict = per_label_metrics(test_set_df, k)
192 | if k == 0:
193 | v = "all"
194 | metrics_dict[f"{v}_ok_mean_loss"] = class_metrics_dict["ok_mean_loss"]
195 | metrics_dict[f"{v}_an_mean_loss"] = class_metrics_dict["an_mean_loss"]
196 | metrics_dict[f"{v}_roc_auc"] = class_metrics_dict["roc_auc"]
197 | metrics_dict[f"{v}_pr_auc"] = class_metrics_dict["pr_auc"]
198 |
199 | csv_row = {
200 | **{"model_key": model_key,
201 | },
202 | **metrics_dict,
203 | }
204 |
205 | # SAVE STUFF
206 | console.log("Testing Completed")
207 | console.log("Creating and saving Artifacts")
208 | artifacts_path = save_folder + "/artifacts/"
209 | oe_wrn_artifact_saver(
210 | batch_size,
211 | epochs,
212 | lr,
213 | momentum,
214 | decay,
215 | layers,
216 | widen_factor,
217 | droprate,
218 | metrics,
219 | test_set_df,
220 | model,
221 | artifacts_path,
222 | param=param,
223 | csv_row=csv_row,
224 | csv_key=model_key,
225 | best_model_path=best_model_path,
226 | )
227 |
228 | console.log(f"Script completed, artifacts located at {save_folder}.")
229 |
230 |
231 | def auc_computation_and_logging(test_df):
232 | labels = [0 if el == 0 else 1 for el in test_df["label"].values]
233 | metrics_dict = compute_model_metrics(
234 | labels,
235 | test_df
236 | )
237 | print(f'test_set_ok_mean_loss = {metrics_dict["ok_mean_loss"]}\n'
238 | f'test_set_an_mean_loss = {metrics_dict["an_mean_loss"]}\n'
239 | f'test_set_roc_auc = {metrics_dict["roc_auc"]}\n'
240 | f'test_set_pr_auc = {metrics_dict["pr_auc"]}\n'
241 | )
242 |
243 |
244 | def per_label_metrics(df, label_key):
245 | if label_key == 0:
246 | label_unique_values = [0 if el == 0 else 1 for el in df["label"].values]
247 | return_dict = compute_model_metrics(
248 | label_unique_values,
249 | df,
250 | )
251 | else:
252 | df_anomaly = df[df.label.isin([0, label_key])]
253 | label_unique_values = [0 if el == 0 else 1 for el in df_anomaly["label"].values]
254 | return_dict = compute_model_metrics(
255 | label_unique_values,
256 | df_anomaly,
257 | )
258 | return return_dict
259 |
260 |
261 | def compute_model_metrics(
262 | labels: List[int],
263 | df_losses: pd.DataFrame,
264 | ):
265 | y_true = labels
266 | losses = df_losses["loss"].values
267 | pr_auc = compute_pr_aucs_single_loss(y_true, losses)
268 | roc_auc = compute_roc_aucs_single_loss(y_true, losses)
269 | an_mean_loss = df_losses[df_losses["label"] != 0]["loss"].values.mean()
270 | ok_mean_loss = df_losses[df_losses["label"] == 0]["loss"].values.mean()
271 | # composing return dict
272 | return_dict = {
273 | "an_mean_loss": an_mean_loss,
274 | "ok_mean_loss": ok_mean_loss,
275 | "roc_auc": roc_auc,
276 | "pr_auc": pr_auc,
277 | }
278 | return return_dict
279 |
280 |
281 | if __name__ == '__main__':
282 | main()
283 |
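The learning-rate schedule above hands cosine_annealing from models/wrn_oe/wrn_model_functions.py (not shown here) to LambdaLR with lr_max=1 and lr_min=1e-6/lr, so the base learning rate is scaled from lr at step 0 down towards roughly 1e-6 over epochs * len(train_loader_in) steps. A plausible sketch of such a multiplier, assuming the conventional half-cosine decay; the repository's own helper may differ in detail:

import math

def cosine_annealing_sketch(step: int, total_steps: int, lr_max: float, lr_min: float) -> float:
    # Half-cosine decay of the multiplicative factor handed to LambdaLR:
    # returns lr_max at step 0 and approaches lr_min as step -> total_steps.
    return lr_min + (lr_max - lr_min) * 0.5 * (1 + math.cos(step / total_steps * math.pi))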
--------------------------------------------------------------------------------
/code/Latest/models/real_nvp/real_nvp_model_functions.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import logging
3 | from collections import defaultdict
4 |
5 | import numpy as np
6 | import torch
7 | from torch import linalg as LA
8 | from tqdm import tqdm
9 |
10 | from utils.check_create_folder import check_create_folder
11 | from models.real_nvp import real_nvp
12 |
13 | logging.basicConfig(level=logging.INFO)
14 |
15 |
16 | # USED
17 | def build_network(embedding_size, coupling_topology, n_layers, mask_type, batch_norm=True):
18 | """Builds the neural network."""
19 | model = real_nvp.LinearRNVP(input_dim=embedding_size,
20 | coupling_topology=coupling_topology,
21 | flow_n=n_layers,
22 | batch_norm=batch_norm,
23 | mask_type=mask_type,
24 | conditioning_size=None,
25 | use_permutation=False,
26 | single_function=False)
27 |
28 | return model
29 |
30 |
31 | # USED
32 | def epoch_loop_oe_normal_rnvp(device: torch.device,
33 | epochs: int,
34 |                               model: real_nvp.LinearRNVP,
35 | optimizer,
36 | scheduler,
37 | train_loader: torch.utils.data.DataLoader,
38 | val_loader: torch.utils.data.DataLoader,
39 | save_folder: str,
40 | model_key: str,
41 | ):
42 | # Epoch Loop
43 | logging.info('Training started')
44 |
45 | etqdm = tqdm(range(epochs), total=epochs, postfix="Training")
46 | best_loss = None
47 | best_epoch = 0
48 | best_model_path = None
49 | metrics = defaultdict(list)
50 | for epoch in etqdm:
51 | model, train_loss, train_log_prob, train_log_det_j = trainer(
52 | device, model, optimizer, train_loader
53 | )
54 |
55 | metrics[f'train_loss'].append(train_loss)
56 | val_loss, val_log_prob, val_log_det_j = validator(val_loader, model, device)
57 | scheduler.step(val_loss)
58 | metrics[f'val_loss'].append(val_loss)
59 | if best_loss is None:
60 | best_epoch, best_loss, best_model_path = best_epoch_saver(epoch, model,
61 | save_folder, val_loss, model_key)
62 |         elif val_loss < best_loss:
63 | best_epoch, best_loss, best_model_path = best_epoch_saver(epoch, model,
64 | save_folder, val_loss, model_key)
65 |
66 | etqdm.set_description(
67 | f"Train loss: {train_loss:.3f} | Val loss: {val_loss:.3f} | best model @ epoch {best_epoch}")
68 | logging.info('Finished training.')
69 |
70 | return metrics, best_model_path
71 |
72 |
73 | # USED
74 | def epoch_loop_oe_with_oe_rnvp(device: torch.device,
75 | epochs: int,
76 |                                model: real_nvp.LinearRNVP,
77 | optimizer,
78 | scheduler,
79 | normal_train_loader: torch.utils.data.DataLoader,
80 | anomalies_train_loader: torch.utils.data.DataLoader,
81 | val_loader: torch.utils.data.DataLoader,
82 | save_folder: str,
83 | etn_key: str,
84 | gamma: float,
85 | lambda_p: float,
86 | ):
87 | # Epoch Loop
88 | logging.info('Training started')
89 |
90 | etqdm = tqdm(range(epochs), total=epochs, postfix="Training")
91 | best_loss = None
92 | best_epoch = 0
93 | best_model_path = None
94 | metrics = defaultdict(list)
95 | for epoch in etqdm:
96 | model, train_loss, train_log_prob, train_log_det_j, oe_loss_component_list, ok_l2_mean, an_l2_mean = trainer_oe(
97 | device,
98 | model,
99 | optimizer,
100 | normal_train_loader,
101 | anomalies_train_loader,
102 | gamma=gamma,
103 | lambda_p=lambda_p
104 | )
105 | metrics[f'train_loss'].append(train_loss)
106 |
107 | val_loss, val_log_prob, val_log_det_j = validator(val_loader, model, device)
108 | scheduler.step(val_loss)
109 |
110 | metrics[f'val_loss'].append(val_loss)
111 | if best_loss is None:
112 | best_epoch, best_loss, best_model_path = best_epoch_saver(epoch, model,
113 | save_folder, val_loss, etn_key)
114 |         elif val_loss < best_loss:
115 | best_epoch, best_loss, best_model_path = best_epoch_saver(epoch, model,
116 | save_folder, val_loss, etn_key)
117 |
118 | etqdm.set_description(
119 | f"Train loss: {train_loss:.3f} | Val loss: {val_loss:.3f} | best model @ epoch {best_epoch}")
120 | logging.info('Finished training.')
121 |
122 | return metrics, best_model_path
123 |
124 |
125 | # USED
126 | def best_epoch_saver(epoch, model, save_folder, val_loss, etn_key):
127 | best_loss = val_loss
128 | best_epoch = epoch
129 | checkpoint_folder = save_folder + f'/checkpoints/'
130 | check_create_folder(checkpoint_folder)
131 | best_model_path = checkpoint_folder + f'model_{etn_key}_epoch_{epoch}.pth'
132 | torch.save(model.state_dict(), best_model_path)
133 |     logging.debug(f"Checkpoint model epoch {epoch}, saved {best_model_path}")
134 | return best_epoch, best_loss, best_model_path
135 |
136 |
137 | # USED
138 | def set_parameters(
139 |         param: argparse.Namespace,
140 | ):
141 | epochs = 500
142 | batch_size = param.batch_size
143 | embedding_size = 128
144 | workers = param.num_workers
145 | lr = 0.001
146 | max_patience = int(np.log2(epochs)) + 2
147 | coupling_topology = [128]
148 | num_layers = 4
149 | mask_type = 'odds'
150 |
151 | return batch_size, embedding_size, epochs, mask_type, num_layers, lr, coupling_topology, workers, max_patience
152 |
153 |
154 | # USED
155 | def set_parameters_rnvp_inference(
156 |         param: argparse.Namespace,
157 | ):
158 | embedding_size = 128
159 | workers = param.num_workers
160 | coupling_topology = [128]
161 | num_layers = 4
162 | mask_type = 'odds'
163 | layer_1_ft = param.first_layer_size
164 | layer_2_ft = layer_1_ft * 2
165 | layer_3_ft = layer_1_ft * 2 * 2
166 | layer_4_ft = layer_1_ft * 2 * 2 * 2
167 | input_channel = 3
168 | image_size = (64, 64)
169 | widths_ = [
170 | input_channel,
171 | layer_1_ft,
172 | layer_2_ft,
173 | layer_3_ft,
174 | layer_4_ft,
175 | ]
176 | return embedding_size, mask_type, num_layers, coupling_topology, workers, widths_, image_size
177 |
178 |
179 | # USED
180 | def set_oe_parameters(
181 |         param: argparse.Namespace,
182 | ):
183 | epochs = 500
184 | batch_size = param.batch_size
185 | embedding_size = 128
186 | workers = param.num_workers
187 | lr = 0.001
188 | max_patience = int(np.log2(epochs)) + 2
189 | coupling_topology = [128]
190 | num_layers = 4
191 | mask_type = 'odds'
192 | outlier_batch_size = int(batch_size * 0.1)
193 | gamma = 100
194 | lambda_p = 1
195 |
196 | return batch_size, embedding_size, epochs, mask_type, num_layers, lr, coupling_topology, workers, max_patience, gamma, lambda_p, outlier_batch_size
197 |
198 |
199 | # USED
200 | def model_tester(best_model_path, model, device, test_loader):
201 | model.load_state_dict(torch.load(best_model_path, map_location=device))
202 | model.to(device)
203 | model.eval()
204 | df_dict = {'z': [], 'label': [], 'loss': [], 'log_prob': [], 'log_det_J': [], 'l2_norm_of_z': []}
205 | with torch.no_grad():
206 | for data in tqdm(test_loader, total=len(test_loader), postfix="Running Test Set inference"):
207 | inputs, label, _ = data
208 | inputs = inputs.to(device)
209 | outputs = model(inputs)
210 | z, log_det_J = outputs
211 | log_prob_z = model.log_prob(z)
212 | log_prob_z_mean = log_prob_z.mean()
213 | log_det_j_mean = log_det_J.mean()
214 | loss = - log_prob_z_mean - log_det_j_mean
215 | logging.debug(f"TEST: inputs.shape={inputs.shape}\n"
216 | f" outputs[0].shape={outputs[0].shape}\n"
217 | f" loss {loss} = - {log_prob_z_mean} - {log_det_j_mean}")
218 | df_dict["z"].append(z.detach().squeeze(0).cpu().numpy())
219 | df_dict["label"].append(label.item())
220 | df_dict["loss"].append(loss.detach().cpu().item())
221 | df_dict["log_prob"].append(log_prob_z.detach().cpu().item())
222 | df_dict["log_det_J"].append(log_det_J.detach().cpu().item())
223 | df_dict["l2_norm_of_z"].append(LA.norm(z).detach().cpu().item())
224 | return df_dict
225 |
226 |
227 | # USED
228 | def validator(val_loader, model, device):
229 | with torch.no_grad():
230 | model.eval()
231 | losses = []
232 | log_prob_mean_list = []
233 | log_det_j_list = []
234 | for data in val_loader:
235 | inputs = data[0]
236 | inputs = inputs.to(device)
237 | outputs = model(inputs)
238 | z, log_det_J = outputs
239 | log_prob_z = model.log_prob(z)
240 | log_prob_z_mean = log_prob_z.mean()
241 | log_det_j_mean = log_det_J.mean()
242 | log_prob_mean_list.append(log_prob_z_mean.item())
243 | log_det_j_list.append(log_det_j_mean.item())
244 | loss = - log_prob_z_mean - log_det_j_mean
245 | logging.debug(f"VAL: inputs.shape={inputs.shape}\n"
246 | f" outputs[0].shape={outputs[0].shape}\n"
247 | f" loss {loss} = - {log_prob_z_mean} - {log_det_j_mean}")
248 | losses.append(loss.detach().cpu().item())
249 | return np.mean(losses), np.mean(log_prob_mean_list), np.mean(log_det_j_list)
250 |
251 |
252 | # USED
253 | def trainer_oe(device, model, optimizer, normal_train_loader, anomalies_train_loader, gamma, lambda_p):
254 | model.train()
255 | losses = []
256 | log_prob_z_mean_list = []
257 | log_det_j_mean_list = []
258 | oe_loss_component_list = []
259 | ok_l2norm_list = []
260 | an_l2norm_list = []
261 | for ok_data, an_data in zip(normal_train_loader, anomalies_train_loader):
262 | # normal data
263 | ok_inputs, _ = ok_data
264 | ok_inputs = ok_inputs.to(device)
265 | optimizer.zero_grad()
266 | ok_outputs = model(ok_inputs)
267 | ok_z, ok_log_det_J = ok_outputs
268 | ok_log_prob_z = model.log_prob(ok_z)
269 | ok_log_prob_z_mean = ok_log_prob_z.mean()
270 | ok_log_det_j_mean = ok_log_det_J.mean()
271 |
272 | an_inputs, _, _ = an_data
273 | an_inputs = an_inputs.to(device)
274 |
275 | model.eval()
276 | an_outputs = model(an_inputs)
277 | model.train()
278 |
279 | an_z, an_log_det_J = an_outputs
280 | an_log_prob_z = model.log_prob(an_z)
281 | maxs = torch.maximum(torch.zeros((len(an_log_prob_z)), device=device),
282 | gamma
283 | +
284 | (- ok_log_prob_z[:len(an_log_prob_z)] - ok_log_det_J[:len(an_log_det_J)])
285 | -
286 | (- an_log_prob_z - an_log_det_J))
287 | oe_loss_component = maxs.mean()
288 | log_prob_z_mean_list.append(ok_log_prob_z_mean.item())
289 | log_det_j_mean_list.append(ok_log_det_j_mean.item())
290 | oe_loss_component_list.append(oe_loss_component.item())
291 | loss = - ok_log_prob_z_mean - ok_log_det_j_mean + lambda_p * oe_loss_component
292 | logging.debug(f"TRAIN: ok_inputs.shape={ok_inputs.shape}\n"
293 | f" ok_outputs[0].shape={ok_outputs[0].shape}\n"
294 | f" ok_log_prob_z = {ok_log_prob_z}\n"
295 | f" an_log_prob_z = {an_log_prob_z}\n"
296 | f" maxs = {maxs}\n"
297 | f" loss {loss} = - {ok_log_prob_z_mean} - {ok_log_det_j_mean} + {lambda_p} * {oe_loss_component}\n")
298 | loss.backward()
299 | optimizer.step()
300 | losses.append(loss.item())
301 | ok_l2norm_list += LA.norm(ok_z, dim=1).detach().cpu().tolist()
302 | an_l2norm_list += LA.norm(an_z, dim=1).detach().cpu().tolist()
303 | return model, np.mean(losses), np.mean(log_prob_z_mean_list), np.mean(log_det_j_mean_list), np.mean(
304 | oe_loss_component_list), np.mean(ok_l2norm_list), np.mean(an_l2norm_list)
305 |
306 |
307 | # USED
308 | def trainer(device, model, optimizer, train_loader):
309 | model.train()
310 | losses = []
311 | log_prob_z_mean_list = []
312 | log_det_j_mean_list = []
313 | for data in train_loader:
314 | inputs, _ = data
315 | inputs = inputs.to(device)
316 |
317 | optimizer.zero_grad()
318 | outputs = model(inputs)
319 | z, log_det_J = outputs
320 | log_prob_z = model.log_prob(z)
321 | log_prob_z_mean = log_prob_z.mean()
322 | log_det_j_mean = log_det_J.mean()
323 | log_prob_z_mean_list.append(log_prob_z_mean.item())
324 | log_det_j_mean_list.append(log_det_j_mean.item())
325 | loss = - log_prob_z_mean - log_det_j_mean
326 | logging.debug(f"TRAIN: inputs.shape={inputs.shape}\n"
327 | f" outputs[0].shape={outputs[0].shape}\n"
328 | f" loss {loss} = - {log_prob_z_mean} - {log_det_j_mean}")
329 | loss.backward()
330 | optimizer.step()
331 | losses.append(loss.item())
332 |
333 | return model, np.mean(losses), np.mean(log_prob_z_mean_list), np.mean(log_det_j_mean_list)
334 |
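A note on the objective in trainer_oe above: the normal-data term is the usual flow negative log-likelihood, -E[log p(z)] - E[log det J], and the outlier-exposure term is a hinge that vanishes only once an outlier's per-sample NLL exceeds the paired inlier's by at least gamma; lambda_p weights that hinge in the total loss. A toy numeric illustration with made-up per-sample NLLs (not real model outputs):

import torch

gamma, lambda_p = 100.0, 1.0
nll_ok = torch.tensor([250.0, 260.0])  # hypothetical inlier NLLs (-log_prob - log_det_J per sample)
nll_an = torch.tensor([300.0, 420.0])  # hypothetical outlier NLLs

# Same hinge as in trainer_oe: zero once NLL(outlier) >= NLL(inlier) + gamma.
maxs = torch.maximum(torch.zeros_like(nll_ok), gamma + nll_ok - nll_an)
oe_loss_component = maxs.mean()  # tensor(25.): only the first pair still violates the margin
loss = nll_ok.mean() + lambda_p * oe_loss_component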
--------------------------------------------------------------------------------
/code/OLD_CODE/run.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | from datetime import datetime
3 |
4 | import albumentations as A
5 |
6 | import mlflow
7 | import pandas as pd
8 | import torch
9 | from rich.console import Console
10 | from torch.optim.lr_scheduler import ReduceLROnPlateau
11 | from torch.utils.data import Dataset
12 |
13 | from paper_code_release.model.autoencoder import AE
14 | from paper_code_release.model.uniformed_model_functions import set_model_and_train_parameters_patches, epoch_loop_patches, \
15 | test_loop_df_rows
16 | from paper_code_release.paper_utils.artifacts_util import uniformed_model_artifact_saver
17 | from paper_code_release.dataset import UniformedPatchesDataset, UniformedPatchesTestset
18 |
19 | from paper_code_release.paper_utils.check_create_folder import check_create_folder
20 | from paper_code_release.paper_utils.init_utils import uniformed_model_paths_init
21 | from paper_code_release.paper_utils.losses_util import losses_list
22 | from paper_code_release.paper_utils.metrics_util import compute_uniformed_model_metrics
23 | from paper_code_release.paper_utils.variables_util import dataset_names, available_scale_levels, datasets_labels_names, scaled_image_shapes
24 |
25 | console = Console()
26 |
27 |
28 | def params_parser():
29 | parser = argparse.ArgumentParser()
30 | parser.add_argument("--max_epochs",
31 | '-e', type=int, default=15)
32 | parser.add_argument("--batch_size",
33 | "-b", type=int, default=320)
34 | parser.add_argument("--bottleneck",
35 | "-n", type=int, default=128)
36 | parser.add_argument("--num_workers",
37 | '-w', type=int, default=4)
38 | parser.add_argument('--gpu_number',
39 | '-g', type=int, default=0)
40 | parser.add_argument('--learning_rate',
41 | '-l', type=float, default=1e-3)
42 | parser.add_argument('--use_ml_flow',
43 | '-m', type=int, default=1)
44 | parser.add_argument('--first_layer_size',
45 | '-f', type=int, default=128)
46 | parser.add_argument('--input_channels',
47 | '-i', type=int, default=3)
48 | parser.add_argument('--id_optimized_loss',
49 | '-o', type=int, default=0)
50 | parser.add_argument('--render_video',
51 | '-v', type=int, default=0)
52 | parser.add_argument('--patience_thres',
53 | '-p', type=float, default=0.001)
54 | parser.add_argument('--dataset',
55 | '-d', type=int, default=1)
56 | parser.add_argument('--scale_level',
57 | '-s', type=int, default=2)
58 | parser.add_argument('--test_patches_number',
59 | '-t', type=int, default=250)
60 | param = parser.parse_args()
61 | return param
62 |
63 |
64 | def per_label_metrics(df, label_key):
65 | if label_key == 0:
66 | label_unique_values = [0 if el[0] == 0 else 1 for el in
67 | df[["frame_id", "frame_label"]].groupby("frame_id")["frame_label"].unique().values]
68 | return_dict = compute_uniformed_model_metrics(
69 | label_unique_values,
70 | losses_list,
71 | df[["frame_id", "mse_loss", "mae_loss"]],
72 | stdev=True,
73 | )
74 | else:
75 | df_anomaly = df[df["frame_label"].isin([0, label_key])]
76 | label_unique_values = [0 if el[0] == 0 else 1 for el in
77 | df_anomaly[["frame_id", "frame_label"]].groupby("frame_id")[
78 | "frame_label"].unique().values]
79 |
80 | return_dict = compute_uniformed_model_metrics(
81 | label_unique_values,
82 | losses_list,
83 | df_anomaly[["frame_id", "mse_loss", "mae_loss"]],
84 | stdev=True,
85 | )
86 | return return_dict
87 |
88 |
89 | def main():
90 | """
91 |     If param.gpu_number == -1, the CPU is used.
92 | Returns:
93 | """
94 | # MODEL INIT
95 | ml_flow_run_id = None
96 | param = params_parser()
97 | console.log(f'Using the following params:{param}')
98 | time_string = datetime.now().strftime('%Y-%m-%d-%H-%M-%S')
99 | if param.scale_level not in available_scale_levels.keys():
100 | raise ValueError(
101 |             f"Oops! That is not a valid value for flag --scale_level. It has to be one of {available_scale_levels}")
102 |
103 | if param.dataset not in dataset_names.keys():
104 |         raise ValueError(f"Oops! That is not a valid value for flag --dataset. It has to be one of {dataset_names}")
105 |
106 | if param.use_ml_flow:
107 | mlflow.set_tracking_uri("http://localhost:9999")
108 | mlflow.set_experiment("ICRA 2022 experiments")
109 | mlflow.start_run()
110 | artifact_uri = mlflow.get_artifact_uri()
111 | console.log(f"artifact uri {artifact_uri}")
112 |
113 | patch_shape = (64, 64)
114 | image_shape = scaled_image_shapes[param.scale_level]
115 |
116 | model_save_folder_prefix, qualitative_paths, test_path, test_labels_csv, train_path, val_path, noise_path = uniformed_model_paths_init(
117 | param)
118 |     if param.gpu_number == -1:
119 | cuda_gpu = f"cpu"
120 | else:
121 | cuda_gpu = f"cuda:{param.gpu_number}"
122 | device = torch.device(cuda_gpu if torch.cuda.is_available() else "cpu")
123 |
124 | batch_size, bottleneck, epochs, input_channel, layer_1_ft, lr, max_patience, ml_flow_run_id, widths, workers = set_model_and_train_parameters_patches(
125 | device, ml_flow_run_id, param, patch_shape)
126 |
127 | train_patch_num = 1
128 |
129 | model = AE(widths, image_shape=patch_shape, bottleneck_size=bottleneck)
130 | model.to(device)
131 | if param.use_ml_flow:
132 | # noinspection PyUnboundLocalVariable
133 | model_save_folder = artifact_uri
134 | else:
135 | model_save_folder = model_save_folder_prefix + f'/saves/dataset_{dataset_names[param.dataset]}_B_{bottleneck}_F_{layer_1_ft}_S_{param.scale_level}_{time_string}'
136 |
137 | # AUG INIT
138 |
139 | composed_transform = A.Compose(
140 | [
141 | A.transforms.HorizontalFlip(p=0.5),
142 | A.transforms.RandomBrightnessContrast(brightness_limit=0.1, contrast_limit=0.1, p=0.5),
143 | A.RandomSizedCrop(min_max_height=[50, image_shape[0]], height=image_shape[0],
144 | width=image_shape[1], p=0.5),
145 | A.Rotate(limit=10, p=0.5),
146 | ]
147 | )
148 |
149 | # DATA INIT
150 | train_set = UniformedPatchesDataset(train_path,
151 | patch_shape,
152 | max_patches=train_patch_num,
153 | aug_flag=True,
154 | transform=composed_transform,
155 | noise_path=noise_path,
156 | noise_flag=True,
157 | )
158 | train_loader = torch.utils.data.DataLoader(
159 | train_set,
160 | batch_size=batch_size,
161 | shuffle=True,
162 | num_workers=workers,
163 | pin_memory=True,
164 | drop_last=False
165 | )
166 | with torch.no_grad():
167 | train_sample_batch = next(iter(train_loader))
168 | train_sample_batch = train_sample_batch.cpu()
169 | train_sample_batch_shape = train_sample_batch.shape
170 | train_sample_batch = train_sample_batch.view(train_sample_batch_shape[0] * train_sample_batch_shape[1],
171 | train_sample_batch_shape[2],
172 | train_sample_batch_shape[3],
173 | train_sample_batch_shape[4])
174 |
175 | val_set = UniformedPatchesDataset(
176 | val_path,
177 | patch_shape,
178 | max_patches=train_patch_num,
179 | aug_flag=False,
180 | transform=None,
181 | )
182 | val_loader = torch.utils.data.DataLoader(
183 | val_set,
184 | batch_size=1,
185 | shuffle=False,
186 | num_workers=workers,
187 | pin_memory=True
188 | )
189 |
190 | # Optimizer
191 | optimizer = torch.optim.Adam(model.parameters(), lr=lr)
192 | scheduler = ReduceLROnPlateau(optimizer, 'min', patience=max_patience, verbose=True)
193 |
194 | # TRAIN
195 | metrics = epoch_loop_patches(device, epochs, model, optimizer, param, scheduler, train_loader, val_loader)
196 |
197 | # TEST
198 | console.log("Training Completed, Testing Started")
199 |
200 | test_set = UniformedPatchesTestset(
201 | test_path,
202 | patch_shape=patch_shape,
203 | label_csv=test_labels_csv,
204 | max_patches=param.test_patches_number,
205 | )
206 | test_loader = torch.utils.data.DataLoader(
207 | test_set,
208 | batch_size=1,
209 | shuffle=False,
210 | num_workers=workers,
211 | pin_memory=True,
212 | drop_last=False
213 | )
214 | with torch.no_grad():
215 | test_sample_batch = next(iter(test_loader))["patches"]
216 | test_sample_batch = test_sample_batch.cpu()
217 | test_sample_batch_shape = test_sample_batch.shape
218 | test_sample_batch = test_sample_batch.view(test_sample_batch_shape[0] * test_sample_batch_shape[1],
219 | test_sample_batch_shape[2],
220 | test_sample_batch_shape[3],
221 | test_sample_batch_shape[4])
222 | df_dict = test_loop_df_rows(device, model, test_loader)
223 | test_set_df = pd.DataFrame.from_dict(df_dict)
224 |
225 |     # Compute AUC for MLflow logging
226 |
227 | uniformed_auc_computation_and_logging(test_set_df, param)
228 |
229 | # csv row building
230 | metrics_dict = {}
231 | for k, v in datasets_labels_names[param.dataset].items():
232 | auc_dict = per_label_metrics(test_set_df, k)
233 | if k == 0:
234 | v = "all"
235 | metrics_dict[f"{v}_mean_roc_auc_mse"] = auc_dict["mean_roc_auc"]["mse"]
236 | metrics_dict[f"{v}_mean_roc_auc_mae"] = auc_dict["mean_roc_auc"]["mae"]
237 | metrics_dict[f"{v}_mean_pr_auc_mse"] = auc_dict["mean_pr_auc"]["mse"]
238 | metrics_dict[f"{v}_mean_pr_auc_mae"] = auc_dict["mean_pr_auc"]["mae"]
239 | metrics_dict[f"{v}_q99_roc_auc_mse"] = auc_dict["q99_roc_auc"]["mse"]
240 | metrics_dict[f"{v}_q99_roc_auc_mae"] = auc_dict["q99_roc_auc"]["mae"]
241 | metrics_dict[f"{v}_q99_pr_auc_mse"] = auc_dict["q99_pr_auc"]["mse"]
242 | metrics_dict[f"{v}_q99_pr_auc_mae"] = auc_dict["q99_pr_auc"]["mae"]
243 | metrics_dict[f"{v}_std_roc_auc_mse"] = auc_dict["std_roc_auc"]["mse"]
244 | metrics_dict[f"{v}_std_roc_auc_mae"] = auc_dict["std_roc_auc"]["mae"]
245 | metrics_dict[f"{v}_std_pr_auc_mse"] = auc_dict["std_pr_auc"]["mse"]
246 | metrics_dict[f"{v}_std_pr_auc_mae"] = auc_dict["std_pr_auc"]["mae"]
247 |
248 | bfs_key = f"B{bottleneck}F{layer_1_ft}S{available_scale_levels[param.scale_level]}"
249 | csv_row = {
250 | **{"BFS": bfs_key,
251 | "dataset": param.dataset,
252 | "bottleneck": bottleneck,
253 | "first layer size": layer_1_ft,
254 | "scale level": available_scale_levels[param.scale_level],
255 | },
256 | **metrics_dict,
257 | }
258 | # SAVE STUFF
259 | console.log("Testing Completed")
260 | check_create_folder(model_save_folder)
261 | console.log("Creating and saving Artifacts")
262 | artifacts_path = model_save_folder + "/artifacts/"
263 | uniformed_model_artifact_saver(
264 | batch_size,
265 | bottleneck,
266 | epochs,
267 | layer_1_ft,
268 | lr,
269 | metrics,
270 | test_set_df,
271 | losses_list,
272 | model,
273 | input_channel,
274 | patch_shape,
275 | artifacts_path,
276 | ml_flow_run_id,
277 | param.id_optimized_loss,
278 | param=param,
279 | csv_row=csv_row,
280 | csv_key=f"d_{param.dataset}_{bfs_key}",
281 | train_sample_batch=train_sample_batch,
282 | test_sample_batch=test_sample_batch,
283 | )
284 | console.log(f"Script completed, artifacts located at {model_save_folder}."
285 |                 f" Videos and plots are skipped (still TODO).")
286 |
287 |
288 | def uniformed_auc_computation_and_logging(test_df, param):
289 | label_unique_values = [0 if el[0] == 0 else 1 for el in
290 | test_df[["frame_id", "frame_label"]].groupby("frame_id")["frame_label"].unique().values]
291 | metrics_dict = compute_uniformed_model_metrics(
292 | label_unique_values,
293 | losses_list, test_df[["frame_id", "mse_loss", "mae_loss"]])
294 | if param.use_ml_flow:
295 | mlflow.log_metric(f'test_set_q99_roc_auc_{losses_list[0]}', metrics_dict["q99_roc_auc"][losses_list[0]])
296 | mlflow.log_metric(f'test_set_q99_roc_auc_{losses_list[1]}', metrics_dict["q99_roc_auc"][losses_list[1]])
297 | mlflow.log_metric(f'test_set_mean_roc_auc_{losses_list[0]}', metrics_dict["mean_roc_auc"][losses_list[0]])
298 | mlflow.log_metric(f'test_set_mean_roc_auc_{losses_list[1]}', metrics_dict["mean_roc_auc"][losses_list[1]])
299 | mlflow.log_metric(f'test_set_q99_pr_auc_{losses_list[0]}', metrics_dict["q99_pr_auc"][losses_list[0]])
300 | mlflow.log_metric(f'test_set_q99_pr_auc_{losses_list[1]}', metrics_dict["q99_pr_auc"][losses_list[1]])
301 | mlflow.log_metric(f'test_set_mean_pr_auc_{losses_list[0]}', metrics_dict["mean_pr_auc"][losses_list[0]])
302 | mlflow.log_metric(f'test_set_mean_pr_auc_{losses_list[1]}', metrics_dict["mean_pr_auc"][losses_list[1]])
303 | else:
304 | print(f'test_set_q99_roc_auc_{losses_list[0]} = {metrics_dict["q99_roc_auc"][losses_list[0]]}\n'
305 | f'test_set_q99_roc_auc_{losses_list[1]} = {metrics_dict["q99_roc_auc"][losses_list[1]]}\n'
306 | f'test_set_mean_roc_auc_{losses_list[0]} = {metrics_dict["mean_roc_auc"][losses_list[0]]}\n'
307 | f'test_set_mean_roc_auc_{losses_list[1]} = {metrics_dict["mean_roc_auc"][losses_list[1]]}\n'
308 | f'test_set_q99_pr_auc_{losses_list[0]} = {metrics_dict["q99_pr_auc"][losses_list[0]]}\n'
309 | f'test_set_q99_pr_auc_{losses_list[1]} = {metrics_dict["q99_pr_auc"][losses_list[1]]}\n'
310 | f'test_set_mean_pr_auc_{losses_list[0]} = {metrics_dict["mean_pr_auc"][losses_list[0]]}\n'
311 | f'test_set_mean_pr_auc_{losses_list[1]} = {metrics_dict["mean_pr_auc"][losses_list[1]]}\n'
312 | )
313 |
314 |
315 | if __name__ == "__main__":
316 | # with torch.autograd.detect_anomaly():
317 | # main()
318 | main()
319 |
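The frame-level labelling and per-class filtering used by per_label_metrics and uniformed_auc_computation_and_logging above can be seen on a toy per-patch table (column names as in the code, values made up): each frame contributes several patch rows, the frame label is reduced to a binary normal/anomalous target by grouping on frame_id, and per-class AUCs are computed after restricting the table to normal frames plus a single anomaly class.

import pandas as pd

# Toy per-patch results table with the columns per_label_metrics expects; the values are invented.
df = pd.DataFrame({
    "frame_id":    [0, 0, 1, 1, 2, 2],
    "frame_label": [0, 0, 3, 3, 0, 0],   # 0 = normal, 3 = one hypothetical anomaly class
    "mse_loss":    [0.01, 0.02, 0.09, 0.11, 0.02, 0.01],
    "mae_loss":    [0.05, 0.06, 0.20, 0.25, 0.06, 0.05],
})

# Frame-level binary labels, built exactly as in per_label_metrics:
# one entry per frame, 0 if the frame is normal and 1 otherwise.
y_true = [0 if el[0] == 0 else 1 for el in
          df[["frame_id", "frame_label"]].groupby("frame_id")["frame_label"].unique().values]
# y_true == [0, 1, 0]

# For a per-class AUC, the table is first restricted to normal frames plus one class:
df_class_3 = df[df["frame_label"].isin([0, 3])]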
--------------------------------------------------------------------------------