├── .gitignore
├── README.md
├── data
    ├── .gitkeep
    └── raw
    │   └── .gitkeep
├── environment.yml
├── output
    └── models
    │   └── .gitkeep
└── source
    ├── config.py
    ├── data_prep.py
    ├── dataset.py
    ├── main.py
    ├── metrics.py
    ├── model.py
    ├── test.py
    └── train.py


/.gitignore:
--------------------------------------------------------------------------------
 1 | #ignore python cache files
 2 | source/__pycache__/config.cpython-310.pyc
 3 | source/__pycache__/dataset.cpython-310.pyc
 4 | source/__pycache__/model.cpython-310.pyc
 5 | source/__pycache__/train.cpython-310.pyc
 6 | 
 7 | #ignore processed files
 8 | data/processed/*
 9 | data/processed/*
10 | 
11 | #ignore raw folders
12 | data/raw/*
13 | #ignore model checkpoints
14 | output/models/*
15 | 
16 | #ignore results
17 | results/*


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # DCASE Task 2
 2 | Repo for DCASE 2023 Task 2, anomaly detection.
 3 | 
 4 | # Set up environment with dependencies
 5 | 1. Load conda env with "conda env create -f environment.yml"
 6 | 2. Activate conda environment with "conda activate dcase2023"
 7 | 3. If on Mac/Linux install sox with "conda install -c conda-forge sox" (a dependency of torchaudio)
 8 | 
 9 | 
10 | # Prepare data
11 | By default, when using source/data_prep.py, full length audio files are segmented into fixed length samples of 1 second (length is configurable). Mel spectrograms are computed for each segmented sample. The segmented audio samples and mel spectrograms are saved.
12 | 
13 | 1. Set up a folder "data/raw" in root directory of this repository
14 | 2. Download the ZIP folders for each machine in the Task 2 train set: https://zenodo.org/record/7690157#.ZC7sl3tBzMY
15 | 3. Extract the ZIP folders into "data/raw" ("data" folder has to be on same level as "output" and "source" folders)
16 | 4. Remove one folder level for each machine such that the directory has the following structure:
17 |    "raw_path/machine_name/train/...", for example /raw_path/gearbox/train/
18 | 5. Run "python source/data_prep.py" to chop samples in "data/raw" into 1 second snippets and save to "data/processed/audio_segments" and spectrograms to "data/processed/spectrograms"
19 |    The resulting files are named as follows: f"index={index}__segment_id={segment_id}__domain={domain}__label={label}"
20 |    index is the index of the original audio file the segment was cut from (its row index in the sorted list of raw audio files)
21 |    segment_id is a number from 0 to the number of audio segments produced from one audio file minus 1
22 |    label is "normal" or "anomaly" (only relevant for test data)
23 |    domain is "source" or "target"
24 | 
25 | # How to train a model
26 | 1. Set desired config parameters in "source/config.py"
27 | 2. Run "python source/main.py" to start training and testing
28 |    main.py will train a model for each machine type and produce evaluation results: Two files in each results/machine/ with the anomaly scores and the prediction
29 |    The results follow the official submission format:
30 |    "anomaly_score_<machine_type>_section_<section_index>.csv" with rows: "[filename (string)],[anomaly score (real value)]"
31 |    "decision_result_<machine_type>_section_<section_index>.csv" with rows "[filename (string)],[decision result (0: normal, 1: anomaly)]"
32 | Every second epoch (see "source/train.py"), accuracy (which is not really meaningful right now) and AUC are logged to Weights & Biases. At the end of the run, all metrics are computed and saved to results/metrics.csv
33 | 
34 | # TBD in the future
35 | 1. Find reasonable choices for the anomaly score detection threshold (for each machine). This will also be model dependent!
36 |    For now this is set to a fixed value in the configs DETECTION_TRESHOLD_DICT
37 | 2. If the full evaluation pipeline is done, we can start comparing to the baselines/previous years scores and improve the model
38 | 
39 | 


--------------------------------------------------------------------------------
/data/.gitkeep:
--------------------------------------------------------------------------------
1 | *
2 | !.gitignore


--------------------------------------------------------------------------------
/data/raw/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kinggongzilla/DCASE2023_Task2/a00a325d37f287cc66550599ff9fe1d9ce273062/data/raw/.gitkeep


--------------------------------------------------------------------------------
/environment.yml:
--------------------------------------------------------------------------------
 1 | name: dcase2023
 2 | channels:
 3 |   - conda-forge
 4 |   - defaults
 5 |   - pytorch
 6 | dependencies:
 7 |   - jupyter
 8 |   - torchaudio
 9 |   - pysoundfile
10 |   - pydub
11 |   - ffmpeg
12 | prefix: C:\Users\hause\miniconda3\envs\dcase2023
13 | 


--------------------------------------------------------------------------------
/output/models/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kinggongzilla/DCASE2023_Task2/a00a325d37f287cc66550599ff9fe1d9ce273062/output/models/.gitkeep


--------------------------------------------------------------------------------
/source/config.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | import numpy as np
 3 | import os
 4 | 
 5 | # GENERAL SETTINGS
 6 | IS_NORMAL = 0
 7 | IS_ANOMALY = 1
 8 | IS_SOURCE = 0
 9 | IS_TARGET = 1
10 | 
11 | # MODEL SETTINGS
12 | BATCH_SIZE = 64
13 | SAMPLE_RATE = 22500  # 22.5kHz
14 | LEARNING_RATE = 1e-4
15 | EPOCHS = 10
16 | MODEL_PATH = 'models'
17 | RESULTS_PATH = 'results'
18 | 
19 | DETECTION_TRESHOLD_DICT = dict(
20 |     bearing=0.001,
21 |     fan=0.001,
22 |     gearbox=0.001,
23 |     slider=0.001,
24 |     ToyCar=0.001,
25 |     ToyTrain=0.001,
26 |     valve=0.001,
27 | )
28 | 
29 | # if reconstruction loss is higher than this --> consider sample an anomaly
30 | 
31 | # DATA PREP SETTINGS
32 | SAMPLE_LENGTH_SECONDS = 1
33 | WINDOW_LENGTH = 1024
34 | HOP_LENGTH = 256
35 | N_FFT = 1024
36 | N_MELS = 80
37 | FMIN = 20.0
38 | FMAX = SAMPLE_RATE / 2
39 | POWER = 1.0
40 | NORMALIZED = True
41 | PARENT_PATH = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
42 | DATA_PATH = os.path.join(PARENT_PATH, 'data')
43 | RAW_PATH = os.path.join(DATA_PATH, 'raw')
44 | PROCESSED_PATH = os.path.join(DATA_PATH, 'processed')
45 | SPECTROGRAMS_PATH = os.path.join(PROCESSED_PATH, 'spectrograms')
46 | AUDIO_SEGMENTS_PATH = os.path.join(PROCESSED_PATH, 'audio_segments')
47 | RESULT_PATH = 'results'


--------------------------------------------------------------------------------
/source/data_prep.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import sys
 3 | from pydub import AudioSegment
 4 | from config import SAMPLE_RATE, SAMPLE_LENGTH_SECONDS, WINDOW_LENGTH, HOP_LENGTH, N_FFT, N_MELS, FMIN, FMAX, POWER, NORMALIZED, RAW_PATH, AUDIO_SEGMENTS_PATH, SPECTROGRAMS_PATH
 5 | import torchaudio
 6 | import numpy as np
 7 | import torch
 8 | from tqdm import tqdm
 9 | #Note: ffmpeg package required for pydub
10 | 
def transform_to_spectrogram(in_file_path, out_dir):
    """Compute a scaled log-mel spectrogram for one audio file and save it as .npy.

    The output is written to ``out_dir`` as ``<input name without extension>.spec.npy``.

    Args:
        in_file_path: path of the audio file to transform.
        out_dir: existing directory the spectrogram is written to.

    Raises:
        FileNotFoundError: if ``out_dir`` does not exist or ``in_file_path``
            is not a file.
    """
    # Raising a plain string is itself a TypeError, so use real exceptions.
    if not os.path.exists(out_dir):
        raise FileNotFoundError(f'out_dir {out_dir} does not exist')
    if not os.path.isfile(in_file_path):
        raise FileNotFoundError(f'given in_path {in_file_path} is not a file')

    # NOTE(review): the file's own sample rate is discarded and the audio is not
    # resampled, so the mel filterbank assumes the file is at SAMPLE_RATE - confirm.
    audio = torchaudio.load(in_file_path)[0]
    mel_spectrogram = torchaudio.transforms.MelSpectrogram(
        sample_rate=SAMPLE_RATE,
        win_length=WINDOW_LENGTH,
        hop_length=HOP_LENGTH,
        n_fft=N_FFT,
        f_min=FMIN,
        f_max=FMAX,
        n_mels=N_MELS,
        power=POWER,
        normalized=NORMALIZED)(audio)

    # Log scale; the clamp avoids log10(0).
    mel_spectrogram = 20 * torch.log10(torch.clamp(mel_spectrogram, min=1e-5)) - 20
    # Shift/scale and clip so that every value ends up in [0, 1].
    mel_spectrogram = torch.clamp((mel_spectrogram + 100) / 100, 0.0, 1.0)

    stem = os.path.splitext(os.path.basename(in_file_path))[0]
    np.save(os.path.join(out_dir, f'{stem}.spec.npy'), mel_spectrogram.cpu().numpy())
33 | 
34 | 
#load data of one wav and split it into chunks
def chop_wav(index, domain, label, sample_length, in_file_path: str, out_dir: str):
    """Split one audio file into fixed-length segments and export each as WAV.

    Every segment is written to ``out_dir`` as
    ``index={index}__segment_id={segment_id}__domain={domain}__label={label}.wav``.
    A trailing remainder shorter than ``sample_length`` is dropped.

    Args:
        index: index of the original audio file, embedded in the output names.
        domain: domain string embedded in the output names.
        label: label string embedded in the output names.
        sample_length: segment length in milliseconds.
        in_file_path: path of a ``.wav`` or ``.mp3`` file.
        out_dir: existing directory the segments are written to.

    Raises:
        FileNotFoundError: if ``out_dir`` does not exist or ``in_file_path``
            is not a file.
        ValueError: if the file is neither ``.wav`` nor ``.mp3`` or
            ``sample_length`` is not positive.
    """
    # Raising a plain string is itself a TypeError, so use real exceptions.
    if not os.path.exists(out_dir):
        raise FileNotFoundError(f'out_dir {out_dir} does not exist')
    if not os.path.isfile(in_file_path):
        raise FileNotFoundError(f'given in_file_path {in_file_path} is not a file')
    if sample_length <= 0:
        raise ValueError('sample_length must be a positive number of milliseconds')

    # load audio
    if in_file_path.endswith('.wav'):
        audio = AudioSegment.from_wav(in_file_path)
    elif in_file_path.endswith('.mp3'):
        audio = AudioSegment.from_mp3(in_file_path)
    else:
        raise ValueError('in_file_path must be a .wav or .mp3 file')

    # Derive the segment count from the argument (not the global config value)
    # so that the count always matches the slices taken below.
    n_segments = int(len(audio) // sample_length)

    for segment_id in range(n_segments):
        start = segment_id * sample_length
        segment = audio[start:start + sample_length]
        # Segments are always exported as WAV, so they always get a .wav name
        # (previously mp3 inputs produced WAV data in files named ".mp3").
        out_file_name = f"index={index}__segment_id={segment_id}__domain={domain}__label={label}.wav"
        segment.export(os.path.join(out_dir, out_file_name), format="wav")
64 | 
if __name__ == '__main__':

    sample_length = SAMPLE_LENGTH_SECONDS * 1000 #milliseconds

    # Every sub-directory of RAW_PATH is one machine; filtering on directories
    # also skips the .gitkeep placeholder and any other stray file.
    machine_names = sorted(
        name for name in os.listdir(RAW_PATH)
        if os.path.isdir(os.path.join(RAW_PATH, name))
    )

    # create segments
    for machine_name in tqdm(machine_names):
        for set_name in ["train", "test"]:
            raw_dir = os.path.join(RAW_PATH, machine_name, set_name)
            if not os.path.isdir(raw_dir):
                print(f'skipping missing folder {raw_dir}')
                continue
            out_dir = os.path.join(AUDIO_SEGMENTS_PATH, machine_name, set_name)
            os.makedirs(out_dir, exist_ok=True)
            # The file index must be the position in the SORTED listing: the test
            # code maps an index back to a raw file name through a sorted listing,
            # and os.listdir gives no ordering guarantee.
            for index, file_name in enumerate(sorted(os.listdir(raw_dir))):
                file_path = os.path.join(raw_dir, file_name)
                domain = "source" if "source" in file_name else "target"
                label = "normal" if "normal" in file_name else "anomaly"
                chop_wav(index, domain, label, sample_length, in_file_path=file_path, out_dir=out_dir)

    # create spectrograms
    for machine_name in tqdm(machine_names):
        for set_name in ["train", "test"]:
            segments_dir = os.path.join(AUDIO_SEGMENTS_PATH, machine_name, set_name)
            if not os.path.isdir(segments_dir):
                continue
            out_dir = os.path.join(SPECTROGRAMS_PATH, machine_name, set_name)
            os.makedirs(out_dir, exist_ok=True)
            for file_name in os.listdir(segments_dir):
                file_path = os.path.join(segments_dir, file_name)
                transform_to_spectrogram(in_file_path=file_path, out_dir=out_dir)
93 | 


--------------------------------------------------------------------------------
/source/dataset.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import sys
  3 | import numpy as np
  4 | import torch
  5 | import torch.nn.functional as F
  6 | from torch.utils.data import Dataset
  7 | import torchaudio
  8 | import re
  9 | from config import SAMPLE_RATE, RAW_PATH, SPECTROGRAMS_PATH, AUDIO_SEGMENTS_PATH, IS_NORMAL, IS_ANOMALY, IS_SOURCE, IS_TARGET
 10 | 
 11 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 12 | 
 13 | 
 14 | def parse_filename(filename):
 15 |     """
 16 |     Parse a filename of the format 'index=0__segment_id=0__domain=source__label=anomaly.wav.spec'
 17 |     and return a tuple containing the values of 'index', 'segment_id', 'domain', and 'label'.
 18 |     """
 19 |     # Split the filename into its components using the '__' separator
 20 |     components = filename.split('__')
 21 | 
 22 |     # Extract the values of 'index', 'segment_id', 'domain', and 'label' from the components
 23 |     index = int(components[0].split('=')[1])
 24 |     segment_id = int(components[1].split('=')[1])
 25 |     domain = components[2].split('=')[1]
 26 |     label = components[3].split('=')[1].split('.')[0]
 27 | 
 28 |     # Return the values as a tuple
 29 |     return index, segment_id, domain, label
 30 | 
 31 | 
 32 | def get_data(audio_segment_file_path, spectrogram_file_path):
 33 | 
 34 |     audio_segment_file_name = os.path.basename(audio_segment_file_path)
 35 |     spectrogram_file_name = os.path.basename(spectrogram_file_path)
 36 | 
 37 |     index, segment_id, domain, label = parse_filename(spectrogram_file_name)
 38 | 
 39 |     if (index, segment_id, domain, label) != parse_filename(audio_segment_file_name):
 40 |         raise AttributeError("The dataset is inconsistent!")
 41 | 
 42 |     waveform, sample_rate = torchaudio.load(audio_segment_file_path)
 43 |     spectrogram = torch.from_numpy(np.load(spectrogram_file_path))
 44 | 
 45 |     spectrogram = spectrogram[0:1, :, :] #get single channel spectrogram slicing [0:1] to preserve dimensions
 46 |     #resample waveform if sample rate is higher than SAMPLE_RATE from config.py
 47 |     if sample_rate > SAMPLE_RATE:
 48 |         waveform = torchaudio.functional.resample(waveform, orig_freq=sample_rate, new_freq=SAMPLE_RATE)
 49 |     waveform = waveform[0:1, :] #get single channel waveform from waveform with two channels; slicing [0:1] to preserve dimensions
 50 | 
 51 |     domain = IS_SOURCE if "source" in audio_segment_file_name else IS_TARGET
 52 |     label = IS_NORMAL if "normal" in audio_segment_file_name else IS_ANOMALY
 53 | 
 54 |     return spectrogram, waveform, index, domain, label
 55 | 
 56 | 
 57 | class MachineTrainDataset(Dataset):
 58 | 
 59 |     def __init__(self, machine_name) -> None:
 60 | 
 61 |         self.spectrograms_folder_path = os.path.join(SPECTROGRAMS_PATH, machine_name, "train")
 62 |         self.audio_segments_folder_path = os.path.join(AUDIO_SEGMENTS_PATH, machine_name, "train")
 63 |         self.spectrograms_file_names = sorted(os.listdir(self.spectrograms_folder_path))
 64 |         self.audio_segments_file_names = sorted(os.listdir(self.audio_segments_folder_path))
 65 |         self.length = len(self.spectrograms_file_names)
 66 | 
 67 |     def __len__(self):
 68 |         return self.length
 69 | 
 70 |     def __getitem__(self, index):
 71 |         audio_segment_file_name = self.audio_segments_file_names[index]
 72 |         spectrogram_file_name = self.spectrograms_file_names[index]
 73 |         audio_segment_file_path = os.path.join(self.audio_segments_folder_path, audio_segment_file_name)
 74 |         spectrogram_file_path = os.path.join(self.spectrograms_folder_path, spectrogram_file_name)
 75 |         return get_data(audio_segment_file_path, spectrogram_file_path)
 76 | 
 77 | 
 78 | class MachineTestLoader:
 79 |     """
 80 |     Creates a batch for each original audio file
 81 |     """
 82 |     def __init__(self, machine_name):
 83 |         self.index = 0
 84 |         self.machine_name = machine_name
 85 |         self.length = len(os.listdir(os.path.join(RAW_PATH, self.machine_name, "test")))
 86 |         self.spectrograms_folder_path = os.path.join(SPECTROGRAMS_PATH, machine_name, "test")
 87 |         self.audio_segments_folder_path = os.path.join(AUDIO_SEGMENTS_PATH, machine_name, "test")
 88 |         self.spectrograms_file_names = sorted(os.listdir(self.spectrograms_folder_path))
 89 |         self.audio_segments_file_names = sorted(os.listdir(self.audio_segments_folder_path))
 90 | 
 91 |     @property
 92 |     def index_file_names(self):
 93 |         """
 94 |         Get all filenames in the 'in_folder_path' that have the specified 'index'.
 95 |         """
 96 |         # Get a list of all filenames in the folder
 97 |         index_spectrograms_file_names = sorted([filename for filename in self.spectrograms_file_names if f"index={self.index}__" in filename])
 98 |         index_audio_segments_file_names = sorted([filename for filename in self.audio_segments_file_names if f"index={self.index}__" in filename])
 99 | 
100 |         # Return the filtered filenames
101 |         return index_spectrograms_file_names, index_audio_segments_file_names
102 | 
103 |     def __len__(self):
104 |         return self.length
105 | 
106 |     def __iter__(self):
107 |         return self
108 | 
109 |     def __next__(self):
110 | 
111 |         index_spectrograms_file_names, index_audio_segments_file_names = self.index_file_names
112 | 
113 |         if not index_spectrograms_file_names:
114 |             raise StopIteration
115 | 
116 |         data = dict(
117 |             spectrograms=[],
118 |             waveforms=[],
119 |             indices=[],
120 |             domains=[],
121 |             labels=[]
122 |         )
123 | 
124 |         for spectrogram_file_name, audio_segment_file_name in zip(index_spectrograms_file_names, index_audio_segments_file_names):
125 |             audio_segment_file_path = os.path.join(self.audio_segments_folder_path, audio_segment_file_name)
126 |             spectrogram_file_path = os.path.join(self.spectrograms_folder_path, spectrogram_file_name)
127 |             spectrogram, waveform, index, domain, label = get_data(audio_segment_file_path, spectrogram_file_path)
128 |             data["spectrograms"].append(spectrogram)
129 |             data["waveforms"].append(waveform)
130 |             data["indices"].append(index)
131 |             data["domains"].append(domain)
132 |             data["labels"].append(label)
133 | 
134 |         # Convert the lists of numerical input features to tensors
135 |         data["spectrograms"] = torch.unsqueeze(torch.cat(data["spectrograms"], dim=0), 1)
136 |         data["waveforms"] = torch.unsqueeze(torch.cat(data["waveforms"], dim=0), 1)
137 | 
138 |         self.index += 1
139 | 
140 |         return data["spectrograms"], data["waveforms"], data["indices"], data["domains"], data["labels"]
141 | 
142 | 
143 | 


--------------------------------------------------------------------------------
/source/main.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import sys
 3 | import torch
 4 | import numpy as np
 5 | from torch.utils.data import Dataset
 6 | import wandb
 7 | from model import CNNAutoencoder
 8 | from dataset import MachineTrainDataset, MachineTestLoader
 9 | from train import train
10 | from test import test
11 | from config import BATCH_SIZE, LEARNING_RATE, RAW_PATH, RESULT_PATH, EPOCHS
12 | from metrics import metrics_data, overall_score
13 | 
#start with empty cache
torch.cuda.empty_cache()


#initialize wandb
wandb.init(
    project="dcase2023_task2",
    entity="dcasetask2",
    config={
        "learning_rate": LEARNING_RATE,
        "epochs": EPOCHS,
        "batch_size": BATCH_SIZE,
    }
)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f'Using device: {device}\n\n')

# Every sub-directory of RAW_PATH is one machine type. Filtering on directories
# skips the .gitkeep placeholder as well as any other stray file.
machine_names = sorted(
    name for name in os.listdir(RAW_PATH)
    if os.path.isdir(os.path.join(RAW_PATH, name))
)

# Train and evaluate one independent model per machine type.
for machine_name in machine_names:

    print(f"Machine: {machine_name}\n")

    train_set = MachineTrainDataset(machine_name)
    train_loader = torch.utils.data.DataLoader(
        train_set,
        batch_size=BATCH_SIZE,
        shuffle=True,
    )

    test_loader = MachineTestLoader(machine_name)

    model = CNNAutoencoder()

    params = sum(p.numel() for p in model.parameters() if p.requires_grad)
    print(f'Total number of parameters: {params}\n') #print number of parameters

    optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)

    print("\nTraining\n")
    train(model, optimizer, train_loader, test_loader, machine_name)
    print("\nTesting\n")
    test(model, test_loader, machine_name)
    print("\n\n")

# Aggregate the per-machine result files into the final report.
print(f"overall_score:{overall_score(RESULT_PATH)}")
df = metrics_data(RESULT_PATH)
# Write next to the per-machine results instead of a hard-coded location.
df.to_csv(os.path.join(RESULT_PATH, 'metrics.csv'))
64 | 
65 | 


--------------------------------------------------------------------------------
/source/metrics.py:
--------------------------------------------------------------------------------
  1 | import pandas as pd
  2 | import numpy as np
  3 | import os
  4 | import sys
  5 | from statistics import harmonic_mean
  6 | from sklearn.metrics import roc_auc_score, confusion_matrix, accuracy_score
  7 | 
  8 | 
  9 | def metrics(anomaly_score_path, decison_result_path):
 10 |     """
 11 |     input:
 12 |     anomaly_score_path: path to a csv file containing the filnames in the first and the anomaly score in the second column
 13 |     decision_result_path: path to a csv file containing the filnames in the first and the anomaly decisions in the second column
 14 |     assumption currently: filename contains 'anomaly' for anomalous sample
 15 |     returns:
 16 |     accuracy, auc, p_auc with p=0.1, precision, recall, f1
 17 |     """
 18 |     anomaly_df = pd.read_csv(anomaly_score_path)
 19 |     anomaly_df = anomaly_df.reset_index()
 20 |     decision_df = pd.read_csv(decison_result_path)
 21 |     y = np.zeros((len(anomaly_df), 3))
 22 |     for ind in anomaly_df.index:
 23 |         name = anomaly_df.iloc[ind][1]
 24 |         anomaly_score = anomaly_df.iloc[ind][2]
 25 |         y_pred = decision_df.iloc[ind][1]
 26 |         y_true = 0.0
 27 |         if 'anomaly' in name:
 28 |             y_true = 1.0
 29 |         y[ind] = (y_pred, y_true, anomaly_score)
 30 |     y_preds = y[:, 0]
 31 |     y_trues = y[:, 1]
 32 |     anomalies = y[:, 2]
 33 |     accurracy = accuracy_score(y_trues, y_preds)
 34 |     auc = roc_auc_score(y_trues, anomalies)
 35 |     p_auc = roc_auc_score(y_trues, anomalies, max_fpr=0.1)
 36 |     tn_s, fp_s, fn_s, tp_s = confusion_matrix(y_trues, y_preds).ravel()
 37 |     prec = tp_s / np.maximum(tp_s + fp_s, sys.float_info.epsilon)
 38 |     recall = tp_s / np.maximum(tp_s + fn_s, sys.float_info.epsilon)
 39 |     f1 = 2.0 * prec * recall / np.maximum(prec + recall, sys.float_info.epsilon)
 40 |     return accurracy, auc, p_auc, prec, recall, f1
 41 | 
 42 | def metrics_data(resultspath):
 43 |     """
 44 |     input:
 45 |     path to results directory with the following structure:
 46 |     results
 47 |         machinetype name
 48 |             anomaly_score.csv
 49 |             decision_result.csv
 50 |     returns:
 51 |     datframe containing the metrics for each machine type
 52 |     """
 53 |     columns = ["machine", "accuracy", "auc", "p_auc", "precision", "recall", "f1"]
 54 | 
 55 |     df = pd.DataFrame({col: [] for col in columns})
 56 | 
 57 |     for dir_name in os.listdir(resultspath):
 58 |         anomaly_score_path = None
 59 |         decision_result_path = None
 60 |         if os.path.isdir(os.path.join(resultspath, dir_name)):
 61 |             for file in os.listdir(os.path.join(resultspath, dir_name)):
 62 |                 if 'anomaly_score' in file:
 63 |                     anomaly_score_path = os.path.join(os.path.join(resultspath, dir_name), file)
 64 |                 if 'decision_result' in file:
 65 |                     decision_result_path = os.path.join(os.path.join(resultspath, dir_name), file)
 66 |             if anomaly_score_path and decision_result_path is not None:
 67 |                 accuracy, auc, p_auc, prec, recall, f1 = metrics(anomaly_score_path, decision_result_path)
 68 |             else:
 69 |                 raise f'{os.path.join(resultspath, dir_name)} does not contain anomaly_score or decision_results file'
 70 |         df = df.append({"machine": dir_name, "accuracy": accuracy, "auc": auc, "p_auc": p_auc, "precision": prec, "recall": recall, "f1": f1},
 71 |                        ignore_index=True)
 72 |     return df
 73 | 
 74 | 
 75 | 
 76 | def overall_score(resultspath):
 77 |     """
 78 |     input:
 79 |     path to results directory with the following structure:
 80 |     results
 81 |         machinetype name
 82 |             anomaly_score.csv
 83 |             decision_result.csv
 84 |     returns:
 85 |     harmonic mean of auc and pauc scores over all machine types
 86 |     """
 87 |     pauc_auc_list = []
 88 |     for dir_name in os.listdir(resultspath):
 89 |         anomaly_score_path = None
 90 |         decision_result_path = None
 91 |         if os.path.isdir(os.path.join(resultspath, dir_name)):
 92 |             for file in os.listdir(os.path.join(resultspath, dir_name)):
 93 |                 if 'anomaly_score' in file:
 94 |                     anomaly_score_path = os.path.join(os.path.join(resultspath, dir_name), file)
 95 |                 if 'decision_result' in file:
 96 |                     decision_result_path = os.path.join(os.path.join(resultspath, dir_name), file)
 97 |             if anomaly_score_path and decision_result_path is not None:
 98 |                 accuracy, auc, p_auc, prec, recall, f1 = metrics(anomaly_score_path, decision_result_path)
 99 |             else:
100 |                 raise f'{os.path.join(resultspath, dir_name)} does not contain anomaly_score or decision_results file'
101 |         pauc_auc_list.append(auc)
102 |         pauc_auc_list.append(p_auc)
103 |     return harmonic_mean(pauc_auc_list)
104 | 
105 | 
106 | 
107 | 
if __name__ == '__main__':
    # Manual smoke run against the result files of the "bearing" machine;
    # all paths are relative to the current working directory.
    RESULT_PATH = 'results'
    ANOMALY_SCORE_PATH = 'results/bearing/anomaly_score_bearing_section_0.csv'
    DECISION_RESULT_PATH = 'results/bearing/decision_result_bearing_section_0.csv'

    # Metrics of the single machine
    bearing_metrics = metrics(ANOMALY_SCORE_PATH, DECISION_RESULT_PATH)
    accuracy, auc, p_auc, prec, recall, f1 = bearing_metrics

    # Aggregates over everything found in the results folder
    overall = overall_score(RESULT_PATH)
    df = metrics_data(RESULT_PATH)
115 | 


--------------------------------------------------------------------------------
/source/model.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | from torch import nn
 3 | 
 4 | class CNNAutoencoder(nn.Module):
 5 |     def __init__(self):
 6 |         super(CNNAutoencoder, self).__init__()
 7 | 
 8 |         #input size: (1, 80, 63), 1 channel, 80x63 pixels
 9 |         
10 |         # Encoder
11 |         self.conv1 = nn.Conv2d(1, 16, kernel_size=3)
12 |         self.bn1 = nn.BatchNorm2d(16)
13 |         self.conv2 = nn.Conv2d(16, 16, kernel_size=3)
14 |         self.bn2 = nn.BatchNorm2d(16)
15 |         self.conv3 = nn.Conv2d(16, 8, kernel_size=3)
16 |         self.bn3 = nn.BatchNorm2d(8)
17 |         self.conv4 = nn.Conv2d(8, 4, kernel_size=3)
18 |         self.bn4 = nn.BatchNorm2d(4)
19 |         self.conv5 = nn.Conv2d(4, 4, kernel_size=3)
20 |         self.bn5 = nn.BatchNorm2d(4)
21 |         
22 |         #compressed latent space size: ???
23 | 
24 |         # Decoder
25 |         self.t_conv1 = nn.ConvTranspose2d(4, 4, kernel_size=3)
26 |         self.t_bn1 = nn.BatchNorm2d(4)
27 |         self.t_conv2 = nn.ConvTranspose2d(4, 8, kernel_size=3)
28 |         self.t_bn2 = nn.BatchNorm2d(8)
29 |         self.t_conv3 = nn.ConvTranspose2d(8, 16, kernel_size=3)
30 |         self.t_bn3 = nn.BatchNorm2d(16)
31 |         self.t_conv4 = nn.ConvTranspose2d(16, 16, kernel_size=3)
32 |         self.t_bn4 = nn.BatchNorm2d(16)
33 |         self.t_conv5 = nn.ConvTranspose2d(16, 1, kernel_size=3)
34 | 
35 |         #output size: (1, 80, 63), 1 channel, 80x63 pixels
36 | 
37 | 
38 |     def forward(self, x):
39 |         # Encoder
40 |         x = torch.relu(self.bn1(self.conv1(x)))
41 |         x = torch.relu(self.bn2(self.conv2(x)))
42 |         x = torch.relu(self.bn3(self.conv3(x)))
43 |         x = torch.relu(self.bn4(self.conv4(x)))
44 |         x = torch.relu(self.bn5(self.conv5(x)))
45 |         
46 |         # Decoder
47 |         x = torch.relu(self.t_bn1(self.t_conv1(x)))
48 |         x = torch.relu(self.t_bn2(self.t_conv2(x)))
49 |         x = torch.relu(self.t_bn3(self.t_conv3(x)))
50 |         x = torch.relu(self.t_bn4(self.t_conv4(x)))
51 |         x = torch.sigmoid(self.t_conv5(x))
52 |         
53 |         return x


--------------------------------------------------------------------------------
/source/test.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import csv
 3 | import torch
 4 | import numpy as np
 5 | import wandb
 6 | from tqdm import tqdm
 7 | from config import RESULTS_PATH, RAW_PATH, DETECTION_TRESHOLD_DICT, IS_ANOMALY, IS_NORMAL
 8 | from metrics import metrics
 9 | 
def test(model, test_loader, machine_name):
    """Score every test recording of one machine and write the result files.

    For each batch yielded by ``test_loader`` (all segments of one original
    test file) the reconstruction error of ``model`` is turned into an anomaly
    score and compared with the machine's detection threshold. Two CSV files
    are (re)written in ``RESULTS_PATH/<machine_name>/``:
    ``anomaly_score_<machine_name>_section_0.csv`` (file name, score) and
    ``decision_result_<machine_name>_section_0.csv`` (file name, prediction).
    Afterwards AUC and accuracy are logged to wandb on a best-effort basis.

    Args:
        model: autoencoder to evaluate; it is switched to eval mode and moved
            to the available device.
        test_loader: iterable yielding
            (spectrograms, waveforms, indices, domains, labels) per test file.
        machine_name: machine type; selects the folders and the threshold.
    """
    os.makedirs(os.path.join(RESULTS_PATH, machine_name), exist_ok=True)

    loss_func = torch.nn.MSELoss()

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    model.eval()
    model.to(device)

    # Sorted raw file names: the index stored in the segment names is looked up here
    file_names = sorted(os.listdir(os.path.join(RAW_PATH, machine_name, "test")))
    anomaly_score_path = os.path.join(RESULTS_PATH, machine_name, f'anomaly_score_{machine_name}_section_{0}.csv')
    decision_result_path = os.path.join(RESULTS_PATH, machine_name, f'decision_result_{machine_name}_section_{0}.csv')

    # Open both files once ('w' truncates results of an earlier run) instead of
    # re-opening them for every row.
    with open(anomaly_score_path, 'w', newline='\n') as score_file, \
            open(decision_result_path, 'w', newline='\n') as decision_file:
        score_writer = csv.writer(score_file)
        decision_writer = csv.writer(decision_file)

        # Pure inference: no gradients needed, which saves memory and time.
        with torch.no_grad():
            for (spectrograms, waveforms, indices, domains, labels) in tqdm(test_loader):

                # batch holds all segments of one test sample
                # indices, domains, labels hold the same values for the test batches.
                index = indices[0]

                x = spectrograms.to(device)
                y = model(x)

                # NOTE(review): MSELoss already averages over the whole batch, so the
                # score is additionally divided by the number of segments. Kept as is
                # because the configured thresholds refer to this scale.
                anomaly_score = loss_func(y, x).item() / len(indices)

                if anomaly_score > DETECTION_TRESHOLD_DICT[machine_name]:
                    prediction = IS_ANOMALY
                else:
                    prediction = IS_NORMAL

                # Name of the original file this batch was cut from
                file_name = file_names[index]

                score_writer.writerow([file_name, anomaly_score])
                decision_writer.writerow([file_name, prediction])

    # Logging is best effort and must not abort training/testing, but the
    # reason for a failure is reported instead of being swallowed silently.
    try:
        accurracy, auc, p_auc, prec, recall, f1 = metrics(anomaly_score_path, decision_result_path)

        wandb.log({f"{machine_name}_auc": auc})
        wandb.log({f"{machine_name}_accurracy": accurracy})
    except Exception as error:
        print(f"logging was not possible: {error!r}")
68 | 
69 | 
70 | 
71 | 


--------------------------------------------------------------------------------
/source/train.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | import numpy as np
 3 | from tqdm import tqdm
 4 | from config import EPOCHS, MODEL_PATH
 5 | import os
 6 | import wandb
 7 | from test import test
 8 | from metrics import metrics
 9 | 
10 | 
def train(model, optimizer, train_loader, test_loader, machine_name):
    """Train the autoencoder of one machine on spectrogram reconstruction.

    Runs ``EPOCHS`` epochs of MSE reconstruction training, logs the mean batch
    loss of every epoch to wandb, stores the weights with the lowest epoch
    loss in ``MODEL_PATH/<machine_name>/model.pt`` and evaluates on the test
    data every second epoch.

    Args:
        model: model to train; moved to the available device.
        optimizer: optimizer holding the parameters of ``model``.
        train_loader: iterable with a length, yielding
            (spectrograms, waveforms, indices, domains, labels) batches.
        test_loader: loader handed to ``test`` for the periodic evaluation.
        machine_name: machine type; used for the checkpoint folder and log keys.

    Returns:
        The trained model.

    Raises:
        ValueError: if ``train_loader`` contains no batches.
    """
    # Fail early with a clear message instead of a ZeroDivisionError when the
    # epoch loss is averaged below.
    n_batches = len(train_loader)
    if n_batches == 0:
        raise ValueError(f'train_loader for {machine_name} is empty')

    save_path = os.path.join(MODEL_PATH, machine_name)
    os.makedirs(save_path, exist_ok=True)
    save_path = os.path.join(save_path, "model.pt")

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    model.train()
    model.to(device)

    loss_func = torch.nn.MSELoss()

    best_loss = float('inf')

    for epoch in range(EPOCHS):

        epoch_loss = 0.0

        # Only the spectrograms are used: the model learns to reconstruct its input
        for spectrograms, waveforms, indices, domains, labels in tqdm(train_loader):

            optimizer.zero_grad()

            x = spectrograms.to(device)
            y = model(x)

            #calculate loss, backward pass and optimizer step
            batch_loss = loss_func(y, x)
            batch_loss.backward()
            optimizer.step()
            epoch_loss += float(batch_loss.item())

        # average the accumulated loss over the number of batches
        epoch_loss = epoch_loss / n_batches
        wandb.log({f"{machine_name}_epoch_loss": epoch_loss})

        #save model if loss is new best loss
        if epoch_loss < best_loss:
            best_loss = epoch_loss
            torch.save(model.state_dict(), save_path)
        print(f'epoch: {epoch} | loss: {epoch_loss}')

        #evaluate on the test data every second epoch
        if epoch % 2 == 0:
            # NOTE(review): this loads the best checkpoint into the model that is
            # being trained, so training continues from those weights - confirm
            # that this is intended.
            model.load_state_dict(torch.load(save_path))
            model.eval()
            model.to(device)
            test(model, test_loader, machine_name)
            model.train()

    # NOTE(review): this overwrites the best-loss checkpoint with the final
    # weights, so the file always matches the returned model - confirm intent.
    torch.save(model.state_dict(), save_path)

    return model
68 | 
69 | 
70 | 
71 | 
72 | 


--------------------------------------------------------------------------------