├── .gitignore ├── README.md ├── data ├── .gitkeep └── raw │ └── .gitkeep ├── environment.yml ├── output └── models │ └── .gitkeep └── source ├── config.py ├── data_prep.py ├── dataset.py ├── main.py ├── metrics.py ├── model.py ├── test.py └── train.py /.gitignore: -------------------------------------------------------------------------------- 1 | #ignore python cache files 2 | source/__pycache__/config.cpython-310.pyc 3 | source/__pycache__/dataset.cpython-310.pyc 4 | source/__pycache__/model.cpython-310.pyc 5 | source/__pycache__/train.cpython-310.pyc 6 | 7 | #ignore processed files 8 | data/processed/* 9 | data/processed/* 10 | 11 | #ignore raw folders 12 | data/raw/* 13 | #ignore model checkpoints 14 | output/models/* 15 | 16 | #ignore results 17 | results/* -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # DCASE Task 2 2 | Repo for DCASE 2023 Task 2, anomaly detection. 3 | 4 | # Set up environment with dependencies 5 | 1. Load conda env with "conda env create -f environment.yml" 6 | 2. Activate conda environment with "conda activate dcase2023" 7 | 3. If on Mac/Linux install sox with "conda install -c conda-forge sox" (a dependency of torchaudio) 8 | 9 | 10 | # Prepare data 11 | By default, when using source/data_prep.py, full length audio files are segmented into fixed length samples of 1 second (length is configurable). Mel spectrograms are computed for each segmented sample. The segmented audio samples and mel spectrograms are saved. 12 | 13 | 1. Set up a folder "data/raw" in root directory of this repository 14 | 2. Download the ZIP folders for each machine in the Task 2 train set: https://zenodo.org/record/7690157#.ZC7sl3tBzMY 15 | 3. Extract the ZIP folders into "data/raw" ("data" folder has to be on same level as "output" and "source" folders) 16 | 4. Remove one folder level for each machine such that the directory has the following structure: 17 | "raw_path/machine_name/train/...", for example /raw_path/gearbox/train/ 18 | 5. Run "python source/data_prep.py" to chop samples in "data/raw" into 1 second snippets and save to "data/processed/audio_segments" and spectrograms to "data/processed/spectrograms" 19 | The resulting files will have following namings: f"index={index}__segment_id={segment_id}__domain={domain}__label={label}" 20 | index: a number indicating the original audio file index (row index in the sorted list of raw audio files) belonged to 21 | segment_id is a number from 0 to the number of audio segments produced from one audio file 22 | label is "normal" or "anomaly" (only relevant for test data) 23 | domain is "source" or "target" 24 | 25 | # How to train a model 26 | 1. Set desired config parameters in "source/config.py" 27 | 2. Run "python source/main.py" to start training and testing 28 | main.py will train a model for each machine type and produce evaluation results: Two files in each results/machine/ with the anomaly scores and the prediction 29 | The results follow the official submission format: 30 | "anomaly_score__section_.csv" with rows: "[filename (string)],[anomaly score (real value)]" 31 | "decision_result__section_.csv" with rows "[filename (string)],[decision result (0: normal, 1: anomaly)]" 32 | Every 5 epochs accurracy (which is not really meaningful right now) as well aus auc is tracked with weight and biases as well as in the end all metrics are computed and saved to results/metrics.csv 33 | 34 | # TBD in the future 35 | 1. Find reasonable choices for the anomaly score detection threshold (for each machine). This will also be model dependent! 36 | For now this is set to a fixed value in the configs DETECTION_TRESHOLD_DICT 37 | 2. If the full evaluation pipeline is done, we can start comparing to the baselines/previous years scores and improve the model 38 | 39 | -------------------------------------------------------------------------------- /data/.gitkeep: -------------------------------------------------------------------------------- 1 | * 2 | !.gitignore -------------------------------------------------------------------------------- /data/raw/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kinggongzilla/DCASE2023_Task2/a00a325d37f287cc66550599ff9fe1d9ce273062/data/raw/.gitkeep -------------------------------------------------------------------------------- /environment.yml: -------------------------------------------------------------------------------- 1 | name: dcase2023 2 | channels: 3 | - conda-forge 4 | - defaults 5 | - pytorch 6 | dependencies: 7 | - jupyter 8 | - torchaudio 9 | - pysoundfile 10 | - pydub 11 | - ffmpeg 12 | prefix: C:\Users\hause\miniconda3\envs\dcase2023 13 | -------------------------------------------------------------------------------- /output/models/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kinggongzilla/DCASE2023_Task2/a00a325d37f287cc66550599ff9fe1d9ce273062/output/models/.gitkeep -------------------------------------------------------------------------------- /source/config.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | import os 4 | 5 | # GENERAL SETTINGS 6 | IS_NORMAL = 0 7 | IS_ANOMALY = 1 8 | IS_SOURCE = 0 9 | IS_TARGET = 1 10 | 11 | # MODEL SETTINGS 12 | BATCH_SIZE = 64 13 | SAMPLE_RATE = 22500 # 22.5kHz 14 | LEARNING_RATE = 1e-4 15 | EPOCHS = 10 16 | MODEL_PATH = 'models' 17 | RESULTS_PATH = 'results' 18 | 19 | DETECTION_TRESHOLD_DICT = dict( 20 | bearing=0.001, 21 | fan=0.001, 22 | gearbox=0.001, 23 | slider=0.001, 24 | ToyCar=0.001, 25 | ToyTrain=0.001, 26 | valve=0.001, 27 | ) 28 | 29 | # if reconstruction loss is higher than this --> consider sample an anomaly 30 | 31 | # DATA PREP SETTINGS 32 | SAMPLE_LENGTH_SECONDS = 1 33 | WINDOW_LENGTH = 1024 34 | HOP_LENGTH = 256 35 | N_FFT = 1024 36 | N_MELS = 80 37 | FMIN = 20.0 38 | FMAX = SAMPLE_RATE / 2 39 | POWER = 1.0 40 | NORMALIZED = True 41 | PARENT_PATH = os.path.abspath(os.path.join(os.getcwd(), os.pardir)) 42 | DATA_PATH = os.path.join(PARENT_PATH, 'data') 43 | RAW_PATH = os.path.join(DATA_PATH, 'raw') 44 | PROCESSED_PATH = os.path.join(DATA_PATH, 'processed') 45 | SPECTROGRAMS_PATH = os.path.join(PROCESSED_PATH, 'spectrograms') 46 | AUDIO_SEGMENTS_PATH = os.path.join(PROCESSED_PATH, 'audio_segments') 47 | RESULT_PATH = 'results' -------------------------------------------------------------------------------- /source/data_prep.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | from pydub import AudioSegment 4 | from config import SAMPLE_RATE, SAMPLE_LENGTH_SECONDS, WINDOW_LENGTH, HOP_LENGTH, N_FFT, N_MELS, FMIN, FMAX, POWER, NORMALIZED, RAW_PATH, AUDIO_SEGMENTS_PATH, SPECTROGRAMS_PATH 5 | import torchaudio 6 | import numpy as np 7 | import torch 8 | from tqdm import tqdm 9 | #Note: ffmpeg package required for pydub 10 | 11 | def transform_to_spectrogram(in_file_path, out_dir): 12 | if not os.path.exists(out_dir): 13 | raise f'out_dir {out_dir} does not exist' 14 | if not os.path.isfile(in_file_path): 15 | raise f'given in_path {in_file_path} is not a file' 16 | 17 | audio = torchaudio.load(in_file_path)[0] 18 | mel_spectrogram = torchaudio.transforms.MelSpectrogram( 19 | sample_rate=SAMPLE_RATE, 20 | win_length=WINDOW_LENGTH, 21 | hop_length=HOP_LENGTH, 22 | n_fft=N_FFT, 23 | f_min=FMIN, 24 | f_max=FMAX, 25 | n_mels=N_MELS, 26 | power=POWER, 27 | normalized=NORMALIZED)(audio) 28 | 29 | mel_spectrogram = 20 * torch.log10(torch.clamp(mel_spectrogram, min=1e-5)) - 20 30 | mel_spectrogram = torch.clamp((mel_spectrogram + 100) / 100, 0.0, 1.0) 31 | file_name = os.path.basename(in_file_path) 32 | np.save(os.path.join(out_dir, f'{file_name[:-4]}.spec.npy'), mel_spectrogram.cpu().numpy()) 33 | 34 | 35 | #load data of one wav and split it into chunks 36 | def chop_wav(index, domain, label, sample_length, in_file_path: str, out_dir: str): 37 | 38 | #check if out_dir exists 39 | if not os.path.exists(out_dir): 40 | raise 'out_dir does not exist' 41 | if not os.path.isfile(in_file_path): 42 | raise 'given in_file_path is not a file' 43 | 44 | # load audio 45 | if in_file_path.endswith('.wav'): 46 | audio = AudioSegment.from_wav(in_file_path) 47 | file_ending = '.wav' 48 | elif in_file_path.endswith('.mp3'): 49 | audio = AudioSegment.from_mp3(in_file_path) 50 | file_ending = '.mp3' 51 | else: 52 | raise 'wav_path must be a .wav or .mp3 file' 53 | 54 | start = 0 55 | end = sample_length 56 | n_iters = int(len(audio)) // (SAMPLE_LENGTH_SECONDS * 1000) 57 | 58 | for segment_id in range(n_iters): 59 | newAudio = audio[start:end] 60 | out_file_name = f"index={index}__segment_id={segment_id}__domain={domain}__label={label}{file_ending}" 61 | newAudio.export(os.path.join(out_dir, out_file_name), format="wav") 62 | start += sample_length 63 | end += sample_length 64 | 65 | if __name__ == '__main__': 66 | 67 | sample_length = SAMPLE_LENGTH_SECONDS * 1000 #milliseconds 68 | 69 | # create segments 70 | for machine_name in tqdm(os.listdir(RAW_PATH)): 71 | if machine_name == '.gitkeep': 72 | continue 73 | for set_name in ["train", "test"]: 74 | for index, file_name in enumerate(os.listdir(os.path.join(RAW_PATH, machine_name, set_name))): 75 | file_path = os.path.join(RAW_PATH, machine_name, set_name, file_name) 76 | domain = "source" if "source" in file_name else "target" 77 | label = "normal" if "normal" in file_name else "anomaly" 78 | out_dir = os.path.join(AUDIO_SEGMENTS_PATH, machine_name, set_name) 79 | os.makedirs(out_dir, exist_ok=True) 80 | chop_wav(index, domain, label, sample_length, in_file_path=file_path, out_dir=out_dir) 81 | 82 | # create spectograms 83 | for machine_name in tqdm(os.listdir(RAW_PATH)): 84 | #ignore .gitkeep file 85 | if machine_name == '.gitkeep': 86 | continue 87 | for set_name in ["train", "test"]: 88 | for file_name in os.listdir(os.path.join(AUDIO_SEGMENTS_PATH, machine_name, set_name)): 89 | file_path = os.path.join(AUDIO_SEGMENTS_PATH, machine_name, set_name, file_name) 90 | out_dir = os.path.join(SPECTROGRAMS_PATH, machine_name, set_name) 91 | os.makedirs(out_dir, exist_ok=True) 92 | transform_to_spectrogram(in_file_path=file_path, out_dir=out_dir) 93 | -------------------------------------------------------------------------------- /source/dataset.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import numpy as np 4 | import torch 5 | import torch.nn.functional as F 6 | from torch.utils.data import Dataset 7 | import torchaudio 8 | import re 9 | from config import SAMPLE_RATE, RAW_PATH, SPECTROGRAMS_PATH, AUDIO_SEGMENTS_PATH, IS_NORMAL, IS_ANOMALY, IS_SOURCE, IS_TARGET 10 | 11 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 12 | 13 | 14 | def parse_filename(filename): 15 | """ 16 | Parse a filename of the format 'index=0__segment_id=0__domain=source__label=anomaly.wav.spec' 17 | and return a tuple containing the values of 'index', 'segment_id', 'domain', and 'label'. 18 | """ 19 | # Split the filename into its components using the '__' separator 20 | components = filename.split('__') 21 | 22 | # Extract the values of 'index', 'segment_id', 'domain', and 'label' from the components 23 | index = int(components[0].split('=')[1]) 24 | segment_id = int(components[1].split('=')[1]) 25 | domain = components[2].split('=')[1] 26 | label = components[3].split('=')[1].split('.')[0] 27 | 28 | # Return the values as a tuple 29 | return index, segment_id, domain, label 30 | 31 | 32 | def get_data(audio_segment_file_path, spectrogram_file_path): 33 | 34 | audio_segment_file_name = os.path.basename(audio_segment_file_path) 35 | spectrogram_file_name = os.path.basename(spectrogram_file_path) 36 | 37 | index, segment_id, domain, label = parse_filename(spectrogram_file_name) 38 | 39 | if (index, segment_id, domain, label) != parse_filename(audio_segment_file_name): 40 | raise AttributeError("The dataset is inconsistent!") 41 | 42 | waveform, sample_rate = torchaudio.load(audio_segment_file_path) 43 | spectrogram = torch.from_numpy(np.load(spectrogram_file_path)) 44 | 45 | spectrogram = spectrogram[0:1, :, :] #get single channel spectrogram slicing [0:1] to preserve dimensions 46 | #resample waveform if sample rate is higher than SAMPLE_RATE from config.py 47 | if sample_rate > SAMPLE_RATE: 48 | waveform = torchaudio.functional.resample(waveform, orig_freq=sample_rate, new_freq=SAMPLE_RATE) 49 | waveform = waveform[0:1, :] #get single channel waveform from waveform with two channels; slicing [0:1] to preserve dimensions 50 | 51 | domain = IS_SOURCE if "source" in audio_segment_file_name else IS_TARGET 52 | label = IS_NORMAL if "normal" in audio_segment_file_name else IS_ANOMALY 53 | 54 | return spectrogram, waveform, index, domain, label 55 | 56 | 57 | class MachineTrainDataset(Dataset): 58 | 59 | def __init__(self, machine_name) -> None: 60 | 61 | self.spectrograms_folder_path = os.path.join(SPECTROGRAMS_PATH, machine_name, "train") 62 | self.audio_segments_folder_path = os.path.join(AUDIO_SEGMENTS_PATH, machine_name, "train") 63 | self.spectrograms_file_names = sorted(os.listdir(self.spectrograms_folder_path)) 64 | self.audio_segments_file_names = sorted(os.listdir(self.audio_segments_folder_path)) 65 | self.length = len(self.spectrograms_file_names) 66 | 67 | def __len__(self): 68 | return self.length 69 | 70 | def __getitem__(self, index): 71 | audio_segment_file_name = self.audio_segments_file_names[index] 72 | spectrogram_file_name = self.spectrograms_file_names[index] 73 | audio_segment_file_path = os.path.join(self.audio_segments_folder_path, audio_segment_file_name) 74 | spectrogram_file_path = os.path.join(self.spectrograms_folder_path, spectrogram_file_name) 75 | return get_data(audio_segment_file_path, spectrogram_file_path) 76 | 77 | 78 | class MachineTestLoader: 79 | """ 80 | Creates a batch for each original audio file 81 | """ 82 | def __init__(self, machine_name): 83 | self.index = 0 84 | self.machine_name = machine_name 85 | self.length = len(os.listdir(os.path.join(RAW_PATH, self.machine_name, "test"))) 86 | self.spectrograms_folder_path = os.path.join(SPECTROGRAMS_PATH, machine_name, "test") 87 | self.audio_segments_folder_path = os.path.join(AUDIO_SEGMENTS_PATH, machine_name, "test") 88 | self.spectrograms_file_names = sorted(os.listdir(self.spectrograms_folder_path)) 89 | self.audio_segments_file_names = sorted(os.listdir(self.audio_segments_folder_path)) 90 | 91 | @property 92 | def index_file_names(self): 93 | """ 94 | Get all filenames in the 'in_folder_path' that have the specified 'index'. 95 | """ 96 | # Get a list of all filenames in the folder 97 | index_spectrograms_file_names = sorted([filename for filename in self.spectrograms_file_names if f"index={self.index}__" in filename]) 98 | index_audio_segments_file_names = sorted([filename for filename in self.audio_segments_file_names if f"index={self.index}__" in filename]) 99 | 100 | # Return the filtered filenames 101 | return index_spectrograms_file_names, index_audio_segments_file_names 102 | 103 | def __len__(self): 104 | return self.length 105 | 106 | def __iter__(self): 107 | return self 108 | 109 | def __next__(self): 110 | 111 | index_spectrograms_file_names, index_audio_segments_file_names = self.index_file_names 112 | 113 | if not index_spectrograms_file_names: 114 | raise StopIteration 115 | 116 | data = dict( 117 | spectrograms=[], 118 | waveforms=[], 119 | indices=[], 120 | domains=[], 121 | labels=[] 122 | ) 123 | 124 | for spectrogram_file_name, audio_segment_file_name in zip(index_spectrograms_file_names, index_audio_segments_file_names): 125 | audio_segment_file_path = os.path.join(self.audio_segments_folder_path, audio_segment_file_name) 126 | spectrogram_file_path = os.path.join(self.spectrograms_folder_path, spectrogram_file_name) 127 | spectrogram, waveform, index, domain, label = get_data(audio_segment_file_path, spectrogram_file_path) 128 | data["spectrograms"].append(spectrogram) 129 | data["waveforms"].append(waveform) 130 | data["indices"].append(index) 131 | data["domains"].append(domain) 132 | data["labels"].append(label) 133 | 134 | # Convert the lists of numerical input features to tensors 135 | data["spectrograms"] = torch.unsqueeze(torch.cat(data["spectrograms"], dim=0), 1) 136 | data["waveforms"] = torch.unsqueeze(torch.cat(data["waveforms"], dim=0), 1) 137 | 138 | self.index += 1 139 | 140 | return data["spectrograms"], data["waveforms"], data["indices"], data["domains"], data["labels"] 141 | 142 | 143 | -------------------------------------------------------------------------------- /source/main.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import torch 4 | import numpy as np 5 | from torch.utils.data import Dataset 6 | import wandb 7 | from model import CNNAutoencoder 8 | from dataset import MachineTrainDataset, MachineTestLoader 9 | from train import train 10 | from test import test 11 | from config import BATCH_SIZE, LEARNING_RATE, RAW_PATH, RESULT_PATH, EPOCHS 12 | from metrics import metrics_data, overall_score 13 | 14 | #start with empty cache 15 | torch.cuda.empty_cache() 16 | 17 | 18 | #initialize wandb 19 | wandb.init( 20 | project="dcase2023_task2", 21 | entity="dcasetask2", 22 | config = { 23 | "learning_rate": LEARNING_RATE, 24 | "epochs": EPOCHS, 25 | "batch_size": BATCH_SIZE, 26 | } 27 | ) 28 | 29 | device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 30 | print(f'Using device: {device}\n\n') 31 | 32 | for machine_name in os.listdir(RAW_PATH): 33 | if machine_name == '.gitkeep': 34 | continue 35 | 36 | print(f"Machine: {machine_name}\n") 37 | 38 | train_set = MachineTrainDataset(machine_name) 39 | train_loader = torch.utils.data.DataLoader( 40 | train_set, 41 | batch_size=BATCH_SIZE, 42 | shuffle=True, 43 | ) 44 | 45 | test_loader = MachineTestLoader(machine_name) 46 | 47 | model = CNNAutoencoder() 48 | 49 | model_parameters = filter(lambda p: p.requires_grad, model.parameters()) 50 | params = sum([np.prod(p.size()) for p in model_parameters]) 51 | print(f'Total number of parameters: {params}\n') #print number of parameters 52 | 53 | optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE) 54 | 55 | print("\nTraining\n") 56 | train(model, optimizer, train_loader, test_loader, machine_name) 57 | print("\nTesting\n") 58 | test(model, test_loader, machine_name) 59 | print("\n\n") 60 | 61 | print(f"overall_score:{overall_score(RESULT_PATH)}") 62 | df = metrics_data(RESULT_PATH) 63 | df.to_csv('results/metrics.csv') 64 | 65 | -------------------------------------------------------------------------------- /source/metrics.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | import os 4 | import sys 5 | from statistics import harmonic_mean 6 | from sklearn.metrics import roc_auc_score, confusion_matrix, accuracy_score 7 | 8 | 9 | def metrics(anomaly_score_path, decison_result_path): 10 | """ 11 | input: 12 | anomaly_score_path: path to a csv file containing the filnames in the first and the anomaly score in the second column 13 | decision_result_path: path to a csv file containing the filnames in the first and the anomaly decisions in the second column 14 | assumption currently: filename contains 'anomaly' for anomalous sample 15 | returns: 16 | accuracy, auc, p_auc with p=0.1, precision, recall, f1 17 | """ 18 | anomaly_df = pd.read_csv(anomaly_score_path) 19 | anomaly_df = anomaly_df.reset_index() 20 | decision_df = pd.read_csv(decison_result_path) 21 | y = np.zeros((len(anomaly_df), 3)) 22 | for ind in anomaly_df.index: 23 | name = anomaly_df.iloc[ind][1] 24 | anomaly_score = anomaly_df.iloc[ind][2] 25 | y_pred = decision_df.iloc[ind][1] 26 | y_true = 0.0 27 | if 'anomaly' in name: 28 | y_true = 1.0 29 | y[ind] = (y_pred, y_true, anomaly_score) 30 | y_preds = y[:, 0] 31 | y_trues = y[:, 1] 32 | anomalies = y[:, 2] 33 | accurracy = accuracy_score(y_trues, y_preds) 34 | auc = roc_auc_score(y_trues, anomalies) 35 | p_auc = roc_auc_score(y_trues, anomalies, max_fpr=0.1) 36 | tn_s, fp_s, fn_s, tp_s = confusion_matrix(y_trues, y_preds).ravel() 37 | prec = tp_s / np.maximum(tp_s + fp_s, sys.float_info.epsilon) 38 | recall = tp_s / np.maximum(tp_s + fn_s, sys.float_info.epsilon) 39 | f1 = 2.0 * prec * recall / np.maximum(prec + recall, sys.float_info.epsilon) 40 | return accurracy, auc, p_auc, prec, recall, f1 41 | 42 | def metrics_data(resultspath): 43 | """ 44 | input: 45 | path to results directory with the following structure: 46 | results 47 | machinetype name 48 | anomaly_score.csv 49 | decision_result.csv 50 | returns: 51 | datframe containing the metrics for each machine type 52 | """ 53 | columns = ["machine", "accuracy", "auc", "p_auc", "precision", "recall", "f1"] 54 | 55 | df = pd.DataFrame({col: [] for col in columns}) 56 | 57 | for dir_name in os.listdir(resultspath): 58 | anomaly_score_path = None 59 | decision_result_path = None 60 | if os.path.isdir(os.path.join(resultspath, dir_name)): 61 | for file in os.listdir(os.path.join(resultspath, dir_name)): 62 | if 'anomaly_score' in file: 63 | anomaly_score_path = os.path.join(os.path.join(resultspath, dir_name), file) 64 | if 'decision_result' in file: 65 | decision_result_path = os.path.join(os.path.join(resultspath, dir_name), file) 66 | if anomaly_score_path and decision_result_path is not None: 67 | accuracy, auc, p_auc, prec, recall, f1 = metrics(anomaly_score_path, decision_result_path) 68 | else: 69 | raise f'{os.path.join(resultspath, dir_name)} does not contain anomaly_score or decision_results file' 70 | df = df.append({"machine": dir_name, "accuracy": accuracy, "auc": auc, "p_auc": p_auc, "precision": prec, "recall": recall, "f1": f1}, 71 | ignore_index=True) 72 | return df 73 | 74 | 75 | 76 | def overall_score(resultspath): 77 | """ 78 | input: 79 | path to results directory with the following structure: 80 | results 81 | machinetype name 82 | anomaly_score.csv 83 | decision_result.csv 84 | returns: 85 | harmonic mean of auc and pauc scores over all machine types 86 | """ 87 | pauc_auc_list = [] 88 | for dir_name in os.listdir(resultspath): 89 | anomaly_score_path = None 90 | decision_result_path = None 91 | if os.path.isdir(os.path.join(resultspath, dir_name)): 92 | for file in os.listdir(os.path.join(resultspath, dir_name)): 93 | if 'anomaly_score' in file: 94 | anomaly_score_path = os.path.join(os.path.join(resultspath, dir_name), file) 95 | if 'decision_result' in file: 96 | decision_result_path = os.path.join(os.path.join(resultspath, dir_name), file) 97 | if anomaly_score_path and decision_result_path is not None: 98 | accuracy, auc, p_auc, prec, recall, f1 = metrics(anomaly_score_path, decision_result_path) 99 | else: 100 | raise f'{os.path.join(resultspath, dir_name)} does not contain anomaly_score or decision_results file' 101 | pauc_auc_list.append(auc) 102 | pauc_auc_list.append(p_auc) 103 | return harmonic_mean(pauc_auc_list) 104 | 105 | 106 | 107 | 108 | if __name__ == '__main__': 109 | ANOMALY_SCORE_PATH = 'results/bearing/anomaly_score_bearing_section_0.csv' 110 | DECISION_RESULT_PATH = 'results/bearing/decision_result_bearing_section_0.csv' 111 | RESULT_PATH = 'results' 112 | accuracy, auc, p_auc, prec, recall, f1 = metrics(ANOMALY_SCORE_PATH, DECISION_RESULT_PATH) 113 | overall = overall_score(RESULT_PATH) 114 | df = metrics_data(RESULT_PATH) 115 | -------------------------------------------------------------------------------- /source/model.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | 4 | class CNNAutoencoder(nn.Module): 5 | def __init__(self): 6 | super(CNNAutoencoder, self).__init__() 7 | 8 | #input size: (1, 80, 63), 1 channel, 80x63 pixels 9 | 10 | # Encoder 11 | self.conv1 = nn.Conv2d(1, 16, kernel_size=3) 12 | self.bn1 = nn.BatchNorm2d(16) 13 | self.conv2 = nn.Conv2d(16, 16, kernel_size=3) 14 | self.bn2 = nn.BatchNorm2d(16) 15 | self.conv3 = nn.Conv2d(16, 8, kernel_size=3) 16 | self.bn3 = nn.BatchNorm2d(8) 17 | self.conv4 = nn.Conv2d(8, 4, kernel_size=3) 18 | self.bn4 = nn.BatchNorm2d(4) 19 | self.conv5 = nn.Conv2d(4, 4, kernel_size=3) 20 | self.bn5 = nn.BatchNorm2d(4) 21 | 22 | #compressed latent space size: ??? 23 | 24 | # Decoder 25 | self.t_conv1 = nn.ConvTranspose2d(4, 4, kernel_size=3) 26 | self.t_bn1 = nn.BatchNorm2d(4) 27 | self.t_conv2 = nn.ConvTranspose2d(4, 8, kernel_size=3) 28 | self.t_bn2 = nn.BatchNorm2d(8) 29 | self.t_conv3 = nn.ConvTranspose2d(8, 16, kernel_size=3) 30 | self.t_bn3 = nn.BatchNorm2d(16) 31 | self.t_conv4 = nn.ConvTranspose2d(16, 16, kernel_size=3) 32 | self.t_bn4 = nn.BatchNorm2d(16) 33 | self.t_conv5 = nn.ConvTranspose2d(16, 1, kernel_size=3) 34 | 35 | #output size: (1, 80, 63), 1 channel, 80x63 pixels 36 | 37 | 38 | def forward(self, x): 39 | # Encoder 40 | x = torch.relu(self.bn1(self.conv1(x))) 41 | x = torch.relu(self.bn2(self.conv2(x))) 42 | x = torch.relu(self.bn3(self.conv3(x))) 43 | x = torch.relu(self.bn4(self.conv4(x))) 44 | x = torch.relu(self.bn5(self.conv5(x))) 45 | 46 | # Decoder 47 | x = torch.relu(self.t_bn1(self.t_conv1(x))) 48 | x = torch.relu(self.t_bn2(self.t_conv2(x))) 49 | x = torch.relu(self.t_bn3(self.t_conv3(x))) 50 | x = torch.relu(self.t_bn4(self.t_conv4(x))) 51 | x = torch.sigmoid(self.t_conv5(x)) 52 | 53 | return x -------------------------------------------------------------------------------- /source/test.py: -------------------------------------------------------------------------------- 1 | import os 2 | import csv 3 | import torch 4 | import numpy as np 5 | import wandb 6 | from tqdm import tqdm 7 | from config import RESULTS_PATH, RAW_PATH, DETECTION_TRESHOLD_DICT, IS_ANOMALY, IS_NORMAL 8 | from metrics import metrics 9 | 10 | def test(model, test_loader, machine_name): 11 | 12 | os.makedirs(os.path.join(RESULTS_PATH, machine_name), exist_ok=True) 13 | 14 | loss_func = torch.nn.MSELoss() 15 | 16 | device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 17 | 18 | model.eval() 19 | model.to(device) 20 | 21 | # Get a sorted list of file names in the relevant directory 22 | file_names = sorted(os.listdir(os.path.join(RAW_PATH, machine_name, "test"))) 23 | anomaly_score_path = os.path.join(RESULTS_PATH, machine_name, f'anomaly_score_{machine_name}_section_{0}.csv') 24 | decision_result_path = os.path.join(RESULTS_PATH, machine_name, f'decision_result_{machine_name}_section_{0}.csv') 25 | 26 | with open(anomaly_score_path, 'w', newline='\n') as _: 27 | pass 28 | with open(decision_result_path, 'w', newline='\n') as _: 29 | pass 30 | 31 | for (spectrograms, waveforms, indices, domains, labels) in tqdm(test_loader): 32 | 33 | # batch holds all segments of one test sample 34 | # indices, domains, labels hold the same values for the test batches. 35 | index = indices[0] 36 | domain = domains[0] 37 | label = labels[0] 38 | 39 | x = spectrograms.to(device) 40 | y = model.forward(x) 41 | 42 | anomaly_score = loss_func(y, x).view(-1).sum().item()/len(indices) 43 | 44 | if anomaly_score > DETECTION_TRESHOLD_DICT[machine_name]: 45 | prediction = IS_ANOMALY 46 | else: 47 | prediction = IS_NORMAL 48 | 49 | # Get the filename for the current iteration 50 | file_name = file_names[index] 51 | 52 | 53 | # Write the anomaly score and prediction to the CSV files 54 | with open(anomaly_score_path, 'a', newline='\n') as f: 55 | writer = csv.writer(f) 56 | writer.writerow([file_name, anomaly_score]) 57 | 58 | with open(decision_result_path, 'a', newline='\n') as f: 59 | writer = csv.writer(f) 60 | writer.writerow([file_name, prediction]) 61 | try: 62 | accurracy, auc, p_auc, prec, recall, f1 = metrics(anomaly_score_path, decision_result_path) 63 | 64 | wandb.log({f"{machine_name}_auc": auc}) 65 | wandb.log({f"{machine_name}_accurracy": accurracy}) 66 | except: 67 | print("logging was not possible") 68 | 69 | 70 | 71 | -------------------------------------------------------------------------------- /source/train.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | from tqdm import tqdm 4 | from config import EPOCHS, MODEL_PATH 5 | import os 6 | import wandb 7 | from test import test 8 | from metrics import metrics 9 | 10 | 11 | def train(model, optimizer, train_loader, test_loader, machine_name): 12 | 13 | save_path = os.path.join(MODEL_PATH, machine_name) 14 | os.makedirs(save_path, exist_ok=True) 15 | save_path = os.path.join(save_path, "model.pt") 16 | 17 | device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 18 | 19 | model.train() 20 | model.to(device) 21 | 22 | loss_func = torch.nn.MSELoss() 23 | 24 | step_count = 0 25 | best_loss = 999999999999 26 | 27 | for epoch in range(EPOCHS): 28 | 29 | epoch_loss = 0 30 | 31 | for spectrograms, waveforms, indices, domains, labels in tqdm(train_loader): 32 | 33 | step_count += 1 34 | optimizer.zero_grad() 35 | 36 | x = spectrograms.to(device) 37 | y = model.forward(x) 38 | 39 | #calculate loss, barward pass and optimizer step 40 | batch_loss = loss_func(y, x) 41 | batch_loss.backward() 42 | optimizer.step() 43 | epoch_loss += float(batch_loss.item()) 44 | 45 | # normalize epoch_loss by total number of samples 46 | epoch_loss = epoch_loss/len(train_loader) 47 | wandb.log({f"{machine_name}_epoch_loss": epoch_loss}) 48 | 49 | 50 | #save model if loss is new best loss 51 | if epoch_loss < best_loss: 52 | best_loss = epoch_loss 53 | torch.save(model.state_dict(), save_path) 54 | print(f'epoch: {epoch} | loss: {epoch_loss}') 55 | 56 | #log area under the curve every 10 epochs 57 | if epoch % 2 == 0: 58 | model.load_state_dict(torch.load(save_path)) 59 | model.eval() 60 | model.to(device) 61 | test(model, test_loader, machine_name) 62 | model.train() 63 | 64 | 65 | torch.save(model.state_dict(), save_path) 66 | 67 | return model 68 | 69 | 70 | 71 | 72 | --------------------------------------------------------------------------------