├── .gitignore ├── LICENSE ├── README.md ├── dl4d ├── __init__.py ├── datasets │ ├── __init__.py │ ├── cifar10.py │ ├── pamap.py │ ├── sits.py │ ├── svhn.py │ ├── ucr.py │ └── wisdm.py ├── images.py ├── sampler.py ├── timeseries.py └── transforms.py ├── experiments ├── .DS_Store ├── config_files │ ├── fixmatch.yaml │ ├── ladder.yaml │ ├── logisticregression.yaml │ ├── meanteacher.yaml │ ├── mixmatch.yaml │ ├── randomforest.yaml │ ├── selfsupervised.yaml │ ├── supervised.yaml │ ├── supervised_full.yaml │ └── vat.yaml ├── run.py ├── run_baselines.py ├── tune.py └── tune_baselines.py ├── figures ├── results.png └── uml_ssltsc.png ├── requirements.txt ├── results.png ├── setup.py ├── ssltsc ├── __init__.py ├── __pycache__ │ ├── __init__.cpython-36.pyc │ ├── constants.cpython-36.pyc │ ├── experiments.cpython-36.pyc │ ├── postprocessing.cpython-36.pyc │ └── visualization.cpython-36.pyc ├── architectures │ ├── FCN_tsai.py │ ├── InceptionTime.py │ ├── ResCNN.py │ ├── ResNet.py │ ├── __init__.py │ ├── cnn_lstm.py │ ├── convlarge.py │ ├── convnet13.py │ ├── fcn.py │ ├── fcn_multitask.py │ ├── ladder.py │ ├── ladder_utils.py │ ├── layers_utils.py │ ├── utils.py │ └── wideresnet28.py ├── callbacks.py ├── constants.py ├── data.py ├── experiments.py ├── models │ ├── __init__.py │ ├── basemodel.py │ ├── fixmatch.py │ ├── ladder.py │ ├── losses.py │ ├── meanteacher.py │ ├── mixmatch.py │ ├── model_factory.py │ ├── selfsupervised.py │ ├── supervised.py │ ├── utils.py │ └── vat.py ├── postprocessing.py └── visualization.py └── tests ├── cifar10 ├── X_test.npy ├── X_train.npy ├── Y_test.npy └── Y_train.npy ├── pamap2 ├── X_train.npy └── Y_train.npy ├── test_architectures.py ├── test_dl4d.py ├── test_models.py └── utils.py /.gitignore: -------------------------------------------------------------------------------- 1 | ssltsc.egg-info* 2 | .vscode* 3 | __pycache__* 4 | data/* 5 | .idea* 6 | *mlruns* 7 | *.vs* 8 | .DS_Store 9 | *.db 10 | *.png 11 | *.npy -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 Goschjann 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # SSLTSC 2 | 3 | Codebase for our paper [Deep Semi-supervised Learning (SSL) for Time Series Classification (TSC)](https://arxiv.org/abs/2102.03622), to appear at ICMLA '21. 4 | 5 | tldr: performance gains of semi-supervised models translate well from image to time series classification: 6 | 7 | ![Results](results.png) 8 | 9 | ## General 10 | 11 | This framework allows you to evaluate the performance of SSL algorithms initially designed for image classification on time series classification problems and to compare them with different baseline models. 12 | 13 | This PyTorch-based codebase allows you to run experiments in a reproducible manner and to track and visualize individual experiments via [mlflow](https://www.mlflow.org/). 14 | The core of this framework consists of two sub-packages: `dl4d` for data loading and sampling in a semi-supervised manner, and `ssltsc`, which contains the different backbone architectures, baseline models and semi-supervised learning strategies. 15 | The hyperparameters and general arguments for the model runs are controlled via the `config` files in `ssltsc/experiments/config_files`, each of which specifies a single experiment. 16 | Hyperparameter tuning builds on the same config file syntax and uses Hyperband as implemented in [optuna](https://www.optuna.org/). 17 | 18 | All models in this repository were developed using image classification datasets (Cifar10, SVHN) as a comparison to validate the correctness of the code. This means you can use it not only for semi-supervised time series classification but also as a starting point for semi-supervised image classification. 19 | 20 | The core functionalities of this framework are also tested in a series of unit tests. 21 | Run `python -m unittest discover -s tests` from the parent level of this repository to run these tests via the `unittest` framework. CI will be integrated on top of these tests soon. 22 | 23 | The following UML diagram gives a detailed overview of the different components of this framework: ![UML Diagram](figures/uml_ssltsc.png) 24 | 25 | ## Get Started 26 | 27 | Install the requirements in a clean Python environment via `pip install -r requirements.txt`. Then install the module `ssltsc` by running `pip install -e .` from the parent level of this repository. 28 | 29 | ## Examples 30 | 31 | The following are some examples of how to train or tune different algorithms on different datasets using this framework. Datasets are downloaded to the folder `data` on the fly the first time they are used. These code snippets should be run from `ssltsc/experiments`.
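Beyond the command-line entry points below, the `dl4d` datasets and samplers can also be used directly from Python. The following is a minimal, illustrative sketch (dataset choice, path and batch sizes are placeholders; the label masking follows `ImageDataset.__getitem__`, where unlabelled indices return `y = -1`), assuming it is run from `ssltsc/experiments` like the snippets below:

```
from torch.utils.data import DataLoader

from dl4d.datasets.cifar10 import Cifar10
from dl4d.sampler import SemiSupervisionSampler

# downloads and prepares the .npy splits on first use
train = Cifar10(root='../data/', part='train', download=True)

# batches of 64 samples, 16 of them labelled; 4000 labelled samples in the whole dataset
sampler = SemiSupervisionSampler(train, batch_size=64,
                                 num_labels_in_batch=16,
                                 num_labels_in_dataset=4000)

loader = DataLoader(train, batch_sampler=sampler)
x, y = next(iter(loader))  # y == -1 for the unlabelled part of the batch
```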
32 | 33 | To train a `mixmatch` model with an FCN backbone on the `pamap2` dataset for `1000` update steps, storing the results in the mlflow experiment `hello_mixmatch_fcn`, run: 34 | 35 | ``` 36 | python run.py --config config_files/mixmatch.yaml --n_steps 1000 --dataset pamap2 --backbone FCN --mlflow_name hello_mixmatch_fcn 37 | ``` 38 | 39 | To verify the correct implementation of the `virtual adversarial training` (VAT) model on `cifar10` with a `wideresnet28` backbone, run: 40 | 41 | ``` 42 | python run.py --config config_files/vat.yaml --dataset cifar10 --backbone wideresnet28 43 | ``` 44 | 45 | To run a Random Forest baseline based on features extracted via `tsfresh` from the `SITS` dataset on `250` labelled samples only, run: 46 | 47 | ``` 48 | python run_baselines.py --config config_files/randomforest.yaml --dataset sits --num_labels 250 49 | ``` 50 | 51 | And finally, to tune the hyperparameters of the `meanteacher` model on the `crop` dataset for 10 hours (`36000` seconds) on `1000` labelled samples, run: 52 | 53 | ``` 54 | python tune.py --config config_files/meanteacher.yaml --num_labels 1000 --time_budget 36000 55 | ``` 56 | 57 | ## Integrated Algorithms and Datasets 58 | 59 | ### Algorithms 60 | 61 | All algorithms are stored in `ssltsc.models`. Currently, the following semi-supervised algorithms are implemented within this framework: 62 | 63 | * Mixmatch by [Berthelot et al. (2019)](https://arxiv.org/abs/1905.02249) 64 | * Virtual Adversarial Training by [Miyato et al. (2017)](https://arxiv.org/abs/1704.03976) 65 | * Mean Teacher by [Tarvainen & Valpola (2017)](https://arxiv.org/abs/1703.01780) 66 | * Ladder Net by [Rasmus et al. (2015)](https://arxiv.org/abs/1507.02672) 67 | * Self-supervised Learning for TSC by [Jawed et al. (2020)](https://link.springer.com/chapter/10.1007/978-3-030-47426-3_39) 68 | 69 | and the following baseline models: 70 | 71 | * Supervised baseline model 72 | * Random Forest (based on features extracted via [tsfresh](https://tsfresh.readthedocs.io/en/latest/)) 73 | * Logistic Regression (based on features extracted via [tsfresh](https://tsfresh.readthedocs.io/en/latest/)) 74 | 75 | 76 | ### Datasets 77 | 78 | All integrated datasets can be found in `dl4d.datasets`.
This framework currently contain the following TSC datasets: 79 | 80 | * [WISDM](https://www.cis.fordham.edu/wisdm/dataset.php) 81 | * [Pamap2](https://archive.ics.uci.edu/ml/machine-learning-databases/00231/PAMAP2_Dataset.zip) 82 | * [SITS](http://cloudstor.aarnet.edu.au/plus/s/pRLVtQyNhxDdCoM/download?path=%2FDataset%2FSITS_2006_NDVI_C%2FSITS1M_fold1&files=SITS1M_fold1_TRAIN.csv) 83 | * [Crop](http://www.timeseriesclassification.com/description.php?Dataset=Crop) 84 | * [FordB](http://www.timeseriesclassification.com/description.php?Dataset=FordB) 85 | * [Electric Devices](http://www.timeseriesclassification.com/description.php?Dataset=ElectricDevices) 86 | 87 | as well as these standard image classification datasets to validate the implementation 88 | 89 | * Cifar10 90 | * SVHN -------------------------------------------------------------------------------- /dl4d/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Goschjann/ssltsc/08d6b1bf711bb1c8f19f9bfb66a98d4e423e932e/dl4d/__init__.py -------------------------------------------------------------------------------- /dl4d/datasets/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Goschjann/ssltsc/08d6b1bf711bb1c8f19f9bfb66a98d4e423e932e/dl4d/datasets/__init__.py -------------------------------------------------------------------------------- /dl4d/datasets/cifar10.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pdb 3 | import numpy as np 4 | import torchvision 5 | import torch 6 | import torchvision.transforms as transforms 7 | from sklearn.model_selection import train_test_split 8 | from dl4d.images import ImageDataset 9 | 10 | 11 | class Cifar10(ImageDataset): 12 | base_folder = 'cifar10' 13 | seed = 1337 14 | val_size = 1000 15 | 16 | def __init__(self, root, part='train', task='classification', 17 | features=False, 18 | val_size=None, 19 | test_size=None, 20 | transform=None, target_transform=None, download=True, 21 | normalize=False, standardize=False, 22 | scale_overall=True, scale_channelwise=True): 23 | 24 | self.root = root 25 | if download: 26 | self.download() 27 | 28 | super(Cifar10, self).__init__(root, transform=transform, 29 | target_transform=target_transform) 30 | 31 | self.x, self.y = self.load_dataset(part=part) 32 | 33 | def __len__(self): 34 | return len(self.x) 35 | 36 | def download(self): 37 | final_path = os.path.join(self.root, self.base_folder) 38 | if not os.path.exists(final_path): 39 | os.mkdir(final_path) 40 | else: 41 | return 42 | 43 | np.random.seed(self.seed) 44 | 45 | transform = transforms.Compose([transforms.ToTensor()]) 46 | trainset = torchvision.datasets.CIFAR10(root=final_path, 47 | train=True, 48 | download=True, 49 | transform=transform) 50 | testset = torchvision.datasets.CIFAR10(root=final_path, 51 | train=False, 52 | download=True, 53 | transform=transform) 54 | 55 | X_train = trainset.data.swapaxes(2, 3).swapaxes(1, 2) 56 | Y_train = trainset.targets 57 | X_test = testset.data.swapaxes(2, 3).swapaxes(1, 2) 58 | Y_test = testset.targets 59 | 60 | X_test, X_val, Y_test, Y_val = train_test_split(X_test, Y_test, 61 | test_size=self.val_size, 62 | random_state=self.seed, 63 | stratify=Y_test) 64 | 65 | np.save(file=os.path.join(final_path, 'X_train.npy'), arr=X_train) 66 | np.save(file=os.path.join(final_path, 'X_test.npy'), arr=X_test) 67 | np.save(file=os.path.join(final_path, 
'X_val.npy'), arr=X_val) 68 | np.save(file=os.path.join(final_path, 'Y_train.npy'), arr=Y_train) 69 | np.save(file=os.path.join(final_path, 'Y_test.npy'), arr=Y_test) 70 | np.save(file=os.path.join(final_path, 'Y_val.npy'), arr=Y_val) 71 | 72 | os.system('rm {}/cifar-10-python.tar.gz; rm -rf {}/cifar-10-batches-py'.format(final_path, final_path)) -------------------------------------------------------------------------------- /dl4d/datasets/pamap.py: -------------------------------------------------------------------------------- 1 | import shutil 2 | import os 3 | import numpy as np 4 | import pandas as pd 5 | from datetime import datetime, timedelta 6 | from sklearn.model_selection import train_test_split 7 | from torchvision.datasets.utils import download_and_extract_archive 8 | from dl4d.timeseries import TimeseriesDataset 9 | 10 | 11 | class PAMAP2(TimeseriesDataset): 12 | base_folder = 'pamap2' 13 | url = "https://archive.ics.uci.edu/ml/machine-learning-databases/00231/PAMAP2_Dataset.zip" 14 | filename = "PAMAP2_Dataset.zip" 15 | 16 | seed = 1337 # Seed for train/test split 17 | _freq = 10 # Frequency for resampling in hz 18 | _length_in_sec = 10 # Length of each window in seconds 19 | _overlap = 0.8 # Overlap for data augmentation 20 | 21 | def __init__(self, root, part='train', task='classification', 22 | transform=None, target_transform=None, download=True, 23 | normalize=False, standardize=False, 24 | features=False, 25 | horizon=None, 26 | stride=None, 27 | val_size=250, 28 | test_size=2000, 29 | scale_overall=True, scale_channelwise=True): 30 | super(PAMAP2, self).__init__(root, transform=transform, 31 | target_transform=target_transform, 32 | horizon=horizon, 33 | stride=stride, 34 | val_size=val_size, 35 | test_size=test_size, 36 | task=task, normalize=normalize, standardize=standardize, 37 | scale_overall=scale_overall, scale_channelwise=scale_channelwise) 38 | 39 | if download: 40 | self.download() 41 | 42 | self.x, self.y = self.load_dataset(part=part, features=features) 43 | self.test_size = test_size # Absolute size of the test data set 44 | self.val_size = val_size # Absolute size of the validation data set 45 | 46 | def __len__(self): 47 | return len(self.x) 48 | 49 | def download(self): 50 | final_path = os.path.join(self.root, self.base_folder) 51 | extracted_path = os.path.join(self.root, 'PAMAP2_Dataset') 52 | 53 | if os.path.exists(final_path): 54 | return 55 | 56 | np.random.seed(self.seed) 57 | download_and_extract_archive(self.url, self.root, 58 | filename=self.filename) 59 | 60 | # colnames as provided by pamap dictionary 61 | imu_cols = ['temperature', 'acc_1_x', 'acc_1_y', 'acc_1_z', 62 | 'acc_2_x', 63 | 'acc_2_y', 'acc_2_y', 'gyro_x', 'gyro_y', 'gyro_z', 64 | 'magnet_x', 'magnet_y', 'magnet_z', 'orient_1', 65 | 'orient_2', 66 | 'orient_3', 'orient_4'] 67 | 68 | columns = ['timestamp', 'act_id', 'heart_rate'] + \ 69 | ['hand_' + a for a in imu_cols] + \ 70 | ['chest_' + a for a in imu_cols] + \ 71 | ['ankle_' + a for a in imu_cols] 72 | 73 | resample_string = '{}S'.format(1 / self._freq) 74 | 75 | # columns of interest 76 | cols_int = ['act_id', 'hand_acc_1_x', 'hand_acc_1_y', 77 | 'hand_acc_1_z', 'hand_gyro_x', 'hand_gyro_y', 78 | 'hand_gyro_z'] 79 | 80 | # store subjects and labels 81 | subject_list = [] 82 | label_list = [] 83 | 84 | for subject_idx in range(1, 10): 85 | print('##### Work on patient {}'.format(subject_idx)) 86 | 87 | data_file = 'subject10{}.dat'.format(subject_idx) 88 | data_path = os.path.join(extracted_path, 'Protocol', data_file) 89 | 
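# Per-subject preprocessing: parse the space-separated .dat protocol file,
# resample the selected hand accelerometer/gyroscope channels to self._freq (10 Hz),
# cut them into windows of self._length_in_sec (10 s) with self._overlap (80 %) overlap,
# and label each window with the majority activity id ('act_id') observed in it.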
data = pd.read_csv(data_path, sep=r'\s+', header=None) 90 | 91 | data.columns = columns 92 | # Amount of minutes for this subject 93 | data['timestamp'] = pd.to_datetime(data['timestamp'], unit='s') 94 | data = data.set_index('timestamp') 95 | max_st = datetime.strptime(str(max(data.index)), 96 | '%Y-%m-%d %H:%M:%S.%f') 97 | min_st = datetime.strptime(str(min(data.index)), 98 | '%Y-%m-%d %H:%M:%S.%f') 99 | # Amount of total seconds 100 | secs = (max_st - min_st).total_seconds() 101 | st = min_st 102 | et = min_st 103 | 104 | # Initialize array with estimated amount of windows 105 | est_samples = int(secs / self._length_in_sec * (1 - self._overlap) ** (-1)) 106 | df = np.empty(shape=(est_samples, len(cols_int) - 1, self._length_in_sec * self._freq)) 107 | j = 0 108 | labels = [] 109 | for col in cols_int: 110 | # Resampling to 10hz 111 | series = pd.Series(data[col], index=data.index) 112 | series = series.resample(resample_string).mean() 113 | 114 | # Segmenting into windows 115 | st = min_st 116 | et = min_st 117 | i = 0 118 | while True: 119 | # Length of the window: 10 seconds 120 | delta = timedelta(0, self._length_in_sec) 121 | # Offset for the next window: 2 seconds for 80% args.overlap 122 | offset = timedelta(0, (1 - self._overlap) * self._length_in_sec) 123 | st = st + offset 124 | et = st + delta 125 | if et > max_st: 126 | print('reached end, extracted {} windows'.format(i)) 127 | break 128 | segment = series.between_time(start_time=st.time(), 129 | end_time=et.time()) 130 | # Store time series or label 131 | if col != 'act_id': 132 | df[i, j, :] = segment.to_numpy()[ 133 | :(self._length_in_sec * self._freq)] 134 | else: 135 | (v, c) = np.unique(segment.to_numpy(), 136 | return_counts=True) 137 | idx = np.argmax(c) 138 | labels.append(v[idx]) 139 | i += 1 140 | 141 | if col != 'act_id': 142 | j += 1 143 | 144 | # Cut unneeded space in ndarray 145 | df = df[:i, :, :] 146 | 147 | subject_list.extend([subject_idx] * i) 148 | label_list.extend(labels) 149 | 150 | final_df = df if subject_idx == 1 else np.concatenate((final_df, df), axis=0) 151 | 152 | # fill nan's with 0.0 153 | X = np.nan_to_num(final_df) 154 | 155 | # store label and subject information 156 | meta_dict = {'subject': subject_list, 'label': label_list} 157 | meta_df = pd.DataFrame(meta_dict) 158 | 159 | Y = meta_df['label'] 160 | # Split X data in X_test and X_train 161 | X_train, X_test, Y_train, Y_test = train_test_split(X, Y, 162 | test_size=self.val_size + self.test_size, 163 | random_state=self.seed, 164 | stratify=Y) 165 | 166 | # Split X_test data in X_test and X_val 167 | X_test, X_val, Y_test, Y_val = train_test_split(X_test, Y_test, 168 | test_size=self.val_size, 169 | random_state=self.seed, 170 | stratify=Y_test) 171 | 172 | os.makedirs(final_path, exist_ok=True) 173 | 174 | np.save(file=os.path.join(final_path, 'X_train.npy'), arr=X_train) 175 | np.save(file=os.path.join(final_path, 'X_test.npy'), arr=X_test) 176 | np.save(file=os.path.join(final_path, 'X_val.npy'), arr=X_val) 177 | np.save(file=os.path.join(final_path, 'Y_train.npy'), arr=Y_train.astype(np.float32)) 178 | np.save(file=os.path.join(final_path, 'Y_test.npy'), arr=Y_test.astype(np.float32)) 179 | np.save(file=os.path.join(final_path, 'Y_val.npy'), arr=Y_val.astype(np.float32)) 180 | 181 | shutil.rmtree(extracted_path) 182 | self.save_stats(X_train) 183 | self.extract_features_from_npy() 184 | -------------------------------------------------------------------------------- /dl4d/datasets/sits.py: 
-------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import pandas as pd 4 | from sklearn.preprocessing import LabelEncoder 5 | from sklearn.model_selection import train_test_split 6 | from dl4d.timeseries import TimeseriesDataset 7 | 8 | 9 | class SITS(TimeseriesDataset): 10 | base_folder = 'sits' 11 | url_train = "http://cloudstor.aarnet.edu.au/plus/s/pRLVtQyNhxDdCoM/download?path=%2FDataset%2FSITS_2006_NDVI_C%2FSITS1M_fold1&files=SITS1M_fold1_TRAIN.csv" 12 | url_test = "https://cloudstor.aarnet.edu.au/plus/s/pRLVtQyNhxDdCoM/download?path=%2FDataset%2FSITS_2006_NDVI_C%2FSITS1M_fold1&files=SITS1M_fold1_TEST.csv" 13 | filename_train = "SITS1M_fold1_TRAIN.csv" 14 | filename_test = "SITS1M_fold1_TEST.csv" 15 | 16 | num_obs = 100000 17 | min_support = 5000 18 | 19 | seed = 1337 20 | 21 | def __init__(self, root, part='train', task='classification', 22 | transform=None, target_transform=None, download=True, 23 | normalize=False, standardize=False, 24 | features=False, 25 | horizon=None, 26 | stride=None, 27 | val_size=250, 28 | test_size=2000, 29 | scale_overall=True, scale_channelwise=True): 30 | 31 | super(SITS, self).__init__(root, transform=transform, 32 | target_transform=target_transform, 33 | horizon=horizon, 34 | stride=stride, 35 | val_size=val_size, 36 | test_size=test_size, 37 | task=task, normalize=normalize, standardize=standardize, 38 | scale_overall=scale_overall, scale_channelwise=scale_channelwise) 39 | 40 | if download: 41 | self.download() 42 | 43 | self.x, self.y = self.load_dataset(part=part, features=features) 44 | self.test_size = test_size # Absolute size of the test data set 45 | self.val_size = val_size # Absolute size of the validation data set 46 | 47 | def __len__(self): 48 | return len(self.x) 49 | 50 | def download(self): 51 | final_path = os.path.join(self.root, self.base_folder) 52 | 53 | if not os.path.exists(final_path): 54 | os.mkdir(final_path) 55 | else: 56 | return 57 | 58 | np.random.seed(self.seed) 59 | 60 | # 2) Read in data 61 | df_raw_train = pd.read_csv(self.url_train, header=None) 62 | df_raw_test = pd.read_csv(self.url_test, header=None) 63 | 64 | # 3) Select random numbers of observations 65 | test_ratio = 0.1 66 | num_obs_test = round(self.num_obs * test_ratio) 67 | num_obs_train = round(self.num_obs * (1 - test_ratio)) 68 | 69 | df_raw_test = df_raw_test.sample(num_obs_test) 70 | df_raw_train = df_raw_train.sample(num_obs_train) 71 | 72 | Y_train = np.asarray(df_raw_train.iloc[:, 0]) 73 | Y_test = np.asarray(df_raw_test.iloc[:, 0]) 74 | 75 | # Subset only classes with large support in the data 76 | large_classes = np.where(np.unique(Y_train, return_counts=True)[1] > self.min_support)[0] + 1 77 | idx_train = [idx for idx in range(len(Y_train)) if Y_train[idx] in large_classes] 78 | idx_test = [idx for idx in range(len(Y_test)) if Y_test[idx] in large_classes] 79 | 80 | Y_train = Y_train[idx_train] 81 | Y_test = Y_test[idx_test] 82 | 83 | print('Distribution of subsetted train and test') 84 | print(np.unique(Y_train, return_counts=True)) 85 | print(np.unique(Y_test, return_counts=True)) 86 | 87 | df_raw_train = df_raw_train.iloc[idx_train] 88 | df_raw_test = df_raw_test.iloc[idx_test] 89 | 90 | # Encode the labels to ints 91 | Y = pd.DataFrame(Y_train, columns=['label']) 92 | le = LabelEncoder() 93 | le.fit(Y['label'].values) 94 | Y_foo = le.transform(Y['label'].values) 95 | Y_train = pd.DataFrame(Y_foo.tolist(), columns=['label']) 96 | 97 | # Encode the labels to ints 98 | Y = 
pd.DataFrame(Y_test, columns=['label']) 99 | Y_foo = le.transform(Y['label'].values) 100 | Y_test = pd.DataFrame(Y_foo.tolist(), columns=['label']) 101 | 102 | df_raw_train = df_raw_train.iloc[:, 1:] 103 | df_raw_test = df_raw_test.iloc[:, 1:] 104 | 105 | # Reshape and normalize the data 106 | X_train = np.nan_to_num(df_raw_train.to_numpy()) 107 | 108 | # Reshape to bcl format 109 | X_train = X_train.reshape(X_train.shape[0], 1, X_train.shape[1]) 110 | 111 | X_test = np.nan_to_num(df_raw_test.to_numpy()) 112 | 113 | # Reshape to bcl format 114 | X_test = X_test.reshape(X_test.shape[0], 1, X_test.shape[1]) 115 | 116 | # Split X_test data in X_test and X_val 117 | X_test, X_val, Y_test, Y_val = train_test_split(X_test, Y_test, 118 | test_size=self.val_size, 119 | random_state=self.seed, 120 | stratify=Y_test) 121 | 122 | np.save(file=os.path.join(final_path, 'X_train.npy'), arr=X_train) 123 | np.save(file=os.path.join(final_path, 'X_test.npy'), arr=X_test) 124 | np.save(file=os.path.join(final_path, 'X_val.npy'), arr=X_val) 125 | np.save(file=os.path.join(final_path, 'Y_train.npy'), arr=Y_train.astype(np.float32).squeeze(1)) 126 | np.save(file=os.path.join(final_path, 'Y_test.npy'), arr=Y_test.astype(np.float32).squeeze(1)) 127 | np.save(file=os.path.join(final_path, 'Y_val.npy'), arr=Y_val.astype(np.float32).squeeze(1)) 128 | 129 | self.save_stats(X_train) 130 | self.extract_features_from_npy() 131 | -------------------------------------------------------------------------------- /dl4d/datasets/svhn.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import torchvision 4 | import torch 5 | import torchvision.transforms as transforms 6 | from sklearn.model_selection import train_test_split 7 | from dl4d.images import ImageDataset 8 | 9 | 10 | class SVHN(ImageDataset): 11 | base_folder = 'svhn' 12 | seed = 1337 13 | val_size = 1000 14 | 15 | def __init__(self, root, part='train', task='classification', 16 | features=False, 17 | val_size=None, 18 | test_size=None, 19 | transform=None, target_transform=None, download=True, 20 | normalize=False, standardize=False, 21 | scale_overall=True, scale_channelwise=True): 22 | 23 | self.root = root 24 | if download: 25 | self.download() 26 | 27 | super(SVHN, self).__init__(root, transform=transform, 28 | target_transform=target_transform) 29 | 30 | self.x, self.y = self.load_dataset(part=part) 31 | 32 | def __len__(self): 33 | return len(self.x) 34 | 35 | def download(self): 36 | final_path = os.path.join(self.root, self.base_folder) 37 | 38 | if not os.path.exists(final_path): 39 | os.mkdir(final_path) 40 | else: 41 | return 42 | 43 | np.random.seed(self.seed) 44 | 45 | transform = transforms.Compose([transforms.ToTensor()]) 46 | trainset = torchvision.datasets.SVHN(root=final_path, 47 | split='train', 48 | download=True, 49 | transform=transform) 50 | testset = torchvision.datasets.SVHN(root=final_path, 51 | split='test', 52 | download=True, 53 | transform=transform) 54 | 55 | X_train = trainset.data 56 | Y_train = trainset.labels 57 | X_test = testset.data 58 | Y_test = testset.labels 59 | 60 | X_test, X_val, Y_test, Y_val = train_test_split(X_test, Y_test, 61 | test_size=self.val_size, 62 | random_state=self.seed, 63 | stratify=Y_test) 64 | 65 | np.save(file=os.path.join(final_path, 'X_train.npy'), arr=X_train) 66 | np.save(file=os.path.join(final_path, 'X_test.npy'), arr=X_test) 67 | np.save(file=os.path.join(final_path, 'X_val.npy'), arr=X_val) 68 | 
np.save(file=os.path.join(final_path, 'Y_train.npy'), arr=Y_train.astype(np.float32)) 69 | np.save(file=os.path.join(final_path, 'Y_test.npy'), arr=Y_test.astype(np.float32)) 70 | np.save(file=os.path.join(final_path, 'Y_val.npy'), arr=Y_val.astype(np.float32)) 71 | 72 | os.system('rm {}/train_32x32.mat; rm -rf {}/test_32x32.mat'.format(final_path, final_path)) 73 | -------------------------------------------------------------------------------- /dl4d/datasets/ucr.py: -------------------------------------------------------------------------------- 1 | import shutil 2 | import pdb 3 | import os 4 | import numpy as np 5 | import tempfile 6 | from sklearn.model_selection import train_test_split 7 | from dl4d.timeseries import TimeseriesDataset 8 | from urllib.request import urlretrieve 9 | from pyunpack import Archive 10 | from sktime.utils.data_io import load_from_tsfile_to_dataframe 11 | 12 | 13 | class UCR(TimeseriesDataset): 14 | seed = 1337 15 | src_website = 'http://www.timeseriesclassification.com/Downloads' 16 | 17 | def __init__(self, root, dataset_name: str, part='train', 18 | task='classification', transform=None, 19 | target_transform=None, download=True, 20 | normalize=False, standardize=False, 21 | features=False, 22 | horizon=None, 23 | stride=None, 24 | val_size=250, 25 | test_size=2000, 26 | scale_overall=True, scale_channelwise=True): 27 | 28 | super(UCR, self).__init__(root, transform=transform, 29 | target_transform=target_transform, 30 | task=task, normalize=normalize, 31 | horizon=horizon, 32 | stride=stride, 33 | standardize=standardize, 34 | scale_overall=scale_overall, 35 | val_size=val_size, 36 | test_size=test_size, 37 | scale_channelwise=scale_channelwise) 38 | if download: 39 | self._download(dataset_name) 40 | 41 | self.x, self.y = self.load_dataset(part=part, features=features) 42 | self.test_size = test_size # Absolute size of the test data set 43 | self.val_size = val_size # Absolute size of the validation data set 44 | 45 | def __len__(self): 46 | return len(self.x) 47 | 48 | def _download(self, dataset: str): 49 | final_path = os.path.join(self.root, self.base_folder) 50 | extracted_path = os.path.join(self.root, f'{dataset}_download') 51 | 52 | if not os.path.exists(final_path): 53 | os.makedirs(final_path) 54 | os.makedirs(extracted_path) 55 | else: 56 | return 57 | 58 | np.random.seed(self.seed) 59 | decompress_from_url(f'{self.src_website}/{dataset}.zip', target_dir=extracted_path) 60 | X_train_df, Y_train = load_from_tsfile_to_dataframe(f"{extracted_path}/{dataset}_TRAIN.ts") 61 | X_test_df, Y_test = load_from_tsfile_to_dataframe(f"{extracted_path}/{dataset}_TEST.ts") 62 | 63 | X_train_ = [] 64 | X_test_ = [] 65 | for i in range(X_train_df.shape[-1]): 66 | X_train_.append(stack_pad(X_train_df[f'dim_{i}'])) 67 | X_test_.append(stack_pad(X_test_df[f'dim_{i}'])) 68 | X_train = np.transpose(np.stack(X_train_, axis=-1), (0, 2, 1)).astype(np.float32) 69 | X_test = np.transpose(np.stack(X_test_, axis=-1), (0, 2, 1)).astype(np.float32) 70 | 71 | try: 72 | X_test, X_val, Y_test, Y_val = train_test_split(X_test, Y_test, 73 | test_size=self.val_size, 74 | random_state=self.seed, 75 | stratify=Y_test) 76 | except: 77 | print('X_test is too small, reduce X_val size to 400') 78 | X_test, X_val, Y_test, Y_val = train_test_split(X_test, Y_test, 79 | test_size=400, 80 | random_state=self.seed, 81 | stratify=Y_test) 82 | 83 | np.save(file=os.path.join(final_path, 'X_train.npy'), arr=X_train.astype(np.float32)) 84 | np.save(file=os.path.join(final_path, 'X_test.npy'), 
arr=X_test.astype(np.float32)) 85 | np.save(file=os.path.join(final_path, 'X_val.npy'), arr=X_val.astype(np.float32)) 86 | np.save(file=os.path.join(final_path, 'Y_train.npy'), arr=Y_train.astype(np.float32)) 87 | np.save(file=os.path.join(final_path, 'Y_test.npy'), arr=Y_test.astype(np.float32)) 88 | np.save(file=os.path.join(final_path, 'Y_val.npy'), arr=Y_val.astype(np.float32)) 89 | 90 | shutil.rmtree(extracted_path) 91 | self.save_stats(X_train.astype(np.float64)) 92 | self.extract_features_from_npy() 93 | 94 | 95 | class CROP(UCR): 96 | base_folder = 'crop' 97 | 98 | def __init__(self, root, part='train', task='classification', 99 | transform=None, target_transform=None, download=True, 100 | normalize=False, standardize=False, 101 | features=False, 102 | horizon=None, 103 | stride=None, 104 | val_size=250, 105 | test_size=2000, 106 | scale_overall=True, scale_channelwise=True): 107 | 108 | super().__init__(root, dataset_name='Crop', part=part, 109 | transform=transform, 110 | horizon=horizon, 111 | stride=stride, 112 | target_transform=target_transform, 113 | task=task, normalize=normalize, 114 | standardize=standardize, 115 | scale_overall=scale_overall, 116 | features=features, 117 | val_size=val_size, 118 | test_size=test_size, 119 | scale_channelwise=scale_channelwise) 120 | 121 | 122 | class FordB(UCR): 123 | base_folder = 'fordb' 124 | 125 | def __init__(self, root, part='train', task='classification', 126 | transform=None, target_transform=None, download=True, 127 | normalize=False, standardize=False, 128 | features=False, 129 | horizon=None, 130 | stride=None, 131 | val_size=250, 132 | test_size=2000, 133 | scale_overall=True, scale_channelwise=True): 134 | 135 | super().__init__(root, dataset_name='FordB', part=part, 136 | transform=transform, 137 | horizon=horizon, 138 | stride=stride, 139 | target_transform=target_transform, 140 | task=task, normalize=normalize, 141 | standardize=standardize, 142 | scale_overall=scale_overall, 143 | features=features, 144 | val_size=val_size, 145 | test_size=test_size, 146 | scale_channelwise=scale_channelwise) 147 | 148 | 149 | class ElectricDevices(UCR): 150 | base_folder = 'electricdevices' 151 | 152 | def __init__(self, root, part='train', task='classification', 153 | transform=None, target_transform=None, download=True, 154 | normalize=False, standardize=False, 155 | features=False, 156 | horizon=None, 157 | stride=None, 158 | val_size=250, 159 | test_size=2000, 160 | scale_overall=True, scale_channelwise=True): 161 | 162 | super().__init__(root, dataset_name='ElectricDevices', 163 | part=part, transform=transform, 164 | target_transform=target_transform, 165 | horizon=horizon, 166 | stride=stride, 167 | task=task, normalize=normalize, 168 | standardize=standardize, 169 | scale_overall=scale_overall, 170 | val_size=val_size, 171 | test_size=test_size, 172 | features=features, 173 | scale_channelwise=scale_channelwise) 174 | 175 | def decompress_from_url(url, target_dir=None, verbose=False): 176 | #Download 177 | try: 178 | fname = os.path.basename(url) 179 | tmpdir = tempfile.mkdtemp() 180 | local_comp_fname = os.path.join(tmpdir, fname) 181 | urlretrieve(url, local_comp_fname) 182 | except: 183 | shutil.rmtree(tmpdir) 184 | if verbose: sys.stderr.write("Could not download url. 
Please, check url.\n") 185 | 186 | #Decompress 187 | try: 188 | if not os.path.exists(target_dir): 189 | os.makedirs(target_dir) 190 | Archive(local_comp_fname).extractall(target_dir) 191 | shutil.rmtree(tmpdir) 192 | return target_dir 193 | except: 194 | shutil.rmtree(tmpdir) 195 | if verbose: sys.stderr.write("Could not decompress file, aborting.\n") 196 | return None 197 | 198 | def stack_pad(l): 199 | def resize(row, size): 200 | new = np.array(row) 201 | new.resize(size) 202 | return new 203 | row_length = max(l, key=len).__len__() 204 | mat = np.array([resize(row, row_length) for row in l]) 205 | return mat -------------------------------------------------------------------------------- /dl4d/datasets/wisdm.py: -------------------------------------------------------------------------------- 1 | import shutil 2 | import math 3 | import os 4 | import numpy as np 5 | import pandas as pd 6 | from sklearn.preprocessing import LabelEncoder 7 | from sklearn.model_selection import train_test_split 8 | from torchvision.datasets.utils import download_and_extract_archive 9 | from dl4d.timeseries import TimeseriesDataset 10 | 11 | 12 | class WISDM(TimeseriesDataset): 13 | base_folder = 'wisdm' 14 | url = 'http://www.cis.fordham.edu/wisdm/includes/datasets/latest/WISDM_ar_latest.tar.gz' 15 | filename = 'WISDM_ar_latest.tar.gz' 16 | 17 | seed = 1337 18 | overlap = 0.0 19 | length_sec = 4 20 | 21 | def __init__(self, root, part='train', task='classification', 22 | transform=None, target_transform=None, download=True, 23 | normalize=False, standardize=False, features=False, 24 | horizon=None, 25 | stride=None, 26 | val_size=250, 27 | test_size=2000, 28 | scale_overall=True, scale_channelwise=True): 29 | 30 | super(WISDM, self).__init__(root, transform=transform, 31 | target_transform=target_transform, 32 | horizon=horizon, 33 | stride=stride, 34 | val_size=val_size, 35 | test_size=test_size, 36 | task=task, normalize=normalize, standardize=standardize, 37 | scale_overall=scale_overall, scale_channelwise=scale_channelwise) 38 | 39 | if download: 40 | self.download() 41 | 42 | self.x, self.y = self.load_dataset(part=part, features=features) 43 | self.test_size = test_size # Absolute size of the test data set 44 | self.val_size = val_size # Absolute size of the validation data set 45 | 46 | def __len__(self): 47 | return len(self.x) 48 | 49 | def download(self): 50 | final_path = os.path.join(self.root, self.base_folder) 51 | extracted_path = os.path.join(self.root, 'WISDM_ar_v1.1') 52 | 53 | if os.path.exists(final_path): 54 | return 55 | 56 | np.random.seed(self.seed) 57 | download_and_extract_archive(self.url, self.root, 58 | filename=self.filename) 59 | 60 | array_WISDM = np.loadtxt('{}/WISDM_ar_v1.1_raw.txt'.format(extracted_path), dtype=str) 61 | 62 | # Replace values 63 | timeseries_WISDM = [None] * array_WISDM.shape[0] 64 | for idx in range(array_WISDM.shape[0]): 65 | timeseries_WISDM[idx] = array_WISDM[idx].split(",") 66 | 67 | # Convert weird data format to pd.DataFrame, compatibel with tsfresh 68 | data = pd.DataFrame(timeseries_WISDM, columns=[ 69 | 'ID', 'activity', 'timestamp', 70 | 'variable1', 'variable2', 'variable3', 71 | 'x1', 'x2', 'x3', 'x4', 'x5']) 72 | 73 | # Delete las columns ['x1' - 'x6'] as not relevant 74 | data = data.iloc[:, 0:6] 75 | data = data.dropna() 76 | 77 | # Last column has ; at the end of each numeric, replace with empty 78 | data.iloc[:, 5] = [i.replace(";", "") for i in data.iloc[:, 5].values] 79 | data['variable1'] = pd.to_numeric(data['variable1']) 80 | 
data['variable2'] = pd.to_numeric(data['variable2']) 81 | data['variable3'] = pd.to_numeric(data['variable3']) 82 | 83 | # Problem: time stamps are corrupted 84 | # Solution: no resampling etc, simply loop over them incl overlap 85 | 86 | lts = self.length_sec * 20 87 | timestep = math.ceil((1.0 - self.overlap) * lts) 88 | 89 | windows = [] 90 | labels = [] 91 | for i in range(0, len(data) - lts, timestep): 92 | x = data['variable1'].values[i: i + lts] 93 | y = data['variable2'].values[i: i + lts] 94 | z = data['variable3'].values[i: i + lts] 95 | windows.append([x, y, z]) 96 | v, cnt = np.unique(data['activity'][i: i + lts], return_counts=True) 97 | idx = np.argmax(cnt) 98 | labels.append(v[idx]) 99 | 100 | X = np.nan_to_num(np.asarray(windows, dtype=np.float32)) 101 | 102 | # Encode the labels to ints 103 | Y = pd.DataFrame(labels, columns=['label']) 104 | le = LabelEncoder() 105 | le.fit(Y['label'].values) 106 | le.transform(Y['label'].values) 107 | Y_foo = le.transform(Y['label'].values) 108 | Y = pd.DataFrame(Y_foo.tolist(), columns=['label']) 109 | 110 | # Split X data in X_test and X_train 111 | X_train, X_test, Y_train, Y_test = train_test_split(X, Y, 112 | test_size=self.val_size + self.test_size, 113 | random_state=self.seed, 114 | stratify=Y) 115 | 116 | 117 | # Split X_test data in X_test and X_val 118 | X_test, X_val, Y_test, Y_val = train_test_split(X_test, Y_test, 119 | test_size=self.val_size, 120 | random_state=self.seed, 121 | stratify=Y_test) 122 | 123 | os.makedirs(final_path, exist_ok=True) 124 | 125 | np.save(file=os.path.join(final_path, 'X_train.npy'), arr=X_train) 126 | np.save(file=os.path.join(final_path, 'X_test.npy'), arr=X_test) 127 | np.save(file=os.path.join(final_path, 'X_val.npy'), arr=X_val) 128 | np.save(file=os.path.join(final_path, 'Y_train.npy'), arr=Y_train.astype(np.float32).squeeze(1)) 129 | np.save(file=os.path.join(final_path, 'Y_test.npy'), arr=Y_test.astype(np.float32).squeeze(1)) 130 | np.save(file=os.path.join(final_path, 'Y_val.npy'), arr=Y_val.astype(np.float32).squeeze(1)) 131 | 132 | shutil.rmtree(extracted_path) 133 | self.save_stats(X_train.astype(np.float64)) 134 | self.extract_features_from_npy() 135 | -------------------------------------------------------------------------------- /dl4d/images.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import pdb 4 | import torch 5 | import numpy as np 6 | import pandas as pd 7 | import tsfresh 8 | from torch.utils import data 9 | 10 | 11 | class ImageDataset(data.Dataset): 12 | base_path = None 13 | 14 | def __init__(self, 15 | root, 16 | transform=None, 17 | target_transform=None, 18 | task='classification', 19 | val_size=None, 20 | test_size=None, 21 | normalize=False, standardize=False, 22 | scale_overall=False, scale_channelwise=False): 23 | if isinstance(root, torch._six.string_classes): 24 | root = os.path.expanduser(root) 25 | self.root = root 26 | self.labelled_idxs = None 27 | self.transform = transform 28 | self.target_transform = target_transform 29 | 30 | 31 | @property 32 | def size(self): 33 | return self.x.shape[2] 34 | 35 | @property 36 | def length(self): 37 | return None 38 | 39 | @property 40 | def nvariables(self): 41 | return None 42 | 43 | @property 44 | def nclasses(self): 45 | return len(np.unique(self.y)) 46 | 47 | def __getitem__(self, index): 48 | x = self.x[index] 49 | y = self.y[index] 50 | 51 | if self.transform: 52 | x = torch.tensor(x).to(torch.float32) 53 | x = self.transform(x) 54 | 55 | if 
self.labelled_idxs and index not in self.labelled_idxs: 56 | y = -1 57 | 58 | return x, y 59 | 60 | def __len__(self): 61 | raise NotImplementedError 62 | 63 | def load_dataset(self, part='train'): 64 | path = os.path.join(self.root, self.base_folder) 65 | 66 | x_path = os.path.join(path, 'X_{}.npy'.format(part)) 67 | x = np.load(file=x_path).astype('float32') 68 | 69 | y = np.load(file=os.path.join(path, 'Y_{}.npy'.format(part))).astype('int') 70 | 71 | if -1 not in np.unique(y): 72 | classes = np.unique(y) 73 | for idx in range(len(classes)): 74 | np.place(y, y == classes[idx], idx) 75 | y = y.astype(int) 76 | 77 | assert len(x) == len(y) 78 | 79 | return x, y 80 | -------------------------------------------------------------------------------- /dl4d/sampler.py: -------------------------------------------------------------------------------- 1 | import itertools 2 | import math 3 | 4 | import numpy as np 5 | from sklearn.model_selection import train_test_split 6 | from torch.utils.data import Sampler, Dataset 7 | 8 | 9 | class SemiSupervisionSampler(Sampler): 10 | """ 11 | A sampler for loading a dataset in a semi supervised fashion. 12 | 13 | This sampler is inspired by the `TwoStreamBatchSampler` 14 | from the code for the Mean Teacher implementation by 15 | Curious AI. 16 | 17 | https://github.com/CuriousAI/mean-teacher/blob/546348ff863c998c26be4339021425df973b4a36/pytorch/mean_teacher/data.py#L105 18 | 19 | Args: 20 | dataset: A pytorch Dataset 21 | batch_size: The total batch size 22 | num_labels_in_batch: The number of labeled data in a batch 23 | num_labels_in_dataset: The number of labeled data in the dataset 24 | seed: Seed for the random unlabelling of the dataset 25 | 26 | Returns: 27 | defines the sampling procedure for the Dataloader later. 28 | """ 29 | def __init__(self, dataset: Dataset, batch_size: int = 10, 30 | num_labels_in_batch: int = 5, 31 | num_labels_in_dataset: int = None, 32 | seed: int = 1337, 33 | drop_last: bool = False): 34 | 35 | assert batch_size < len(dataset), "Cannot load a batch bigger than the dataset" 36 | assert batch_size >= num_labels_in_batch, "Cannot have more labeled data in batch than batch size" 37 | assert num_labels_in_dataset <= len(dataset), "The number of labeled data in dataset must be smaller than the dataset size" 38 | 39 | self.drop_last = drop_last 40 | self.batch_size = batch_size 41 | self.num_labels_in_batch = num_labels_in_batch 42 | self.num_labels_in_dataset = num_labels_in_dataset 43 | self.num_unlabelled_in_batch = batch_size - num_labels_in_batch 44 | 45 | dataset_idxs = range(len(dataset)) 46 | 47 | self.unlabelled_idxs, self.labelled_idxs = make_unlabelling_split( 48 | dataset_idxs=dataset_idxs, 49 | y=dataset.y, 50 | num_labels_in_dataset=num_labels_in_dataset, 51 | seed=seed 52 | ) 53 | 54 | dataset.labelled_idxs = self.labelled_idxs 55 | 56 | # If either we have a full labelled batch or a full labelled dataset 57 | # then we should never iterate over the unlabelled dataset 58 | full_labelled_batch = bool(batch_size == num_labels_in_batch) 59 | full_labelled_dataset = bool(num_labels_in_dataset == len(dataset)) 60 | 61 | self.iterate_only_over_labelled = full_labelled_batch or full_labelled_dataset 62 | 63 | def __iter__(self): 64 | """ 65 | Returns: 66 | A list of tuples where each tuple represents a batch and contains 67 | the idx for the datapoints in the given batch. 
68 | """ 69 | if self.iterate_only_over_labelled: 70 | labeled_iter = iterate_once(self.labelled_idxs) 71 | 72 | # This snippet is taken from the Pytorch BatchSampler. 73 | # It essentially loops over the indicies and fills up 74 | # batches. Once a batch is filled up then it is yielded 75 | batch = [] 76 | for idx in labeled_iter: 77 | batch.append(idx) 78 | if len(batch) == self.batch_size: 79 | yield batch 80 | batch = [] 81 | if len(batch) > 0 and not self.drop_last: 82 | yield batch 83 | 84 | else: 85 | unlabeled_iter = iterate_once(self.unlabelled_idxs) 86 | labeled_iter = iterate_eternally(self.labelled_idxs) 87 | 88 | batches = zip(grouper(unlabeled_iter, self.num_unlabelled_in_batch), 89 | grouper(labeled_iter, self.num_labels_in_batch)) 90 | 91 | for (labeled_batch, unlabeled_batch) in batches: 92 | yield labeled_batch + unlabeled_batch 93 | 94 | 95 | def __len__(self): 96 | if self.iterate_only_over_labelled: 97 | if self.drop_last: 98 | return len(self.labelled_idxs) // self.batch_size 99 | 100 | # We will be doing ceil division because we do not want to drop_last 101 | return math.ceil(len(self.labelled_idxs) / self.batch_size) 102 | 103 | return len(self.unlabelled_idxs) // self.num_unlabelled_in_batch 104 | 105 | 106 | 107 | class SupervisionSampler(Sampler): 108 | """ 109 | A sampler for loading a dataset in a supervised fashion. 110 | 111 | Args: 112 | dataset: A pytorch Dataset 113 | batch_size: The total batch size 114 | num_labels_in_batch: The number of labeled data in a batch 115 | num_labels_in_dataset: The number of labeled data in the dataset 116 | seed: Seed for the random unlabelling of the dataset 117 | 118 | Returns: 119 | defines the sampling procedure for the Dataloader later. 120 | """ 121 | def __init__(self, dataset: Dataset, 122 | batch_size: int = 10, 123 | num_labels_in_dataset: int = None, 124 | seed: int = 1337, 125 | drop_last: bool = False): 126 | 127 | assert batch_size < len(dataset), "Cannot load a batch bigger than the dataset" 128 | assert num_labels_in_dataset <= len(dataset), "The number of labeled data in dataset must be smaller than the dataset size" 129 | 130 | self.drop_last = drop_last 131 | self.batch_size = batch_size 132 | self.num_labels_in_dataset = num_labels_in_dataset 133 | 134 | dataset_idxs = range(len(dataset)) 135 | 136 | self.unlabelled_idxs, self.labelled_idxs = make_unlabelling_split( 137 | dataset_idxs=dataset_idxs, 138 | y=dataset.y, 139 | num_labels_in_dataset=num_labels_in_dataset, 140 | seed=seed 141 | ) 142 | 143 | dataset.labelled_idxs = self.labelled_idxs 144 | 145 | def __iter__(self): 146 | """ 147 | Returns: 148 | A list of tuples where each tuple represents a batch and contains 149 | the idx for the datapoints in the given batch. 150 | """ 151 | labeled_iter = iterate_once(self.labelled_idxs) 152 | # This snippet is taken from the Pytorch BatchSampler. 153 | # It essentially loops over the indicies and fills up 154 | # batches. 
Once a batch is filled up then it is yielded 155 | batch = [] 156 | for idx in labeled_iter: 157 | batch.append(idx) 158 | if len(batch) == self.batch_size: 159 | yield batch 160 | batch = [] 161 | if len(batch) > 0 and not self.drop_last: 162 | yield batch 163 | 164 | def __len__(self): 165 | if self.drop_last: 166 | return len(self.labelled_idxs) // self.batch_size 167 | 168 | # We will be doing ceil division because we do not want to drop_last 169 | return math.ceil(len(self.labelled_idxs) / self.batch_size) 170 | 171 | 172 | 173 | def iterate_once(indicies): 174 | return np.random.permutation(indicies) 175 | 176 | 177 | def iterate_eternally(indices): 178 | def infinite_shuffles(): 179 | while True: 180 | yield np.random.permutation(indices) 181 | 182 | return itertools.chain.from_iterable(infinite_shuffles()) 183 | 184 | 185 | def grouper(iterable, n): 186 | args = [iter(iterable)] * n 187 | return zip(*args) 188 | 189 | 190 | def make_unlabelling_split(dataset_idxs, y, num_labels_in_dataset: int, 191 | seed: int = 1337): 192 | """ 193 | This function (ab)uses sklearns train_test_split() for stratified 194 | unlabelling 195 | Args: 196 | dataset_idxs: 197 | y: 198 | num_labels_in_dataset: 199 | seed: 200 | 201 | Returns: 202 | 203 | """ 204 | 205 | if len(dataset_idxs) == num_labels_in_dataset: 206 | return [], dataset_idxs 207 | 208 | unlabelled_idxs, labelled_idxs, _, _ = train_test_split( 209 | dataset_idxs, y, 210 | test_size=num_labels_in_dataset, 211 | random_state=seed, 212 | stratify=y 213 | ) 214 | 215 | return unlabelled_idxs, labelled_idxs 216 | -------------------------------------------------------------------------------- /experiments/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Goschjann/ssltsc/08d6b1bf711bb1c8f19f9bfb66a98d4e423e932e/experiments/.DS_Store -------------------------------------------------------------------------------- /experiments/config_files/fixmatch.yaml: -------------------------------------------------------------------------------- 1 | # 4 different param blocks 2 | 3 | exp_params: 4 | n_steps: 102400 5 | val_steps: 1024 6 | lr_scheduler: cosine 7 | backbone: 'wideresnet28' 8 | model_name: 'fixmatch' 9 | mlflow_id: 1 10 | mlflow_name: 'fixmatch_exps' 11 | early_stopping: True 12 | early_stopping_metric: 'val_weighted_auc' 13 | tuning_criterion: 'val_weighted_auc' 14 | 15 | model_params: 16 | lr: 0.03 17 | weight_decay: 0.0005 18 | warmup_epochs: 0 19 | use_ema: True 20 | ema_decay: 0.999 21 | temp: 1 # pseudolabel temp 22 | threshold: 0.95 # pseudolabel threshold 23 | lambda_u: 1 # unlabelled loss weight 24 | 25 | data_params: 26 | path: '../data/' 27 | dataset: 'cifar10' 28 | model: 'fixmatch' 29 | num_labels: 4000 30 | seed: 1249 31 | da_strategy: 'fixmatch' 32 | batch_size: 64 33 | mu: 7 34 | inference_batch_size: 128 35 | num_workers: 6 36 | N: 3 37 | magnitude: 1 38 | val_size: 100 39 | test_size: 2000 40 | 41 | search_space: 42 | lr: 43 | low: 0.000001 44 | high: 0.01 45 | type: "log" 46 | weight_decay: 47 | low: 0.0000001 48 | high: 0.001 49 | type: "log" 50 | magnitude: 51 | low: 1 52 | high: 10 53 | step: 1 54 | type: "int" 55 | N: 56 | low: 1 57 | high: 6 58 | step: 1 59 | type: "int" -------------------------------------------------------------------------------- /experiments/config_files/ladder.yaml: -------------------------------------------------------------------------------- 1 | # 4 different param blocks 2 | 3 | exp_params: 4 | n_steps: 25000 5 | 
val_steps: 5000 6 | lr_scheduler: None 7 | backbone: 'ladder' 8 | model_name: 'ladder' 9 | mlflow_id: 1 10 | mlflow_name: 'laddernet' 11 | early_stopping: False 12 | early_stopping_metric: 'val_weighted_auc' 13 | tuning_criterion: 'val_weighted_auc' 14 | 15 | data_params: 16 | path: '../data/' 17 | dataset: 'wisdm' 18 | model: 'ladder' 19 | num_labels: 1000 20 | seed: 1249 21 | K: 1 22 | features: False 23 | da_strategy: None 24 | batch_size: 100 25 | labeled_batch_size: 50 26 | inference_batch_size: 128 27 | standardize: False 28 | normalize: False 29 | scale_overall: True 30 | scale_channelwise: False 31 | sample_supervised: False 32 | num_workers: 0 33 | N: 3 34 | magnitude: 1 35 | val_size: 100 36 | test_size: 2000 37 | 38 | model_params: 39 | lr: 0.0002 40 | weight_decay: 0.0 41 | noise_sd: 0.8 42 | loss_weights: 43 | - 1.0 44 | - 1.0 45 | - 1.0 46 | - 1.0 47 | - 1.0 48 | 49 | search_space: 50 | lr: 51 | low: 0.000001 52 | high: 0.001 53 | step: 0.0005 54 | type: "float" 55 | weight_decay: 56 | low: 0.0 57 | high: 0.001 58 | step: 0.0001 59 | type: "float" 60 | noise_sd: 61 | type: "categorical" 62 | choices: 63 | - 0.1 64 | - 0.3 65 | - 0.45 66 | - 0.6 67 | loss_weights: 68 | length: 5 69 | low: 0.1 70 | high: 10.0 71 | magnitude: 72 | low: 1 73 | high: 10 74 | step: 1 75 | type: "int" 76 | N: 77 | low: 1 78 | high: 6 79 | step: 1 80 | type: "int" -------------------------------------------------------------------------------- /experiments/config_files/logisticregression.yaml: -------------------------------------------------------------------------------- 1 | data_params: 2 | path: '../data/' 3 | dataset: 'pamap2' 4 | model: 'logisticregression' 5 | num_labels: 500 6 | seed: 1249 7 | features: True 8 | val_size: 1000 9 | test_size: 2000 10 | 11 | exp_params: 12 | mlflow_id: 1 13 | mlflow_name: 'tune_ml' 14 | model_name: 'logisticregression' 15 | 16 | model_params: 17 | penalty: "none" 18 | max_iter: 1000 19 | 20 | search_space: 21 | penalty: 22 | type: "categorical" 23 | choices: 24 | - "l1" 25 | - "l2" 26 | - "none" -------------------------------------------------------------------------------- /experiments/config_files/meanteacher.yaml: -------------------------------------------------------------------------------- 1 | # 4 different param blocks 2 | 3 | exp_params: 4 | n_steps: 25000 5 | val_steps: 5000 6 | lr_scheduler: None 7 | backbone: 'FCN' 8 | model_name: 'meanteacher' 9 | mlflow_id: 1 10 | mlflow_name: 'oof' 11 | early_stopping: False 12 | early_stopping_metric: 'val_weighted_auc' 13 | tuning_criterion: 'val_weighted_auc' 14 | 15 | model_params: 16 | rampup_length: 60 17 | max_w: 2 18 | alpha_ema: 0.95 19 | lr: 0.0001 20 | weight_decay: 0.0 21 | 22 | data_params: 23 | path: '../data/' 24 | dataset: 'wisdm' 25 | model: 'meanteacher' 26 | num_labels: 1000 27 | seed: 1249 28 | K: 2 29 | features: False 30 | da_strategy: 'randaug' 31 | batch_size: 64 32 | labeled_batch_size: 16 33 | inference_batch_size: 128 34 | standardize: False 35 | normalize: False 36 | sample_supervised: False 37 | scale_overall: True 38 | scale_channelwise: False 39 | num_workers: 6 40 | N: 3 41 | magnitude: 1 42 | val_size: 100 43 | test_size: 2000 44 | 45 | search_space: 46 | alpha_ema: 47 | low: 0.9 48 | high: 1.0 49 | type: "log" 50 | rampup_length: 51 | low: 2500 52 | high: 25000 53 | step: 2500 54 | type: "int" 55 | max_w: 56 | low: 1 57 | high: 10 58 | step: 1 59 | type: "int" 60 | lr: 61 | low: 0.000001 62 | high: 0.001 63 | type: "log" 64 | weight_decay: 65 | low: 0.0000001 66 | high: 0.001 67 
| type: "log" 68 | magnitude: 69 | low: 1 70 | high: 10 71 | step: 1 72 | type: "int" 73 | N: 74 | low: 1 75 | high: 6 76 | step: 1 77 | type: "int" -------------------------------------------------------------------------------- /experiments/config_files/mixmatch.yaml: -------------------------------------------------------------------------------- 1 | # 4 different param blocks 2 | 3 | exp_params: 4 | n_steps: 25000 5 | val_steps: 5000 6 | lr_scheduler: None 7 | backbone: 'FCN' 8 | model_name: 'mixmatch' 9 | mlflow_id: 1 10 | mlflow_name: 'oof' 11 | early_stopping: False 12 | early_stopping_metric: 'val_weighted_auc' 13 | tuning_criterion: 'val_weighted_auc' 14 | 15 | model_params: 16 | alpha: 0.75 17 | K: 2 18 | T: 0.5 19 | rampup_length: 20000 20 | lambda_u: 75 21 | alpha_ema: 0.999 22 | plot_mixup: False 23 | lr: 0.0001 24 | weight_decay: 0.0 25 | 26 | data_params: 27 | path: '../data/' 28 | dataset: 'wisdm' 29 | model: 'mixmatch' 30 | num_labels: 500 31 | seed: 1249 32 | K: 2 33 | features: False 34 | da_strategy: 'randaug' 35 | batch_size: 128 36 | labeled_batch_size: 64 37 | inference_batch_size: 128 38 | standardize: False 39 | normalize: False 40 | scale_overall: True 41 | scale_channelwise: False 42 | sample_supervised: False 43 | num_workers: 6 44 | N: 3 45 | magnitude: 1 46 | val_size: 100 47 | test_size: 2000 48 | 49 | search_space: 50 | alpha: 51 | low: 0.5 52 | high: 1.0 53 | step: 0.05 54 | type: "float" 55 | lambda_u: 56 | low: 0.0 57 | high: 150.0 58 | step: 1.0 59 | type: "float" 60 | rampup_length: 61 | low: 2500 62 | high: 25000 63 | step: 2500 64 | type: "int" 65 | lr: 66 | low: 0.000001 67 | high: 0.001 68 | type: "log" 69 | weight_decay: 70 | low: 0.0000001 71 | high: 0.001 72 | type: "log" 73 | magnitude: 74 | low: 1 75 | high: 10 76 | step: 1 77 | type: "int" 78 | N: 79 | low: 1 80 | high: 6 81 | step: 1 82 | type: "int" -------------------------------------------------------------------------------- /experiments/config_files/randomforest.yaml: -------------------------------------------------------------------------------- 1 | data_params: 2 | path: '../data/' 3 | dataset: 'fordb' 4 | model: 'randomforest' 5 | num_labels: 500 6 | seed: 1249 7 | features: True 8 | val_size: 1000 9 | test_size: 2000 10 | 11 | exp_params: 12 | mlflow_id: 1 13 | mlflow_name: None 14 | model_name: 'randomforest' 15 | 16 | model_params: 17 | n_estimators: 15 18 | max_depth: 3 19 | 20 | search_space: 21 | n_estimators: 22 | low: 10 23 | high: 1000 24 | step: 10 25 | type: "int" 26 | max_depth: 27 | low: 3 28 | high: 25 29 | step: 1 30 | type: "int" -------------------------------------------------------------------------------- /experiments/config_files/selfsupervised.yaml: -------------------------------------------------------------------------------- 1 | # 4 different param blocks 2 | 3 | exp_params: 4 | n_steps: 60 5 | val_steps: 20 6 | lr_scheduler: None 7 | backbone: 'fcnmultitask' 8 | model_name: 'selfsupervised' 9 | mlflow_id: 1 10 | mlflow_name: 'oof' 11 | early_stopping: False 12 | early_stopping_metric: 'val_weighted_auc' 13 | tuning_criterion: 'val_weighted_auc' 14 | 15 | model_params: 16 | mixup: False 17 | lr: 0.0001 18 | weight_decay: 0.00005 19 | lambda: 1.0 20 | horizon: 0.2 21 | stride: 0.3 22 | 23 | data_params: 24 | path: '../data/' 25 | dataset: 'pamap2' 26 | model: 'selfsupervised' 27 | num_labels: 500 28 | seed: 1249 29 | K: 1 30 | features: False 31 | da_strategy: None 32 | batch_size: 64 33 | labeled_batch_size: 16 34 | inference_batch_size: 128 35 | 
standardize: False 36 | normalize: False 37 | scale_overall: True 38 | scale_channelwise: False 39 | sample_supervised: True 40 | num_workers: 6 41 | fully_labeled: True 42 | N: 3 43 | magnitude: 1 44 | horizon: 0.2 45 | stride: 0.3 46 | val_size: 100 47 | test_size: 2000 48 | 49 | search_space: 50 | lr: 51 | low: 0.000001 52 | high: 0.01 53 | type: "log" 54 | weight_decay: 55 | low: 0.0000001 56 | high: 0.001 57 | type: "log" 58 | horizon: 59 | type: "categorical" 60 | choices: 61 | - 0.1 62 | - 0.2 63 | - 0.3 64 | stride: 65 | type: "categorical" 66 | choices: 67 | - 0.05 68 | - 0.1 69 | - 0.2 70 | - 0.3 -------------------------------------------------------------------------------- /experiments/config_files/supervised.yaml: -------------------------------------------------------------------------------- 1 | # 4 different param blocks 2 | 3 | exp_params: 4 | n_steps: 25000 5 | val_steps: 5000 6 | lr_scheduler: None 7 | backbone: 'FCN' 8 | model_name: 'supervised' 9 | mlflow_id: 1 10 | mlflow_name: 'oof' 11 | early_stopping: False 12 | early_stopping_metric: 'val_weighted_auc' 13 | tuning_criterion: 'val_weighted_auc' 14 | 15 | model_params: 16 | mixup: False 17 | lr: 4.0202387014768895e-06 18 | weight_decay: 1.8605931539339385e-07 19 | 20 | data_params: 21 | path: '../data/' 22 | dataset: 'wisdm' 23 | model: 'supervised' 24 | num_labels: 1000 25 | seed: 1249 26 | K: 1 27 | features: False 28 | da_strategy: 'randaug' 29 | batch_size: 100 30 | inference_batch_size: 128 31 | standardize: False 32 | normalize: False 33 | scale_overall: True 34 | scale_channelwise: False 35 | sample_supervised: True 36 | num_workers: 6 37 | fully_labeled: False 38 | N: 3 39 | magnitude: 1 40 | val_size: 100 41 | test_size: 2000 42 | 43 | search_space: 44 | lr: 45 | low: 0.000001 46 | high: 0.01 47 | type: "log" 48 | weight_decay: 49 | low: 0.0000001 50 | high: 0.001 51 | type: "log" 52 | magnitude: 53 | low: 1 54 | high: 10 55 | step: 1 56 | type: "int" 57 | N: 58 | low: 1 59 | high: 6 60 | step: 1 61 | type: "int" -------------------------------------------------------------------------------- /experiments/config_files/supervised_full.yaml: -------------------------------------------------------------------------------- 1 | # 4 different param blocks 2 | 3 | exp_params: 4 | n_steps: 250 5 | val_steps: 50 6 | lr_scheduler: None 7 | backbone: 'FCN' 8 | model_name: 'supervised' 9 | mlflow_id: 1 10 | mlflow_name: 'oof' 11 | early_stopping: False 12 | early_stopping_metric: 'val_weighted_auc' 13 | tuning_criterion: 'val_weighted_auc' 14 | 15 | model_params: 16 | mixup: False 17 | lr: 0.0001 18 | weight_decay: 0.00005 19 | 20 | data_params: 21 | path: '../data/' 22 | dataset: 'pamap2' 23 | model: 'supervised' 24 | num_labels: 500 25 | seed: 1249 26 | K: 1 27 | features: False 28 | da_strategy: 'randaug' 29 | batch_size: 100 30 | labeled_batch_size: 100 # We have to have labeled_batch_size=batch size as we only need iteration over labeled samples 31 | inference_batch_size: 128 32 | standardize: False 33 | normalize: False 34 | scale_overall: True 35 | scale_channelwise: False 36 | sample_supervised: True 37 | num_workers: 6 38 | fully_labeled: True 39 | N: 3 40 | magnitude: 1 41 | val_size: 1000 42 | test_size: 2000 43 | 44 | search_space: 45 | lr: 46 | low: 0.000001 47 | high: 0.01 48 | type: "log" 49 | weight_decay: 50 | low: 0.0000001 51 | high: 0.001 52 | type: "log" 53 | magnitude: 54 | low: 1 55 | high: 10 56 | step: 1 57 | type: "int" 58 | N: 59 | low: 1 60 | high: 6 61 | step: 1 62 | type: "int" 
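All experiment config files in this directory share the same four parameter blocks (exp_params, model_params, data_params, search_space). The snippet below is a minimal, illustrative sketch of loading and inspecting one of them with PyYAML; it is not the repository's own update_config/convert_to_tuner_config helpers, and the relative path assumes the snippet is run from the experiments/ directory.

import yaml

# Load one of the experiment configs shown above (illustrative path).
with open('config_files/supervised_full.yaml') as f:
    config = yaml.safe_load(f)

# The four blocks shared by all config files in this directory.
exp_params = config['exp_params']      # run settings, e.g. n_steps, backbone
model_params = config['model_params']  # algorithm hyperparameters, e.g. lr
data_params = config['data_params']    # dataset, batch sizes, augmentation
search_space = config['search_space']  # ranges sampled during optuna tuning

print(exp_params['model_name'], data_params['dataset'], model_params['lr'])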
-------------------------------------------------------------------------------- /experiments/config_files/vat.yaml: -------------------------------------------------------------------------------- 1 | # 4 different param blocks 2 | 3 | exp_params: 4 | n_steps: 25000 5 | val_steps: 5000 6 | lr_scheduler: None 7 | backbone: 'FCN' 8 | model_name: 'vat' 9 | mlflow_id: 1 10 | mlflow_name: 'oof' 11 | early_stopping: False 12 | early_stopping_metric: 'val_weighted_auc' 13 | tuning_criterion: 'val_weighted_auc' 14 | 15 | model_params: 16 | xi: 0.0000001 17 | epsilon: 2.5 18 | method: 'vatent' 19 | alpha: 1.0 20 | plot_adversarials: False 21 | lr: 0.0001 22 | weight_decay: 0.0 23 | 24 | data_params: 25 | path: '../data/' 26 | dataset: 'wisdm' 27 | model: 'vat' 28 | num_labels: 1000 29 | seed: 1249 30 | K: 1 31 | features: False 32 | da_strategy: 'randaug' 33 | batch_size: 64 34 | labeled_batch_size: 16 35 | standardize: False 36 | normalize: False 37 | scale_overall: True 38 | scale_channelwise: False 39 | sample_supervised: False 40 | num_workers: 6 41 | N: 3 42 | magnitude: 1 43 | val_size: 100 44 | test_size: 2000 45 | 46 | search_space: 47 | epsilon: 48 | low: 0.1 49 | high: 10.0 50 | step: 0.1 51 | type: "float" 52 | alpha: 53 | low: 0.1 54 | high: 5.0 55 | step: 0.1 56 | type: "float" 57 | lr: 58 | low: 0.000001 59 | high: 0.001 60 | type: "log" 61 | weight_decay: 62 | low: 0.0000001 63 | high: 0.001 64 | type: "log" 65 | magnitude: 66 | low: 1 67 | high: 10 68 | step: 1 69 | type: "int" 70 | N: 71 | low: 1 72 | high: 6 73 | step: 1 74 | type: "int" -------------------------------------------------------------------------------- /experiments/run.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import mlflow 3 | 4 | from ssltsc.callbacks import TimerCallback 5 | 6 | import os 7 | import pdb 8 | import time 9 | 10 | from ssltsc.callbacks import ConsoleLoggingCallback, MlflowLoggingCallback, MeanTeacherConsoleLoggingCallback 11 | from ssltsc.architectures.utils import backbone_factory 12 | from ssltsc.data import load_dataloaders 13 | from ssltsc.experiments import get_experiment_id, get_base_argparser, \ 14 | update_config 15 | from ssltsc.models.model_factory import model_factory 16 | 17 | 18 | def parse_args(): 19 | parser = get_base_argparser(description='generic') 20 | args = parser.parse_args() 21 | return args 22 | 23 | 24 | def run_experiment(args): 25 | config = update_config(args) 26 | 27 | experiment_id = get_experiment_id(config=config) 28 | mlflow.start_run(experiment_id=experiment_id) 29 | mlflow.set_tag(key='dataset', value=args.dataset) 30 | mlflow.set_tag(key='run', value='training') 31 | mlflow.set_tag(key='model', value=config['exp_params']['model_name']) 32 | mlflow.log_param(key='model', value=config['exp_params']['model_name']) 33 | 34 | # DATA 35 | data_dict = load_dataloaders(**config['data_params']) 36 | # MODEL 37 | horizon = config['data_params']['horizon'] if "horizon" in config['data_params'] else None 38 | backbone, backbone_dict = backbone_factory(architecture=config['exp_params']['backbone'], 39 | dataset=config['data_params']['dataset'], 40 | horizon=horizon, 41 | n_classes=data_dict['train_data_l'].nclasses, 42 | n_channels=data_dict['train_data_l'].nvariables, 43 | lengthts=data_dict['train_data_l'].length) 44 | 45 | if config['exp_params']['model_name'] == 'meanteacher': 46 | callbacks = [MeanTeacherConsoleLoggingCallback(), MlflowLoggingCallback()] 47 | else: 48 | callbacks = 
[ConsoleLoggingCallback(), MlflowLoggingCallback(), TimerCallback(verbose=False)] 49 | model = model_factory(model_name=config['exp_params']['model_name'], 50 | backbone=backbone, 51 | backbone_dict=backbone_dict, 52 | callbacks=callbacks) 53 | 54 | # log parameters (except the search space) 55 | for cfg in config.items(): 56 | if cfg[0] != 'search_space': 57 | for k, v in cfg[1].items(): 58 | mlflow.log_param(key=k, value=v) 59 | 60 | opt_dict = {'lr': config['model_params']['lr'], 61 | 'weight_decay': config['model_params']['weight_decay']} 62 | 63 | # halve the labelled batch size in case num_labels < labelled batch size 64 | start_time = time.time() 65 | model.train(opt_dict=opt_dict, 66 | data_dict=data_dict, 67 | model_params=config['model_params'], 68 | exp_params=config['exp_params']) 69 | eta = round(time.time() - start_time, 3) 70 | print(f'Training took {eta} seconds') 71 | # Evaluate the model 72 | model.evaluate(data_loader=data_dict['test_gen'], 73 | early_stopping=config['exp_params']['early_stopping'], 74 | plot_reliability=False, 75 | model_name=config['exp_params']['model_name']) 76 | mlflow.log_metric(key='es_step', value=model.es_step) 77 | 78 | mlflow.end_run() 79 | 80 | 81 | if __name__ == "__main__": 82 | args = parse_args() 83 | run_experiment(args=args) 84 | -------------------------------------------------------------------------------- /experiments/run_baselines.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import numpy as np 3 | import os 4 | import mlflow 5 | import pdb 6 | 7 | from sklearn.ensemble import RandomForestClassifier 8 | from sklearn.linear_model import LogisticRegression 9 | from sklearn.semi_supervised import LabelPropagation, LabelSpreading 10 | 11 | from ssltsc import constants as c 12 | from ssltsc.experiments import get_experiment_id, get_base_argparser, update_config 13 | from ssltsc.models.utils import calculate_classification_metrics 14 | from ssltsc.data import load_dataloaders 15 | 16 | from ssltsc.models.losses import rbf_kernel_safe 17 | 18 | def parse_args(): 19 | parser = get_base_argparser(description='ML baseline on extracted features') 20 | args = parser.parse_args() 21 | return args 22 | 23 | 24 | def run_experiment(args): 25 | config = update_config(args) 26 | 27 | experiment_id = get_experiment_id(config=config) 28 | mlflow.start_run(experiment_id=experiment_id) 29 | mlflow.set_tag(key='dataset', value=args.dataset) 30 | mlflow.set_tag(key='run', value='training') 31 | mlflow.set_tag(key='model', value=config['exp_params']['model_name']) 32 | mlflow.log_param(key='model', value=config['exp_params']['model_name']) 33 | 34 | #DATA 35 | data_dict = load_dataloaders(**config['data_params']) 36 | idx_labelled_train = data_dict['train_gen_l'].batch_sampler.labelled_idxs 37 | idx_unlabelled_train = data_dict['train_gen_l'].batch_sampler.unlabelled_idxs 38 | 39 | X_train, Y_train = data_dict['train_data_l'].x[idx_labelled_train], data_dict['train_data_l'].y[idx_labelled_train] 40 | X_train_ul, Y_train_ul = data_dict['train_data_l'].x[idx_unlabelled_train], data_dict['train_data_l'].y[idx_unlabelled_train] 41 | Y_train_ul = np.full(shape=Y_train_ul.shape, fill_value=-1) 42 | X_test, Y_test = data_dict['test_data'].x, data_dict['test_data'].y 43 | 44 | # build classifier 45 | if config['exp_params']['model_name'] == 'randomforest': 46 | classifier = RandomForestClassifier(n_estimators=config['model_params']['n_estimators'], 47 | 
max_depth=config['model_params']['max_depth'], 48 | random_state=1) 49 | elif config['exp_params']['model_name'] == 'logisticregression': 50 | classifier = LogisticRegression(penalty=config['model_params']['penalty'], 51 | max_iter=config['model_params']['max_iter'], 52 | random_state=1) 53 | elif config['exp_params']['model_name'] == 'labelpropagation': 54 | # concat labelled/ unlabelled data 55 | X_train = np.concatenate([X_train, X_train_ul]) 56 | Y_train = np.concatenate([Y_train, Y_train_ul]) 57 | classifier = LabelPropagation(gamma=config['model_params']['gamma'], 58 | n_neighbors=config['model_params']['n_neighbors'], 59 | n_jobs=config['model_params']['n_jobs'], 60 | kernel=rbf_kernel_safe if config['model_params']['kernel'] == 'rbf_kernel_safe' else config['model_params']['kernel'], 61 | max_iter=config['model_params']['max_iter']) 62 | elif config['exp_params']['model_name'] == 'labelspreading': 63 | # concat labelled/ unlabelled data 64 | X_train = np.concatenate([X_train, X_train_ul]) 65 | Y_train = np.concatenate([Y_train, Y_train_ul]) 66 | classifier = LabelSpreading(gamma=config['model_params']['gamma'], 67 | alpha=config['model_params']['alpha'], 68 | n_neighbors=config['model_params']['n_neighbors'], 69 | n_jobs=config['model_params']['n_jobs'], 70 | kernel=rbf_kernel_safe if config['model_params']['kernel'] == 'rbf_kernel_safe' else config['model_params']['kernel'], 71 | max_iter=config['model_params']['max_iter']) 72 | 73 | classifier.fit(X=X_train, y=Y_train) 74 | 75 | yhat_prob = classifier.predict_proba(X_test) 76 | final_metrics = calculate_classification_metrics(yhat_prob, Y_test) 77 | 78 | print('Final test acc: {:.4f} w. Auc {:.4f} macro Auc {:.4f} XE {:.4f} microF1 {:.4f}'.format( 79 | final_metrics['accuracy'], final_metrics['weighted_auc'], final_metrics['macro_auc'], 80 | final_metrics['cross_entropy'], final_metrics['micro_f1'])) 81 | 82 | # Append test to the metrics and log to mlflow 83 | final_metrics = {"test_"+metric: v for metric, v in final_metrics.items()} 84 | mlflow.log_metrics(final_metrics) 85 | 86 | # log parameters (except the search space) 87 | for cfg in config.items(): 88 | if cfg[0] != 'search_space': 89 | for k, v in cfg[1].items(): 90 | mlflow.log_param(key=k, value=v) 91 | 92 | mlflow.end_run() 93 | 94 | if __name__ == "__main__": 95 | args = parse_args() 96 | run_experiment(args=args) -------------------------------------------------------------------------------- /experiments/tune.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import mlflow 3 | import optuna 4 | import os 5 | import pdb 6 | import tempfile 7 | 8 | from optuna import Trial, samplers 9 | from ssltsc.callbacks import TuneCallback 10 | from ssltsc.data import load_dataloaders 11 | from ssltsc.architectures.utils import backbone_factory 12 | from ssltsc.experiments import update_config, get_experiment_id, save_as_yaml_file_in_mlflow, get_base_argparser, log_optuna_plots_in_mlflow, save_as_csv_file_in_mlflow, convert_to_tuner_config, convert_to_best_config 13 | from ssltsc.models.supervised import Supervised 14 | from ssltsc.models.vat import VAT 15 | from ssltsc.models.meanteacher import MeanTeacher 16 | from ssltsc.models.mixmatch import MixMatch 17 | from ssltsc.models.model_factory import model_factory 18 | 19 | def parse_args(): 20 | parser = get_base_argparser(description='generic') 21 | parser.add_argument('--n_trials', type=int, default=None, metavar='MU', 22 | help='number of trials (default: 10)') 
23 | parser.add_argument('--time_budget', type=int, default=None, metavar='MU', 24 | help='time budget in sec (default: 1000)') 25 | parser.add_argument('--reduction_factor', type=int, default=3, metavar='MU', 26 | help='reduction factor (default: 3)') 27 | parser.add_argument('--no_pruner', default=False, action='store_true', 28 | help='set flag to stop pruning. ' 29 | 'hyperband pruning used by default') 30 | return parser.parse_args() 31 | 32 | def run_experiment(args): 33 | # overwrite args in the config file via command line arguments 34 | #FIXME: only use config arguments in the following, no args. anywhere 35 | configuration = update_config(args) 36 | 37 | # get mlflow loggers straight 38 | experiment_id = get_experiment_id(config=configuration) 39 | mlflow.start_run(experiment_id=experiment_id) 40 | 41 | for param, val in configuration['data_params'].items(): 42 | mlflow.log_param(param, val) 43 | 44 | mlflow.set_tag(key='dataset', value=args.dataset) 45 | mlflow.set_tag(key='run', value='tuning') 46 | mlflow.set_tag(key='model', value=configuration['exp_params']['model_name']) 47 | save_as_yaml_file_in_mlflow(configuration, "tune_config.yaml") 48 | 49 | def objective(trial: Trial): 50 | #FIXME: pass the configuration file directly to the function 51 | # might be impossible because of 52 | print("Running trial #{}".format(trial.number)) 53 | 54 | # sample the to be tuned params from the search space in the configuration file 55 | # and convert them to the optuna specific format 56 | tuner_config = convert_to_tuner_config(configuration, trial) 57 | 58 | # include params for data augmentation in tuning 59 | if 'stride' in tuner_config['model_params'].keys(): 60 | tuner_config['data_params']['stride'] = tuner_config['model_params']['stride'] 61 | if 'horizon' in tuner_config['model_params'].keys(): 62 | tuner_config['data_params']['horizon'] = tuner_config['model_params']['horizon'] 63 | 64 | data_dict = load_dataloaders(**tuner_config['data_params']) 65 | print("Data loaded") 66 | 67 | # Load architecture 68 | backbone, backbone_dict = backbone_factory(architecture=configuration['exp_params']['backbone'], 69 | dataset=configuration['data_params']['dataset'], 70 | n_classes=data_dict['train_data_l'].nclasses, 71 | n_channels=data_dict['train_data_l'].nvariables, 72 | lengthts=data_dict['train_data_l'].length, 73 | horizon=tuner_config['model_params']['horizon'] if 'horizon' in tuner_config['model_params'].keys() else None) 74 | 75 | callbacks = [TuneCallback(trial=trial, tuning_criterion=configuration['exp_params']['tuning_criterion'])] 76 | model = model_factory(model_name=configuration['exp_params']['model_name'], 77 | backbone=backbone, 78 | backbone_dict=backbone_dict, 79 | callbacks=callbacks) 80 | 81 | opt_dict = {'lr': tuner_config['model_params']['lr'], 82 | 'weight_decay': tuner_config['model_params']['weight_decay']} 83 | 84 | model.train(opt_dict=opt_dict, data_dict=data_dict, 85 | model_params=tuner_config['model_params'], 86 | exp_params=tuner_config['exp_params']) 87 | 88 | return model.history[configuration['exp_params']['tuning_criterion']].iloc[-1] 89 | 90 | if args.no_pruner: 91 | pruner = optuna.pruners.NopPruner() 92 | else: 93 | pruner = optuna.pruners.HyperbandPruner( 94 | min_resource=configuration['exp_params']['val_steps'], 95 | max_resource=configuration['exp_params']['n_steps'], 96 | reduction_factor=args.reduction_factor 97 | ) 98 | 99 | # Use the objective for optuna 100 | study = optuna.create_study(direction='maximize', 101 | pruner=pruner, 102 | 
sampler=samplers.RandomSampler()) 103 | #sampler=samplers.TPESampler()) 104 | study.optimize(objective, n_trials=args.n_trials, timeout=args.time_budget) 105 | 106 | # store tuning results 107 | df = study.trials_dataframe().sort_values(by='value', ascending=False) 108 | 109 | # to be sure to store the final best hpc setting 110 | best_config = convert_to_best_config(configuration, study.best_params) 111 | save_as_yaml_file_in_mlflow(best_config, 'best_' + "config.yaml") 112 | 113 | # store optuna history in mlflow 114 | save_as_csv_file_in_mlflow(data=df, filename='optuna_history.csv') 115 | 116 | # get the best out of three 117 | df = df.iloc[:3, :] 118 | 119 | for idx, storage_name in zip(range(3), ['first_best_', 'second_best_', 'third_best_']): 120 | param_names = ['params_' + param for param in [*configuration['search_space']]] 121 | foo = dict(df.filter(param_names).iloc[idx, :]) 122 | hpc = {k.split('_')[1]: float(v) for k, v in foo.items()} 123 | best_config = convert_to_best_config(configuration, hpc) 124 | save_as_yaml_file_in_mlflow(best_config, storage_name + "config.yaml") 125 | 126 | log_optuna_plots_in_mlflow(study=study) 127 | 128 | mlflow.end_run() 129 | 130 | if __name__ == "__main__": 131 | args = parse_args() 132 | run_experiment(args) 133 | -------------------------------------------------------------------------------- /experiments/tune_baselines.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import mlflow 3 | import optuna 4 | import os 5 | import pdb 6 | import tempfile 7 | import numpy as np 8 | 9 | from optuna import Trial, samplers 10 | from ssltsc.callbacks import TuneCallback 11 | from ssltsc.data import load_dataloaders 12 | from ssltsc.architectures.utils import backbone_factory 13 | from ssltsc.experiments import update_config, get_experiment_id, save_as_yaml_file_in_mlflow, get_base_argparser, log_optuna_plots_in_mlflow, save_as_csv_file_in_mlflow, convert_to_tuner_config, convert_to_best_config 14 | from ssltsc.models.supervised import Supervised 15 | from ssltsc.models.vat import VAT 16 | from ssltsc.models.meanteacher import MeanTeacher 17 | from ssltsc.models.mixmatch import MixMatch 18 | from ssltsc.models.model_factory import model_factory 19 | from ssltsc.models.utils import calculate_classification_metrics 20 | 21 | from sklearn.ensemble import RandomForestClassifier 22 | from sklearn.linear_model import LogisticRegression 23 | from sklearn.metrics import roc_auc_score 24 | from sklearn.semi_supervised import LabelPropagation, LabelSpreading 25 | from ssltsc.models.losses import rbf_kernel_safe 26 | 27 | def parse_args(): 28 | parser = get_base_argparser(description='ml model tuner') 29 | parser.add_argument('--n_trials', type=int, default=50, 30 | help='amound of random search model evals (default: 10)') 31 | return parser.parse_args() 32 | 33 | def run_experiment(args): 34 | # overwrite args in the config file via command line arguments 35 | configuration = update_config(args) 36 | 37 | # get mlflow loggers straight 38 | experiment_id = get_experiment_id(config=configuration) 39 | mlflow.start_run(experiment_id=experiment_id) 40 | 41 | for param, val in configuration['data_params'].items(): 42 | mlflow.log_param(param, val) 43 | 44 | mlflow.set_tag(key='dataset', value=args.dataset) 45 | mlflow.set_tag(key='run', value='tuning') 46 | mlflow.set_tag(key='model', value=configuration['exp_params']['model_name']) 47 | save_as_yaml_file_in_mlflow(configuration, "tune_config.yaml") 48 
| 49 | data_dict = load_dataloaders(**configuration['data_params']) 50 | print("Data loaded") 51 | 52 | def objective(trial: Trial): 53 | print("Running trial #{}".format(trial.number)) 54 | 55 | #DATA 56 | idx_labelled_train = data_dict['train_gen_l'].batch_sampler.labelled_idxs 57 | idx_unlabelled_train = data_dict['train_gen_l'].batch_sampler.unlabelled_idxs 58 | 59 | X_train, Y_train = data_dict['train_data_l'].x[idx_labelled_train], data_dict['train_data_l'].y[idx_labelled_train] 60 | X_train_ul, Y_train_ul = data_dict['train_data_l'].x[idx_unlabelled_train], data_dict['train_data_l'].y[idx_unlabelled_train] 61 | Y_train_ul = np.full(shape=Y_train_ul.shape, fill_value=-1) 62 | X_val, Y_val = data_dict['val_data'].x, data_dict['val_data'].y 63 | 64 | 65 | # update config and suggest hyperpars 66 | tuner_config = convert_to_tuner_config(configuration, trial) 67 | if tuner_config['exp_params']['model_name'] == 'randomforest': 68 | classifier = RandomForestClassifier(n_estimators=tuner_config['model_params']['n_estimators'], 69 | max_depth=tuner_config['model_params']['max_depth'], 70 | random_state=1) 71 | elif tuner_config['exp_params']['model_name'] == 'logisticregression': 72 | classifier = LogisticRegression(penalty=tuner_config['model_params']['penalty'], 73 | # solver='liblinear', 74 | max_iter=tuner_config['model_params']['max_iter'], 75 | random_state=1) 76 | elif tuner_config['exp_params']['model_name'] == 'labelpropagation': 77 | # concat labelled/ unlabelled data 78 | X_train = np.concatenate([X_train, X_train_ul]) 79 | Y_train = np.concatenate([Y_train, Y_train_ul]) 80 | classifier = LabelPropagation(gamma=tuner_config['model_params']['gamma'], 81 | n_neighbors=tuner_config['model_params']['n_neighbors'], 82 | n_jobs=tuner_config['model_params']['n_jobs'], 83 | kernel=rbf_kernel_safe, 84 | max_iter=tuner_config['model_params']['max_iter']) 85 | elif tuner_config['exp_params']['model_name'] == 'labelspreading': 86 | # concat labelled/ unlabelled data 87 | X_train = np.concatenate([X_train, X_train_ul]) 88 | Y_train = np.concatenate([Y_train, Y_train_ul]) 89 | classifier = LabelSpreading(gamma=tuner_config['model_params']['gamma'], 90 | alpha=tuner_config['model_params']['alpha'], 91 | n_neighbors=tuner_config['model_params']['n_neighbors'], 92 | n_jobs=tuner_config['model_params']['n_jobs'], 93 | kernel=rbf_kernel_safe if tuner_config['model_params']['kernel'] == 'rbf_kernel_safe' else tuner_config['model_params']['kernel'], 94 | max_iter=tuner_config['model_params']['max_iter']) 95 | classifier.fit(X=X_train, y=Y_train) 96 | 97 | # validate 98 | Y_hat = classifier.predict_proba(X=X_val) 99 | metrics = calculate_classification_metrics(Y_hat, Y_val) 100 | 101 | return metrics['weighted_auc'] 102 | 103 | # Use the objective for optuna 104 | study = optuna.create_study(direction='maximize', 105 | #pruner=pruner, 106 | sampler=samplers.RandomSampler()) 107 | study.optimize(objective, n_trials=args.n_trials) 108 | 109 | # store tuning results 110 | df = study.trials_dataframe().sort_values(by='value', ascending=False) 111 | 112 | # to be sure to store the final best hpc setting 113 | best_config = convert_to_best_config(configuration, study.best_params) 114 | save_as_yaml_file_in_mlflow(best_config, 'best_' + "config.yaml") 115 | 116 | # store optuna history in mlflow 117 | save_as_csv_file_in_mlflow(data=df, filename='optuna_history.csv') 118 | 119 | # get the best out of three 120 | df = df.iloc[:3, :] 121 | 122 | for idx, storage_name in zip(range(3), ['bbest_', 'second_best_', 
'third_best_']): 123 | param_names = ['params_' + param for param in [*configuration['search_space']]] 124 | foo = dict(df.filter(param_names).iloc[idx, :]) 125 | # pdb.set_trace() 126 | # hpc = {k.split('_')[1]: float(v) for k, v in foo.items()} 127 | hpc = {k.split('_')[1]: v for k, v in foo.items()} 128 | best_config = convert_to_best_config(configuration, hpc) 129 | save_as_yaml_file_in_mlflow(best_config, storage_name + "config.yaml") 130 | 131 | log_optuna_plots_in_mlflow(study=study) 132 | 133 | mlflow.end_run() 134 | 135 | if __name__ == "__main__": 136 | args = parse_args() 137 | run_experiment(args) 138 | -------------------------------------------------------------------------------- /figures/results.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Goschjann/ssltsc/08d6b1bf711bb1c8f19f9bfb66a98d4e423e932e/figures/results.png -------------------------------------------------------------------------------- /figures/uml_ssltsc.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Goschjann/ssltsc/08d6b1bf711bb1c8f19f9bfb66a98d4e423e932e/figures/uml_ssltsc.png -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | numpy==1.18.1 2 | cython==0.29.21 3 | numba==0.48.0 4 | torchvision==0.5.0 5 | scipy==1.4.1 6 | uncertainty_metrics==0.0.81 7 | tsfresh==0.14.1 8 | tslearn>=0.3.0 9 | mlflow==1.6.0 10 | optuna==2.0.0 11 | matplotlib==3.1.1 12 | plotly==4.9.0 13 | sktime>=0.3.0 14 | pandas==1.0.1 15 | fastcore==0.1.17 16 | torch==1.4.0 17 | pyunpack>=0.1.2 18 | library==0.0.0 19 | PyYAML>5.4 20 | scikit_learn==0.23.2 21 | fastcore>=1.3.0 -------------------------------------------------------------------------------- /results.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Goschjann/ssltsc/08d6b1bf711bb1c8f19f9bfb66a98d4e423e932e/results.png -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import setuptools 2 | 3 | setuptools.setup( 4 | name="ssltsc", 5 | version="0.0.1", 6 | author="Jann Goschenhofer", 7 | author_email="jann.goschenhofer@stat.uni-muenchen.de", 8 | description="package for ssl on tsc", 9 | url="none.com", 10 | classifiers=[ 11 | "Programming Language :: Python :: 3", 12 | "License :: OSI Approved :: MIT License", 13 | "Operating System :: OS Independent", 14 | ], 15 | packages=setuptools.find_packages(), 16 | python_requires='>=3.6', 17 | zip_safe=False 18 | ) 19 | -------------------------------------------------------------------------------- /ssltsc/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Goschjann/ssltsc/08d6b1bf711bb1c8f19f9bfb66a98d4e423e932e/ssltsc/__init__.py -------------------------------------------------------------------------------- /ssltsc/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Goschjann/ssltsc/08d6b1bf711bb1c8f19f9bfb66a98d4e423e932e/ssltsc/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /ssltsc/__pycache__/constants.cpython-36.pyc: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Goschjann/ssltsc/08d6b1bf711bb1c8f19f9bfb66a98d4e423e932e/ssltsc/__pycache__/constants.cpython-36.pyc -------------------------------------------------------------------------------- /ssltsc/__pycache__/experiments.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Goschjann/ssltsc/08d6b1bf711bb1c8f19f9bfb66a98d4e423e932e/ssltsc/__pycache__/experiments.cpython-36.pyc -------------------------------------------------------------------------------- /ssltsc/__pycache__/postprocessing.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Goschjann/ssltsc/08d6b1bf711bb1c8f19f9bfb66a98d4e423e932e/ssltsc/__pycache__/postprocessing.cpython-36.pyc -------------------------------------------------------------------------------- /ssltsc/__pycache__/visualization.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Goschjann/ssltsc/08d6b1bf711bb1c8f19f9bfb66a98d4e423e932e/ssltsc/__pycache__/visualization.cpython-36.pyc -------------------------------------------------------------------------------- /ssltsc/architectures/FCN_tsai.py: -------------------------------------------------------------------------------- 1 | # Architecture and Code recycled from: 2 | # https://github.com/timeseriesAI/tsai/blob/master/tsai/models/FCN.py 3 | 4 | import torch 5 | from torch import nn 6 | import torch.nn.functional as F 7 | 8 | from .layers_utils import * 9 | 10 | 11 | class FCN(nn.Module): 12 | def __init__(self, c_in, c_out, layers=[128, 256, 128], kss=[7, 5, 3]): 13 | super(FCN, self).__init__() 14 | assert len(layers) == len(kss) 15 | self.convblock1 = ConvBlock(c_in, layers[0], kss[0]) 16 | self.convblock2 = ConvBlock(layers[0], layers[1], kss[1]) 17 | self.convblock3 = ConvBlock(layers[1], layers[2], kss[2]) 18 | self.gap = GAP1d(1) 19 | self.fc = nn.Linear(layers[-1], c_out) 20 | 21 | def forward(self, x): 22 | x = self.convblock1(x) 23 | x = self.convblock2(x) 24 | x = self.convblock3(x) 25 | x = self.gap(x) 26 | return self.fc(x) 27 | -------------------------------------------------------------------------------- /ssltsc/architectures/InceptionTime.py: -------------------------------------------------------------------------------- 1 | # Architecture and Code taken from: 2 | # https://github.com/timeseriesAI/tsai/blob/master/tsai/models/InceptionTime.py 3 | 4 | import torch 5 | from torch import nn 6 | 7 | from .layers_utils import * 8 | 9 | class InceptionModule(nn.Module): 10 | def __init__(self, ni, nf, ks=40, bottleneck=True): 11 | super(InceptionModule, self).__init__() 12 | ks = [ks // (2**i) for i in range(3)] 13 | ks = [k if k % 2 != 0 else k - 1 for k in ks] # ensure odd ks 14 | bottleneck = bottleneck if ni > 1 else False 15 | self.bottleneck = Conv1d(ni, nf, 1, bias=False) if bottleneck else noop 16 | self.convs = nn.ModuleList([Conv1d(nf if bottleneck else ni, nf, k, bias=False) for k in ks]) 17 | self.maxconvpool = nn.Sequential(*[nn.MaxPool1d(3, stride=1, padding=1), Conv1d(ni, nf, 1, bias=False)]) 18 | self.concat = Concat() 19 | self.bn = BN1d(nf * 4) 20 | self.act = nn.ReLU() 21 | 22 | def forward(self, x): 23 | input_tensor = x 24 | x = self.bottleneck(input_tensor) 25 | x = self.concat([l(x) for l in self.convs] + 
[self.maxconvpool(input_tensor)]) 26 | return self.act(self.bn(x)) 27 | 28 | 29 | class InceptionBlock(nn.Module): 30 | def __init__(self, ni, nf=32, residual=True, depth=6, **kwargs): 31 | super(InceptionBlock, self).__init__() 32 | self.residual, self.depth = residual, depth 33 | self.inception, self.shortcut = nn.ModuleList(), nn.ModuleList() 34 | for d in range(depth): 35 | self.inception.append(InceptionModule(ni if d == 0 else nf * 4, nf, **kwargs)) 36 | if self.residual and d % 3 == 2: 37 | n_in, n_out = ni if d == 2 else nf * 4, nf * 4 38 | self.shortcut.append(BN1d(n_in) if n_in == n_out else ConvBlock(n_in, n_out, 1, act=None)) 39 | self.add = Add() 40 | self.act = nn.ReLU() 41 | 42 | def forward(self, x): 43 | res = x 44 | for d, l in enumerate(range(self.depth)): 45 | x = self.inception[d](x) 46 | if self.residual and d % 3 == 2: res = x = self.act(self.add(x, self.shortcut[d//3](res))) 47 | return x 48 | 49 | 50 | class InceptionTime(nn.Module): 51 | def __init__(self, c_in, c_out, nf=32, nb_filters=None, **kwargs): 52 | super(InceptionTime, self).__init__() 53 | nf = ifnone(nf, nb_filters) 54 | self.inceptionblock = InceptionBlock(c_in, nf, **kwargs) 55 | self.gap = GAP1d(1) 56 | self.fc = nn.Linear(nf * 4, c_out) 57 | 58 | def forward(self, x): 59 | x = self.inceptionblock(x) 60 | x = self.gap(x) 61 | x = self.fc(x) 62 | return x 63 | -------------------------------------------------------------------------------- /ssltsc/architectures/ResCNN.py: -------------------------------------------------------------------------------- 1 | # Architecture and Code taken from: 2 | # https://github.com/timeseriesAI/tsai/blob/master/tsai/models/ResCNN.py 3 | 4 | import torch 5 | from torch import nn 6 | import torch.nn.functional as F 7 | 8 | from .layers_utils import * 9 | 10 | 11 | class ResCNNBlock(nn.Module): 12 | def __init__(self, ni, nf, kss=[7, 5, 3], coord=False, separable=False, zero_norm=False): 13 | super(ResCNNBlock, self).__init__() 14 | self.convblock1 = ConvBlock(ni, nf, kss[0], coord=coord, separable=separable) 15 | self.convblock2 = ConvBlock(nf, nf, kss[1], coord=coord, separable=separable) 16 | self.convblock3 = ConvBlock(nf, nf, kss[2], act=None, coord=coord, separable=separable, zero_norm=zero_norm) 17 | 18 | # expand channels for the sum if necessary 19 | self.shortcut = ConvBN(ni, nf, 1, coord=coord) 20 | self.add = Add() 21 | self.act = nn.ReLU() 22 | 23 | def forward(self, x): 24 | res = x 25 | x = self.convblock1(x) 26 | x = self.convblock2(x) 27 | x = self.convblock3(x) 28 | x = self.add(x, self.shortcut(res)) 29 | x = self.act(x) 30 | return x 31 | 32 | 33 | class ResCNN(nn.Module): 34 | def __init__(self, c_in, c_out, coord=False, separable=False, zero_norm=False): 35 | super(ResCNN, self).__init__() 36 | nf = 64 37 | self.block1 = ResCNNBlock(c_in, nf, kss=[7, 5, 3], coord=coord, separable=separable, zero_norm=zero_norm) 38 | self.block2 = ConvBlock(nf, nf * 2, 3, coord=coord, separable=separable, act=nn.LeakyReLU, act_kwargs={'negative_slope':.2}) 39 | self.block3 = ConvBlock(nf * 2, nf * 4, 3, coord=coord, separable=separable, act=nn.PReLU) 40 | self.block4 = ConvBlock(nf * 4, nf * 2, 3, coord=coord, separable=separable, act=nn.ELU, act_kwargs={'alpha':.3}) 41 | self.gap = nn.AdaptiveAvgPool1d(1) 42 | self.squeeze = Squeeze(-1) 43 | self.lin = nn.Linear(nf * 2, c_out) 44 | 45 | def forward(self, x): 46 | x = self.block1(x) 47 | x = self.block2(x) 48 | x = self.block3(x) 49 | x = self.block4(x) 50 | x = self.squeeze(self.gap(x)) 51 | return self.lin(x) 52 | 
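The time-series backbones in this package map a batch of shape (batch, channels, length) to one logit vector per series. Below is a minimal usage sketch for the ResCNN defined above; the dummy shapes (8 series, 3 channels, length 128) are arbitrary choices for illustration, and any series length works because the head uses adaptive average pooling.

import torch

from ssltsc.architectures.ResCNN import ResCNN

# Dummy batch: 8 multivariate series, 3 channels, 128 time steps.
x = torch.randn(8, 3, 128)

model = ResCNN(c_in=3, c_out=6)  # 3 input channels, 6 target classes
model.eval()                     # no batch-norm updates for this shape check
with torch.no_grad():
    logits = model(x)

assert logits.shape == (8, 6)    # one logit vector per input series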
-------------------------------------------------------------------------------- /ssltsc/architectures/ResNet.py: -------------------------------------------------------------------------------- 1 | # Architecture and Code taken from: 2 | # https://github.com/timeseriesAI/tsai/blob/master/tsai/models/ResNet.py 3 | 4 | import torch 5 | from torch import nn 6 | import torch.nn.functional as F 7 | 8 | from .layers_utils import * 9 | 10 | 11 | class ResBlock(nn.Module): 12 | def __init__(self, ni, nf, kss=[7, 5, 3]): 13 | super(ResBlock, self).__init__() 14 | self.convblock1 = ConvBlock(ni, nf, kss[0]) 15 | self.convblock2 = ConvBlock(nf, nf, kss[1]) 16 | self.convblock3 = ConvBlock(nf, nf, kss[2], act=None) 17 | 18 | # expand channels for the sum if necessary 19 | self.shortcut = BN1d(ni) if ni == nf else ConvBlock(ni, nf, 1, act=None) 20 | self.add = Add() 21 | self.act = nn.ReLU() 22 | 23 | def forward(self, x): 24 | res = x 25 | x = self.convblock1(x) 26 | x = self.convblock2(x) 27 | x = self.convblock3(x) 28 | x = self.add(x, self.shortcut(res)) 29 | x = self.act(x) 30 | return x 31 | 32 | 33 | class ResNet(nn.Module): 34 | def __init__(self, c_in, c_out): 35 | super(ResNet, self).__init__() 36 | nf = 64 37 | kss = [7, 5, 3] 38 | self.resblock1 = ResBlock(c_in, nf, kss=kss) 39 | self.resblock2 = ResBlock(nf, nf * 2, kss=kss) 40 | self.resblock3 = ResBlock(nf * 2, nf * 2, kss=kss) 41 | self.gap = nn.AdaptiveAvgPool1d(1) 42 | self.squeeze = Squeeze(-1) 43 | self.fc = nn.Linear(nf * 2, c_out) 44 | 45 | def forward(self, x): 46 | x = self.resblock1(x) 47 | x = self.resblock2(x) 48 | x = self.resblock3(x) 49 | x = self.squeeze(self.gap(x)) 50 | return self.fc(x) 51 | -------------------------------------------------------------------------------- /ssltsc/architectures/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Goschjann/ssltsc/08d6b1bf711bb1c8f19f9bfb66a98d4e423e932e/ssltsc/architectures/__init__.py -------------------------------------------------------------------------------- /ssltsc/architectures/cnn_lstm.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | 4 | class CNNLSTM(nn.Module): 5 | 6 | def __init__(self, dropout=0.2, kernel=4, filters=200, td_layer="BILSTM", num_classes=15, 7 | length_ts=64, c_in=13): 8 | super(CNNLSTM, self).__init__() 9 | self.td_layer = td_layer 10 | print('Build IMU with ' + self.td_layer + '...') 11 | self.feature_extraction = nn.Sequential( 12 | nn.Conv1d(in_channels=c_in, out_channels=filters, kernel_size=kernel, padding='same'), 13 | nn.ReLU(), 14 | nn.MaxPool1d(kernel_size=2), 15 | nn.BatchNorm1d(filters), 16 | nn.Dropout(dropout), 17 | nn.Conv1d(in_channels=filters, out_channels=filters, kernel_size=kernel, padding='same'), 18 | nn.ReLU(), 19 | nn.MaxPool1d(kernel_size=2), 20 | nn.BatchNorm1d(filters), 21 | nn.Dropout(dropout)) 22 | 23 | 24 | if self.td_layer == "LSTM": 25 | self.model = nn.LSTM(input_size=filters, hidden_size=100, bidirectional=False) 26 | self.model_act = nn.ReLU6() 27 | out_features_model = int(100 * ((length_ts / 2) / 2)) 28 | # td = layers.LSTM(units=100, activation=tf.nn.relu6, return_sequences=True)(drop2) 29 | elif self.td_layer == "BILSTM": 30 | # td = layers.Bidirectional(layers.LSTM(units=60, activation="relu", return_sequences=True))(drop2) 31 | self.model = nn.LSTM(input_size=filters, hidden_size=60, bidirectional=True) 32 | self.model_act = nn.Tanh() 33 | 
out_features_model = int(120 * ((length_ts / 2) / 2)) 34 | else: 35 | raise ValueError("Not implemented Layer: " + str(td_layer)) 36 | 37 | self.classifier = nn.Sequential( 38 | nn.Linear(in_features=out_features_model, out_features=100), 39 | nn.BatchNorm1d(100), 40 | nn.Linear(in_features=100, out_features=200), 41 | nn.Linear(in_features=200, out_features=num_classes) 42 | ) 43 | 44 | def forward(self, x, probits=False): 45 | x = self.feature_extraction(x) 46 | # for name, param in self.named_parameters(): 47 | # if param.requires_grad: 48 | # print (name, param.data) 49 | # print("nonzero inputs: ", torch.sum(x!= 0)) 50 | if (self.td_layer == "LSTM") or (self.td_layer == "BILSTM"): 51 | x = self.model_act(self.model(x.permute(0,2,1))[0]) 52 | x = x.flatten(1) 53 | else: 54 | selfmodelx = self.model(x) 55 | # nonzeroweights = torch.sum(selfmodelx != 0) 56 | # print("nonzero weights after tempconvnet: ", nonzeroweights) 57 | x = self.model_act(selfmodelx) 58 | # print("nonzero weights lost after relu(tempconvnet): ", nonzeroweights - torch.sum(x != 0)) 59 | x = x.flatten(1) 60 | 61 | logits = self.classifier(x) 62 | 63 | if not probits: 64 | return logits 65 | else: 66 | return torch.nn.functional.softmax(logits, dim=1) 67 | 68 | if __name__ == "__main__": 69 | model = CNNLSTM(0.2, 4, 200, td_layer="BILSTM", num_classes=60, c_in=13) 70 | print(model) -------------------------------------------------------------------------------- /ssltsc/architectures/convlarge.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | 4 | from ssltsc.architectures.ladder_utils import add_gaussian_noise, Combinator2d 5 | 6 | 7 | class ConvLayer(nn.Module): 8 | def __init__(self, in_channels, out_channels, padding=0, kernel_size=3): 9 | super(ConvLayer, self).__init__() 10 | 11 | self.conv = nn.Conv2d(in_channels, out_channels, kernel_size, padding=padding, stride=1) 12 | 13 | # The easy implementation using standard affine. We could 14 | # use the affine from Eq. 10 in Pezeshki et al. (2016) 15 | self.bn = nn.BatchNorm2d(num_features=out_channels, affine=True) 16 | 17 | def forward(self, x): 18 | z_pre = self.conv(x) 19 | z = self.bn(z_pre) 20 | return z, z_pre 21 | 22 | 23 | class PoolLayer(nn.Module): 24 | def __init__(self, kernel_size, channels, padding=0): 25 | super(PoolLayer, self).__init__() 26 | 27 | self.pool = nn.MaxPool2d(kernel_size, padding=padding) 28 | self.bn = nn.BatchNorm2d(num_features=channels) 29 | 30 | def forward(self, x): 31 | z_pre = self.pool(x) 32 | z = self.bn(z_pre) 33 | return z, z_pre 34 | 35 | 36 | class ConvLarge(nn.Module): 37 | def __init__(self, n_classes=3, channels=2, return_hidden_states=False, verbose=True): 38 | """ 39 | 40 | Args: 41 | n_classes: 42 | channels: 43 | return_hidden_states: 44 | If the architecture is used in a Ladder architecture we need 45 | to return hidden states 46 | verbose: 47 | """ 48 | super(ConvLarge, self).__init__() 49 | self.verbose = verbose 50 | self.n_classes = n_classes 51 | self.channels = channels 52 | self.return_hidden_states = return_hidden_states 53 | 54 | k = 3 # The default kernel size 55 | 56 | self.layers = nn.ModuleList() 57 | self.layers.append(ConvLayer(in_channels=self.channels, out_channels=96)) # Padding valid. Out shape (bs, 96, 30, 30) 58 | self.layers.append(ConvLayer(in_channels=96, out_channels=96, padding=k - 1)) # Padding full. 
Out shape (bs, 96, 32, 32) 59 | self.layers.append(ConvLayer(in_channels=96, out_channels=96, padding=k - 1)) # Padding full. Out shape (bs, 96, 34, 34) 60 | 61 | self.layers.append(PoolLayer(kernel_size=2, channels=96)) # Out shape (bs, 96, 17, 17) 62 | 63 | self.layers.append(ConvLayer(in_channels=96, out_channels=192)) # Padding valid. Out shape (bs, 192, 15, 15) 64 | self.layers.append(ConvLayer(in_channels=192, out_channels=192, padding=k - 1)) # Padding full. Out shape (bs, 192, 17, 17) 65 | self.layers.append(ConvLayer(in_channels=192, out_channels=192)) # Padding valid. Out shape (bs, 192, 15, 15) 66 | 67 | self.layers.append(PoolLayer(kernel_size=2, channels=192, padding=1)) # Original code uses downsize=2. Out shape (bs, 192, 8, 8) 68 | 69 | self.layers.append(ConvLayer(in_channels=192, out_channels=192)) # Padding valid. Out shape (bs, 192, 6, 6) 70 | self.layers.append(ConvLayer(in_channels=192, out_channels=192, kernel_size=1)) # Padding valid. Out shape (bs, 192, 6, 6) 71 | self.layers.append(ConvLayer(in_channels=192, out_channels=self.n_classes, kernel_size=1)) # Padding valid. Out shape (bs, 10, 6, 6) 72 | 73 | self.n_lateral_connections = len(self.layers) + 2 # 13 74 | # send the modules to cuda if gpu is present 75 | for m in self.modules(): 76 | if torch.cuda.is_available(): 77 | m.to(torch.device('cuda')) 78 | 79 | def forward(self, x, noise_sd=0.0): 80 | x = add_gaussian_noise(x, sd=noise_sd) if noise_sd else x 81 | h = x 82 | 83 | layers_z = [x] # The list of calculated z for each layer 84 | batch_means = [x.mean()] 85 | batch_std = [x.std()] 86 | 87 | for l_idx, layer in enumerate(self.layers): 88 | z, z_pre = layer(h) # h is set in the previous iteration 89 | 90 | z = add_gaussian_noise(z, sd=noise_sd) if noise_sd else z 91 | layers_z.append(z) 92 | 93 | h = nn.functional.leaky_relu(z) 94 | 95 | if not noise_sd: # The mean and sd are only needed for clean pass 96 | batch_means.append(z_pre.mean()) 97 | batch_std.append(z_pre.std()) 98 | 99 | # Global average pool is the same as the mean of the last two dimensions 100 | global_avg_pool = h.mean([2, 3]) 101 | 102 | layers_z.append(global_avg_pool.unsqueeze(2).unsqueeze(3)) 103 | batch_means.append(global_avg_pool.mean()) 104 | batch_std.append(global_avg_pool.std()) 105 | 106 | assert len(self.layers) + 2 == len(layers_z) 107 | 108 | if not self.return_hidden_states: 109 | return global_avg_pool 110 | 111 | if noise_sd: # Then we will return z_hats and no mean and sd 112 | return global_avg_pool, layers_z 113 | 114 | return global_avg_pool, layers_z, \ 115 | batch_means, batch_std # Ie. not the softmax but the logits 116 | 117 | 118 | class DecodeLayer(nn.Module): 119 | def __init__(self, encode_layer=None, map_size=0): 120 | super(DecodeLayer, self).__init__() 121 | 122 | out_channels = encode_layer.conv.in_channels 123 | kernel_size = encode_layer.conv.kernel_size 124 | in_channels = encode_layer.conv.out_channels 125 | padding = encode_layer.conv.padding 126 | 127 | self.trans_conv = nn.ConvTranspose2d(in_channels, out_channels, kernel_size, padding=padding) 128 | 129 | self.bn = nn.BatchNorm2d(num_features=out_channels) 130 | self.combinator = Combinator2d(n_channels=out_channels, length=map_size) 131 | 132 | def forward(self, z_tilde, z_hat): 133 | u = self.trans_conv(z_hat) # Eq. 11 in Pezeshki et al. (2015) 134 | u = self.bn(u) # Eq 12-14 in Pezeshki et al. (2015) 135 | 136 | new_z_hat = self.combinator(z_tilde, u) # Eq 15 in Pezeshki et al. 
(2015) 137 | 138 | return new_z_hat 139 | 140 | 141 | class UpsampleLayer(nn.Module): 142 | """ 143 | From Rasmus et al. (2015): "the downsampling of the pooling on the encoder 144 | side is compensated for by upsampling with copying on the decoder side." 145 | """ 146 | def __init__(self, out_channels, feature_map_size): 147 | super(UpsampleLayer, self).__init__() 148 | 149 | self.upsample = nn.Upsample(feature_map_size) 150 | self.bn = nn.BatchNorm2d(num_features=out_channels) 151 | self.combinator = Combinator2d(n_channels=out_channels, 152 | length=feature_map_size) 153 | 154 | def forward(self, z_tilde, z_hat): 155 | u = self.upsample(z_hat) 156 | u = self.bn(u) 157 | 158 | new_z_hat = self.combinator(z_tilde, u) 159 | 160 | return new_z_hat 161 | 162 | 163 | class ConvLargeDecoder(nn.Module): 164 | def __init__(self, encoder, length: int = None, ladders: list = None): 165 | """ 166 | 167 | Args: 168 | encoder: 169 | length: 170 | ladders: 171 | A list of booleans that specifies whether the ladders should 172 | be used or not. The idx starts from bottom to top e.g. 173 | ladders[0] corresponds to the ladder between the first encoder 174 | layer and the last decoder layer. If ladder=None then all 175 | the lateral connections are used. 176 | """ 177 | super(ConvLargeDecoder, self).__init__() 178 | self.num_layers = len(encoder.layers) + 2 # Also denoted L 179 | self.ladders = ladders if ladders else [True] * self.num_layers 180 | assert len(self.ladders) == self.num_layers 181 | 182 | self.decoder_layers = nn.ModuleList([ 183 | Combinator2d(n_channels=10, length=1), 184 | UpsampleLayer(out_channels=10, feature_map_size=6), 185 | DecodeLayer(encode_layer=encoder.layers[-1], map_size=6), 186 | DecodeLayer(encode_layer=encoder.layers[-2], map_size=6), 187 | DecodeLayer(encode_layer=encoder.layers[-3], map_size=8), 188 | UpsampleLayer(out_channels=192, feature_map_size=15), 189 | DecodeLayer(encode_layer=encoder.layers[-5], map_size=17), 190 | DecodeLayer(encode_layer=encoder.layers[-6], map_size=15), 191 | DecodeLayer(encode_layer=encoder.layers[-7], map_size=17), 192 | UpsampleLayer(out_channels=96, feature_map_size=34), 193 | DecodeLayer(encode_layer=encoder.layers[-9], map_size=32), 194 | DecodeLayer(encode_layer=encoder.layers[-10], map_size=30), 195 | DecodeLayer(encode_layer=encoder.layers[-11], map_size=32) 196 | ]) 197 | 198 | def forward(self, layers_tilde_z: list): 199 | 200 | # Page 7 in Rasmus et al. (2015) states that "for the highest layer we 201 | # choose u=y_tilde". Ie. 
u is set to the softmax output from 202 | # the noisy encoder 203 | u = nn.functional.softmax(layers_tilde_z[-1], dim=1) # Dims (bs, n_classes, 1, 1) 204 | last_z_tilde = layers_tilde_z[-1] # Dims (bs, n_classes, 1, 1) 205 | assert u.shape == last_z_tilde.shape 206 | 207 | z_hat = self.decoder_layers[0](last_z_tilde, u) # l = 12 208 | layers_z_hat = [z_hat] 209 | 210 | last_needed_decoder = len(self.ladders) - self.ladders.index(True) 211 | 212 | for decode_layer_idx in range(1, self.num_layers): 213 | if decode_layer_idx >= last_needed_decoder: 214 | layers_z_hat.append(None) 215 | continue 216 | 217 | encode_layer_idx = - decode_layer_idx - 1 218 | tilde_z = layers_tilde_z[encode_layer_idx] 219 | 220 | z_hat = self.decoder_layers[decode_layer_idx](tilde_z, z_hat) 221 | layers_z_hat.append(z_hat) 222 | 223 | return layers_z_hat 224 | -------------------------------------------------------------------------------- /ssltsc/architectures/fcn.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | 4 | from ssltsc.architectures.ladder_utils import Combinator1d, add_gaussian_noise 5 | 6 | class LadderFCN(nn.Module): 7 | def __init__(self, n_classes, channels, 8 | return_hidden_states=False, verbose=False): 9 | super(LadderFCN, self).__init__() 10 | self.n_classes = n_classes 11 | self.n_channels = channels 12 | 13 | self.n_lateral_connections = 3 + 2 14 | 15 | self.return_hidden_states = return_hidden_states 16 | self.conv1 = nn.Conv1d(in_channels=channels, out_channels=128, kernel_size=7, padding=3) 17 | self.bn1 = nn.BatchNorm1d(128) 18 | 19 | self.conv2 = nn.Conv1d(in_channels=128, out_channels=256, kernel_size=5, padding=2) 20 | self.bn2 = nn.BatchNorm1d(256) 21 | 22 | self.conv3 = nn.Conv1d(in_channels=256, out_channels=128, kernel_size=3, padding=1) 23 | self.bn3 = nn.BatchNorm1d(128) 24 | 25 | self.gap = nn.AdaptiveAvgPool1d(1) 26 | self.fc = nn.Linear(128, n_classes) 27 | 28 | # send the modules to cuda if gpu is present 29 | for m in self.modules(): 30 | if torch.cuda.is_available(): 31 | m.to(torch.device('cuda')) 32 | 33 | def forward(self, x, noise_sd=0.0): 34 | x = add_gaussian_noise(x, sd=noise_sd) if noise_sd else x 35 | 36 | z_pre1 = self.conv1(x) 37 | z1 = add_gaussian_noise(self.bn1(z_pre1), sd=noise_sd) if noise_sd else self.bn1(z_pre1) 38 | h1 = nn.functional.relu(z1) 39 | 40 | z_pre2 = self.conv2(h1) 41 | z2 = add_gaussian_noise(self.bn2(z_pre2), sd=noise_sd) if noise_sd else self.bn2(z_pre2) 42 | h2 = nn.functional.relu(z2) 43 | 44 | z_pre3 = self.conv3(h2) 45 | z3 = add_gaussian_noise(self.bn3(z_pre3), sd=noise_sd) if noise_sd else self.bn3(z_pre3) 46 | h3 = nn.functional.relu(z3) 47 | 48 | avg_pool = self.gap(h3) 49 | avg_pool = avg_pool.squeeze() 50 | out = self.fc(avg_pool) 51 | 52 | layers_z = [x, z1, z2, z3, out.unsqueeze(2)] # The list of calculated z for each layer 53 | batch_means = [x.mean(), z_pre1.mean(), z_pre2.mean(), z_pre3.mean(), out.mean()] 54 | batch_std = [x.std(), z_pre1.std(), z_pre2.std(), z_pre3.std(), out.std()] 55 | 56 | if not self.return_hidden_states: 57 | return out 58 | 59 | if noise_sd: # Then we will return z_hats and no mean and sd 60 | return out, layers_z 61 | 62 | return out, layers_z, batch_means, batch_std 63 | 64 | 65 | class LadderFCNDecoder(nn.Module): 66 | def __init__(self, encoder, length: int = None, ladders: list = None): 67 | super(LadderFCNDecoder, self).__init__() 68 | self.ladders = ladders if ladders else [True] * 5 69 | self.n_classes = 
encoder.n_classes 70 | self.n_channels = encoder.n_channels 71 | 72 | self.combinator4 = Combinator1d(n_channels=self.n_classes, length=1) 73 | self.upsample = nn.Upsample(length) 74 | 75 | self.trans_conv3 = nn.ConvTranspose1d(self.n_classes, 128, kernel_size=3, padding=1) 76 | self.bn3 = nn.BatchNorm1d(num_features=128) 77 | self.combinator3 = Combinator1d(n_channels=128, length=length) 78 | 79 | self.trans_conv2 = nn.ConvTranspose1d(128, 256, kernel_size=5, padding=2) 80 | self.bn2 = nn.BatchNorm1d(num_features=256) 81 | self.combinator2 = Combinator1d(n_channels=256, length=length) 82 | 83 | self.trans_conv1 = nn.ConvTranspose1d(256, 128, kernel_size=7, padding=3) 84 | self.bn1 = nn.BatchNorm1d(num_features=128) 85 | self.combinator1 = Combinator1d(n_channels=128, length=length) 86 | 87 | self.trans_conv0 = nn.ConvTranspose1d(128, self.n_channels, kernel_size=3, padding=1) 88 | self.bn0 = nn.BatchNorm1d(num_features=self.n_channels) 89 | self.combinator0 = Combinator1d(n_channels=self.n_channels, length=length) 90 | 91 | def forward(self, layers_tilde_z: list): 92 | # Page 7 in Rasmus et al. (2015) states that "for the highest layer we 93 | # choose u=y_tilde". Ie. u is set to the softmax output from 94 | # the noisy encoder 95 | u = nn.functional.softmax(layers_tilde_z[-1], dim=1) # Dims (bs, n_classes, 1) 96 | last_z_tilde = layers_tilde_z[-1] # Dims (bs, n_classes, 1) 97 | assert u.shape == last_z_tilde.shape 98 | z_hat4 = self.combinator4(last_z_tilde, u) 99 | 100 | z_hat3 = self.trans_conv3(self.upsample(z_hat4)) 101 | z_hat3 = self.bn3(z_hat3) 102 | assert layers_tilde_z[3].shape == z_hat3.shape, layers_tilde_z[3].shape 103 | z_hat3 = self.combinator3(layers_tilde_z[3], z_hat3) 104 | 105 | z_hat2 = self.trans_conv2(z_hat3) 106 | z_hat2 = self.bn2(z_hat2) 107 | assert layers_tilde_z[2].shape == z_hat2.shape 108 | z_hat2 = self.combinator2(layers_tilde_z[2], z_hat2) 109 | 110 | z_hat1 = self.trans_conv1(z_hat2) 111 | z_hat1 = self.bn1(z_hat1) 112 | assert layers_tilde_z[1].shape == z_hat1.shape 113 | z_hat1 = self.combinator1(layers_tilde_z[1], z_hat1) 114 | 115 | z_hat0 = self.trans_conv0(z_hat1) 116 | z_hat0 = self.bn0(z_hat0) 117 | assert layers_tilde_z[0].shape == z_hat0.shape 118 | z_hat0 = self.combinator0(layers_tilde_z[0], z_hat0) 119 | 120 | # This masking is done to adhere to the Laddernet convention. 121 | # Ideally we would only use the decoder when we need the ladder 122 | # connection. This could be done similarily to what is done in the 123 | # ConvLarge ladder net. 
124 | z_hats = [z_hat4, z_hat3, z_hat2, z_hat1, z_hat0] 125 | last_needed_decoder = len(self.ladders) - self.ladders.index(True) 126 | z_hats_masked = [z_hat if decode_layer_idx < last_needed_decoder else None for decode_layer_idx, z_hat in enumerate(z_hats)] 127 | 128 | return z_hats_masked 129 | -------------------------------------------------------------------------------- /ssltsc/architectures/fcn_multitask.py: -------------------------------------------------------------------------------- 1 | # Architecture for the self supervised model 2 | import torch 3 | import torch.nn.functional as F 4 | from torch import nn 5 | from .layers_utils import * 6 | 7 | 8 | class FCNMultitask(nn.Module): 9 | def __init__(self, c_in, c_out, horizon, layers=[128, 256, 128], kss=[7, 5, 3]): 10 | super(FCNMultitask, self).__init__() 11 | assert len(layers) == len(kss) 12 | self.convblock1 = ConvBlock(c_in, layers[0], kss[0]) 13 | self.convblock2 = ConvBlock(layers[0], layers[1], kss[1]) 14 | self.convblock3 = ConvBlock(layers[1], layers[2], kss[2]) 15 | self.gap = GAP1d(1) 16 | self.fc_classification = nn.Linear(layers[-1], c_out) 17 | self.fc_forecast = nn.Linear(layers[-1], horizon*c_in) 18 | self.c_in = c_in 19 | self.horizon = horizon 20 | 21 | def forward_train(self, x_cl, x_fc): 22 | x_cl = self.convblock1(x_cl) 23 | x_fc = self.convblock1(x_fc) 24 | 25 | x_cl = self.convblock2(x_cl) 26 | x_fc = self.convblock2(x_fc) 27 | 28 | x_cl = self.convblock3(x_cl) 29 | x_fc = self.convblock3(x_fc) 30 | 31 | x_cl = self.gap(x_cl) 32 | x_fc = self.gap(x_fc) 33 | 34 | out_classification = self.fc_classification(x_cl) 35 | out_forecast = self.fc_forecast(x_fc).reshape(-1, self.c_in, self.horizon) 36 | 37 | return out_classification, out_forecast 38 | 39 | def forward(self, x): 40 | x = self.convblock1(x) 41 | x = self.convblock2(x) 42 | x = self.convblock3(x) 43 | x = self.gap(x) 44 | return self.fc_classification(x) 45 | -------------------------------------------------------------------------------- /ssltsc/architectures/ladder.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | import torch 4 | import torch.nn as nn 5 | 6 | from torch.nn import init 7 | 8 | torch.set_default_dtype(torch.float32) 9 | 10 | 11 | class Affine(nn.Module): 12 | """ 13 | This module implements the affine parameters gamma and beta seen in 14 | Eq. 10 in Pezeshki et al. (2016). It differs from the way affine 15 | is used in batchnorm out of the box of PyTorch. 16 | 17 | Pytorch affine : y = bn(x)*gamma + beta 18 | Rasmus et al. 
(2015): y = gamma * (bn(x) + beta) 19 | """ 20 | 21 | def __init__(self, n_channels, map_size): 22 | super(Affine, self).__init__() 23 | self.map_size = map_size 24 | self.n_channels = n_channels 25 | # initialize with no scaling (1) and shifting (0) 26 | # as well as in other implementations 27 | self.gamma = nn.Parameter(torch.Tensor(self.n_channels, self.map_size, self.map_size)) 28 | self.beta = nn.Parameter(torch.Tensor(self.n_channels, self.map_size, self.map_size)) 29 | 30 | def forward(self, x): 31 | out = self.gamma * (x + self.beta) 32 | return out 33 | 34 | def reset_parameters(self) -> None: 35 | init.kaiming_uniform_(self.gamma, a=math.sqrt(5)) 36 | init.kaiming_uniform_(self.beta, a=math.sqrt(5)) 37 | 38 | 39 | class Ladder(nn.Module): 40 | def __init__(self, encoder_architecture, decoder_architecture, 41 | n_classes, channels, length=None, ladders=None, 42 | noise_sd: float = 0.3, verbose=False): 43 | """ 44 | 45 | Args: 46 | n_classes: 47 | channels: 48 | verbose: 49 | """ 50 | super(Ladder, self).__init__() 51 | self.conv_net = encoder_architecture(n_classes=n_classes, 52 | channels=channels, 53 | verbose=verbose, 54 | return_hidden_states=True) 55 | self.noise_sd = noise_sd 56 | self.n_lateral_connections = self.conv_net.n_lateral_connections # L 57 | self.ladders = ladders if ladders else [True] * self.n_lateral_connections 58 | assert self.n_lateral_connections == len(self.ladders) 59 | 60 | self.n_classes = n_classes 61 | 62 | self.decoder = decoder_architecture(self.conv_net, length=length, 63 | ladders=ladders) 64 | self.verbose = verbose 65 | self.first_pass = True 66 | 67 | # send the modules to cuda if gpu is present 68 | if torch.cuda.is_available(): 69 | for m in self.modules(): 70 | m.to(torch.device('cuda')) 71 | 72 | def clean_encoder(self, x): 73 | return self.conv_net(x) 74 | 75 | def noisy_encoder(self, x): 76 | return self.conv_net(x, noise_sd=self.noise_sd) 77 | 78 | def forward(self, x, return_hidden_representations=False): 79 | x_noise = x 80 | x_clean = x.clone() # Detaching x 81 | 82 | # Always one pass through the clean encoder 83 | clean_logits, layers_z, batch_means, batch_std = self.clean_encoder(x_clean) 84 | 85 | if not self.training: # Prediction mode uses the clean encoder 86 | y = nn.functional.softmax(clean_logits, dim=1) 87 | 88 | if return_hidden_representations or self.training: 89 | # When doing a pass for testing we do not need the noisy encoder 90 | noise_logits, layers_tilde_z = self.noisy_encoder(x_noise) 91 | layers_z_hat = self.decoder(layers_tilde_z) 92 | 93 | if self.training: # Training mode uses noisy encoder 94 | y = nn.functional.softmax(noise_logits, dim=1) 95 | 96 | # We will return a dict of the hidden representations. 97 | # These are used when calculating the loss in a ladder model 98 | hidden_representations = { 99 | 'zs': layers_z, 100 | 'hat_zs': layers_z_hat, 101 | 'batch_means': batch_means, 102 | 'batch_std': batch_std 103 | } 104 | self.first_pass = False 105 | return y, hidden_representations 106 | 107 | self.first_pass = False 108 | return y 109 | 110 | -------------------------------------------------------------------------------- /ssltsc/architectures/ladder_utils.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch 3 | import numpy as np 4 | 5 | 6 | class Combinator(nn.Module): 7 | """ 8 | The vanilla combinator function g() that combines vertical and 9 | lateral connections as explained in Pezeshki et al. (2016). 
10 | The weights are initialized as described in Eq. 17 11 | and the g() is defined in Eq. 16. 12 | """ 13 | 14 | def __init__(self, n_channels, length, data_type='2d'): 15 | super(Combinator, self).__init__() 16 | 17 | if data_type == '2d': 18 | zeros = torch.zeros(n_channels, length, length) 19 | ones = torch.ones(n_channels, length, length) 20 | elif data_type == '1d': 21 | zeros = torch.zeros(n_channels, length) 22 | ones = torch.ones(n_channels, length) 23 | else: 24 | raise ValueError 25 | 26 | self.b0 = nn.Parameter(zeros) 27 | self.w0z = nn.Parameter(ones) 28 | self.w0u = nn.Parameter(zeros) 29 | self.w0zu = nn.Parameter(ones) 30 | 31 | self.b1 = nn.Parameter(zeros) 32 | self.w1z = nn.Parameter(ones) 33 | self.w1u = nn.Parameter(zeros) 34 | self.w1zu = nn.Parameter(zeros) 35 | 36 | self.wsig = nn.Parameter(ones) 37 | 38 | def forward(self, z_tilde, ulplus1): 39 | assert z_tilde.shape == ulplus1.shape 40 | 41 | out = self.b0 + z_tilde.mul(self.w0z) + ulplus1.mul(self.w0u) \ 42 | + z_tilde.mul(ulplus1.mul(self.w0zu)) \ 43 | + self.wsig.mul(torch.sigmoid(self.b1 + z_tilde.mul(self.w1z) 44 | + ulplus1.mul(self.w1u) 45 | + z_tilde.mul(ulplus1.mul(self.w1zu)))) 46 | return out 47 | 48 | 49 | class Combinator2d(Combinator): 50 | def __init__(self, n_channels, length): 51 | super(Combinator2d, self).__init__(n_channels, length, data_type='2d') 52 | 53 | 54 | class Combinator1d(Combinator): 55 | def __init__(self, n_channels, length): 56 | super(Combinator1d, self).__init__(n_channels, length, data_type='1d') 57 | 58 | 59 | def add_gaussian_noise(x, sd=0.3): 60 | # We are only constructing a single random tensor that will be repeated 61 | # for each of the datapoints in the batch. This "hack" significantly 62 | # reduces speed during training. 63 | np_vec = np.random.normal(0.0, sd, x[0].size()) 64 | noise = torch.Tensor(np_vec) 65 | 66 | # Alternatively we could generate a fully random tensor like this: 67 | # noise = torch.normal(0.0, 0.3, size=x.size()) 68 | 69 | if torch.cuda.is_available(): 70 | noise = noise.to(torch.device('cuda')) 71 | 72 | # Construct the noise tensor 73 | if len(x.shape) == 3: # Then we have 1D data 74 | noise = noise.unsqueeze(0).repeat(x.size()[0], 1, 1) 75 | elif len(x.shape) == 4: # Then we have 2D data 76 | noise = noise.unsqueeze(0).repeat(x.size()[0], 1, 1, 1) 77 | 78 | out = x + noise 79 | return out 80 | -------------------------------------------------------------------------------- /ssltsc/architectures/layers_utils.py: -------------------------------------------------------------------------------- 1 | 2 | import torch 3 | from torch import nn 4 | from functools import partial 5 | from enum import Enum 6 | import pdb 7 | import re 8 | import torch.nn.functional as F 9 | 10 | 11 | class ConvBlock(nn.Sequential): 12 | "Create a sequence of conv1d (`ni` to `nf`), activation (if `act_cls`) and `norm_type` layers." 
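# Example (a sketch of the default configuration, not part of the original
# file): ConvBlock(ni, nf, ks=7) with norm='Batch', act=nn.ReLU and bn_1st=True
# composes roughly nn.Conv1d(ni, nf, 7, padding=3, bias=False) ->
# nn.BatchNorm1d(nf) -> nn.ReLU(), i.e. the conv -> norm -> activation pattern
# used by the FCN-style backbones in this repository (e.g. FCNMultitask above).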
13 | def __init__(self, ni, nf, kernel_size=None, ks=3, stride=1, 14 | padding='same', bias=None, bias_std=0.01, norm='Batch', 15 | zero_norm=False, bn_1st=True, 16 | act=nn.ReLU, act_kwargs={}, init='auto', 17 | dropout=0., xtra=None, coord=False, separable=False, 18 | **kwargs): 19 | 20 | kernel_size = kernel_size or ks 21 | ndim = 1 22 | layers = [AddCoords1d()] if coord else [] 23 | norm_type = getattr(NormType, f"{snake2camel(norm)}{'Zero' if zero_norm else ''}") if norm is not None else None 24 | bn = norm_type in (NormType.Batch, NormType.BatchZero) 25 | inn = norm_type in (NormType.Instance, NormType.InstanceZero) 26 | if bias is None: 27 | bias = not (bn or inn) 28 | if separable: 29 | conv = SeparableConv1d(ni + coord, nf, ks=kernel_size, bias=bias, stride=stride, padding=padding, **kwargs) 30 | else: 31 | conv = Conv1d(ni + coord, nf, ks=kernel_size, bias=bias, stride=stride, padding=padding, **kwargs) 32 | act = None if act is None else act(**act_kwargs) 33 | if not separable: 34 | init_linear(conv, act, init=init, bias_std=bias_std) 35 | if norm_type == NormType.Weight: 36 | conv = weight_norm(conv) 37 | elif norm_type == NormType.Spectral: 38 | conv = spectral_norm(conv) 39 | layers += [conv] 40 | act_bn = [] 41 | if act is not None: 42 | act_bn.append(act) 43 | if bn: 44 | act_bn.append(BatchNorm(nf, norm_type=norm_type, ndim=ndim)) 45 | if inn: 46 | act_bn.append(InstanceNorm(nf, norm_type=norm_type, ndim=ndim)) 47 | if bn_1st: 48 | act_bn.reverse() 49 | if dropout: 50 | layers += [nn.Dropout(dropout)] 51 | layers += act_bn 52 | if xtra: 53 | layers.append(xtra) 54 | super().__init__(*layers) 55 | 56 | 57 | class GAP1d(nn.Module): 58 | "Global Adaptive Pooling + Flatten" 59 | def __init__(self, output_size=1): 60 | super(GAP1d, self).__init__() 61 | self.gap = nn.AdaptiveAvgPool1d(output_size) 62 | self.flatten = nn.Flatten() 63 | 64 | def forward(self, x): 65 | return self.flatten(self.gap(x)) 66 | 67 | 68 | class Squeeze(nn.Module): 69 | def __init__(self, dim=-1): 70 | super(Squeeze, self).__init__() 71 | self.dim = dim 72 | 73 | def forward(self, x): 74 | return x.squeeze(dim=self.dim) 75 | 76 | def __repr__(self): 77 | return f'{self.__class__.__name__}(dim={self.dim})' 78 | 79 | 80 | class Add(nn.Module): 81 | def forward(self, x, y): 82 | return x.add(y) 83 | def __repr__(self): 84 | return f'{self.__class__.__name__}' 85 | 86 | 87 | class Concat(nn.Module): 88 | def __init__(self, dim=1): 89 | super(Concat, self).__init__() 90 | self.dim = dim 91 | 92 | def forward(self, *x): 93 | return torch.cat(*x, dim=self.dim) 94 | 95 | def __repr__(self): 96 | return f'{self.__class__.__name__}(dim={self.dim})' 97 | 98 | 99 | 100 | def Norm(nf, ndim=1, norm='Batch', zero_norm=False, init=True, **kwargs): 101 | "Norm layer with `nf` features and `ndim` with auto init." 102 | assert 1 <= ndim <= 3 103 | nl = getattr(nn, f"{snake2camel(norm)}Norm{ndim}d")(nf, **kwargs) 104 | if nl.affine and init: 105 | nl.bias.data.fill_(1e-3) 106 | nl.weight.data.fill_(0. if zero_norm else 1.) 
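# (editor's note: i.e. affine norm layers start out close to an identity
# mapping -- unit scale, or zero scale for the *Zero variants, plus a small
# 1e-3 shift)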
107 | return nl 108 | 109 | 110 | BN1d = partial(Norm, ndim=1, norm='Batch') 111 | ConvBN = partial(ConvBlock, norm='Batch', act=None) 112 | 113 | NormType = Enum('NormType', 'Batch BatchZero Weight Spectral Instance InstanceZero') 114 | 115 | def snake2camel(name): 116 | return re.sub(r'(?:^|_)([a-z])', lambda x: x.group(1).upper(), name) 117 | 118 | 119 | class Pad1d(nn.ConstantPad1d): 120 | def __init__(self, padding, value=0.): 121 | super().__init__(padding, value) 122 | 123 | 124 | class Conv1dSame(nn.Module): 125 | "Conv1d with padding='same'" 126 | def __init__(self, ni, nf, ks=3, stride=1, dilation=1, **kwargs): 127 | self.ks, self.stride, self.dilation = ks, stride, dilation 128 | self.conv1d_same = nn.Conv1d(ni, nf, ks, stride=stride, dilation=dilation, **kwargs) 129 | self.weight = self.conv1d_same.weight 130 | self.bias = self.conv1d_same.bias 131 | self.pad = Pad1d 132 | 133 | def forward(self, x): 134 | self.padding = same_padding1d(x.shape[-1], self.ks, dilation=self.dilation) #stride=self.stride not used in padding calculation! 135 | return self.conv1d_same(self.pad(self.padding)(x)) 136 | 137 | 138 | def Conv1d(ni, nf, kernel_size=None, ks=None, stride=1, padding='same', dilation=1, init='auto', bias_std=0.01, **kwargs): 139 | "conv1d layer with padding='same', 'causal', 'valid', or any integer (defaults to 'same')" 140 | assert not (kernel_size and ks), 'use kernel_size or ks but not both simultaneously' 141 | assert kernel_size is not None or ks is not None, 'you need to pass a ks' 142 | kernel_size = kernel_size or ks 143 | if padding == 'same': 144 | if kernel_size%2==1: 145 | conv = nn.Conv1d(ni, nf, kernel_size, stride=stride, padding=kernel_size//2 * dilation, dilation=dilation, **kwargs) 146 | else: 147 | conv = Conv1dSame(ni, nf, kernel_size, stride=stride, dilation=dilation, **kwargs) 148 | elif padding == 'causal': conv = Conv1dCausal(ni, nf, kernel_size, stride=stride, dilation=dilation, **kwargs) 149 | elif padding == 'valid': conv = nn.Conv1d(ni, nf, kernel_size, stride=stride, padding=0, dilation=dilation, **kwargs) 150 | else: conv = nn.Conv1d(ni, nf, kernel_size, stride=stride, padding=padding, dilation=dilation, **kwargs) 151 | init_linear(conv, None, init=init, bias_std=bias_std) 152 | return conv 153 | 154 | 155 | class SeparableConv1d(nn.Module): 156 | def __init__(self, ni, nf, ks, stride=1, padding='same', dilation=1, bias=True, bias_std=0.01): 157 | self.depthwise_conv = Conv1d(ni, ni, ks, stride=stride, padding=padding, dilation=dilation, groups=ni, bias=bias) 158 | self.pointwise_conv = nn.Conv1d(ni, nf, 1, stride=1, padding=0, dilation=1, groups=1, bias=bias) 159 | if bias: 160 | if bias_std != 0: 161 | normal_(self.depthwise_conv.bias, 0, bias_std) 162 | normal_(self.pointwise_conv.bias, 0, bias_std) 163 | else: 164 | self.depthwise_conv.bias.data.zero_() 165 | self.pointwise_conv.bias.data.zero_() 166 | 167 | def forward(self, x): 168 | x = self.depthwise_conv(x) 169 | x = self.pointwise_conv(x) 170 | return x 171 | 172 | 173 | def init_linear(m, act_func=None, init='auto', bias_std=0.01): 174 | if getattr(m,'bias',None) is not None and bias_std is not None: 175 | if bias_std != 0: normal_(m.bias, 0, bias_std) 176 | else: m.bias.data.zero_() 177 | if init=='auto': 178 | if act_func in (F.relu_,F.leaky_relu_): init = kaiming_uniform_ 179 | else: init = getattr(act_func.__class__, '__default_init__', None) 180 | if init is None: init = getattr(act_func, '__default_init__', None) 181 | if init is not None: init(m.weight) 182 | 183 | 184 | def 
BatchNorm(nf, ndim=2, norm_type=NormType.Batch, **kwargs): 185 | "BatchNorm layer with `nf` features and `ndim` initialized depending on `norm_type`." 186 | return _get_norm('BatchNorm', nf, ndim, zero=norm_type==NormType.BatchZero, **kwargs) 187 | 188 | def _get_norm(prefix, nf, ndim=2, zero=False, **kwargs): 189 | "Norm layer with `nf` features and `ndim` initialized depending on `norm_type`." 190 | assert 1 <= ndim <= 3 191 | bn = getattr(nn, f"{prefix}{ndim}d")(nf, **kwargs) 192 | if bn.affine: 193 | bn.bias.data.fill_(1e-3) 194 | bn.weight.data.fill_(0. if zero else 1.) 195 | return bn 196 | 197 | 198 | def ifnone(a, b): 199 | "`b` if `a` is None else `a`" 200 | return b if a is None else a 201 | -------------------------------------------------------------------------------- /ssltsc/architectures/utils.py: -------------------------------------------------------------------------------- 1 | """Utils for the architectures/ backbones 2 | """ 3 | from functools import partial 4 | from math import floor 5 | 6 | import pdb 7 | import torch 8 | import numpy as np 9 | import torch.nn as nn 10 | 11 | from ssltsc.architectures import convnet13, wideresnet28, ResNet, InceptionTime, ResCNN 12 | from ssltsc.architectures.FCN_tsai import FCN 13 | from ssltsc.architectures.fcn_multitask import FCNMultitask 14 | from ssltsc.architectures.convlarge import ConvLarge, ConvLargeDecoder 15 | from ssltsc.architectures.ladder import Ladder 16 | from ssltsc.architectures.fcn import LadderFCN, LadderFCNDecoder 17 | from ssltsc.architectures.cnn_lstm import CNNLSTM 18 | from ssltsc.architectures.wideresnet28 import WideResNet28 19 | from ssltsc.architectures.convnet13 import ConvNet13 20 | from ssltsc.architectures.ResNet import ResNet 21 | from ssltsc.architectures.InceptionTime import InceptionTime 22 | from ssltsc.architectures.ResCNN import ResCNN 23 | 24 | def backbone_factory(architecture, dataset, n_classes, n_channels, lengthts, horizon=None): 25 | """Creates backbone and backbone dictionary for 26 | instantiation of a backbone 27 | """ 28 | if architecture == 'wideresnet28': 29 | backbone_dict = {'n_classes': n_classes} 30 | backbone = WideResNet28 31 | elif architecture == 'convnet13': 32 | backbone_dict = {'n_classes': n_classes} 33 | backbone = ConvNet13 34 | elif architecture == 'FCN': 35 | backbone = FCN 36 | backbone_dict = {'c_in': n_channels, 37 | 'c_out': n_classes} 38 | elif architecture == 'resnet': 39 | backbone = ResNet 40 | backbone_dict = {'c_in': n_channels, 41 | 'c_out': n_classes} 42 | elif architecture == 'fcnmultitask': 43 | backbone = FCNMultitask 44 | backbone_dict = {'c_in': n_channels, 45 | 'c_out': n_classes, 46 | 'horizon': floor(horizon * lengthts)} 47 | elif architecture == 'inceptiontime': 48 | backbone = InceptionTime 49 | backbone_dict = {'c_in': n_channels, 50 | 'c_out': n_classes} 51 | elif architecture == 'rescnn': 52 | backbone = ResCNN 53 | backbone_dict = {'c_in': n_channels, 54 | 'c_out': n_classes} 55 | elif architecture == 'ladder': 56 | if dataset == 'cifar10': 57 | backbone_dict = {'n_classes': n_classes, 'channels': n_channels} 58 | backbone = partial(Ladder, 59 | encoder_architecture=ConvLarge, 60 | decoder_architecture=ConvLargeDecoder) 61 | else: 62 | backbone_dict = {'n_classes': n_classes, 'channels': n_channels} 63 | backbone = partial(Ladder, 64 | encoder_architecture=LadderFCN, 65 | decoder_architecture=LadderFCNDecoder, 66 | length=lengthts 67 | ) 68 | 69 | elif architecture == 'ConvLarge': 70 | assert dataset == 'cifar10', 'Ladder architecture is 
only ' \ 71 | 'implemented for image data' 72 | backbone_dict = {'n_classes': n_classes, 'channels': n_channels} 73 | backbone = ConvLarge 74 | 75 | elif architecture == 'CNNLSTM': 76 | backbone = CNNLSTM 77 | backbone_dict = { 78 | 'td_layer': "LSTM", 79 | 'num_classes': n_classes, 80 | 'c_in': n_channels, 81 | 'length_ts': lengthts 82 | } 83 | elif architecture == 'CNNBiLSTM': 84 | backbone = CNNLSTM 85 | backbone_dict = { 86 | 'td_layer': "BILSTM", 87 | 'num_classes': n_classes, 88 | 'c_in': n_channels, 89 | 'length_ts': lengthts 90 | } 91 | else: 92 | backbone_dict = {'n_classes': n_classes, 93 | 'n_variables': n_channels, 94 | 'length_ts': lengthts, 95 | 'dropout_ratio': 0.5} 96 | backbone = FCN 97 | 98 | return backbone, backbone_dict -------------------------------------------------------------------------------- /ssltsc/architectures/wideresnet28.py: -------------------------------------------------------------------------------- 1 | """Wideresnet 2 | reference architecture for mixmatch 3 | featured in realistic evaluation of semi supervised learning (2018) 4 | 5 | https://github.com/YU1ut/MixMatch-pytorch/blob/master/models/wideresnet.py 6 | """ 7 | import torch.autograd 8 | import torch.nn.functional as F 9 | import torch.nn as nn 10 | import torchvision 11 | import numpy as np 12 | import math 13 | import torch 14 | import pdb 15 | 16 | from torch.autograd import Variable, Function 17 | from torch.nn import Parameter 18 | 19 | torch.set_default_dtype(torch.float32) 20 | 21 | class BasicBlock(nn.Module): 22 | def __init__(self, in_planes, out_planes, stride, dropRate=0.0, activate_before_residual=False): 23 | super(BasicBlock, self).__init__() 24 | self.bn1 = nn.BatchNorm2d(in_planes, momentum=0.001) 25 | self.relu1 = nn.LeakyReLU(negative_slope=0.1, inplace=True) 26 | self.conv1 = nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, 27 | padding=1, bias=False) 28 | self.bn2 = nn.BatchNorm2d(out_planes, momentum=0.001) 29 | self.relu2 = nn.LeakyReLU(negative_slope=0.1, inplace=True) 30 | self.conv2 = nn.Conv2d(out_planes, out_planes, kernel_size=3, stride=1, 31 | padding=1, bias=False) 32 | self.droprate = dropRate 33 | self.equalInOut = (in_planes == out_planes) 34 | self.convShortcut = (not self.equalInOut) and nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, 35 | padding=0, bias=False) or None 36 | self.activate_before_residual = activate_before_residual 37 | def forward(self, x): 38 | if not self.equalInOut and self.activate_before_residual == True: 39 | x = self.relu1(self.bn1(x)) 40 | else: 41 | out = self.relu1(self.bn1(x)) 42 | out = self.relu2(self.bn2(self.conv1(out if self.equalInOut else x))) 43 | if self.droprate > 0: 44 | out = F.dropout(out, p=self.droprate, training=self.training) 45 | out = self.conv2(out) 46 | return torch.add(x if self.equalInOut else self.convShortcut(x), out) 47 | 48 | class NetworkBlock(nn.Module): 49 | def __init__(self, nb_layers, in_planes, out_planes, block, stride, dropRate=0.0, activate_before_residual=False): 50 | super(NetworkBlock, self).__init__() 51 | self.layer = self._make_layer(block, in_planes, out_planes, nb_layers, stride, dropRate, activate_before_residual) 52 | def _make_layer(self, block, in_planes, out_planes, nb_layers, stride, dropRate, activate_before_residual): 53 | layers = [] 54 | for i in range(int(nb_layers)): 55 | layers.append(block(i == 0 and in_planes or out_planes, out_planes, i == 0 and stride or 1, dropRate, activate_before_residual)) 56 | return nn.Sequential(*layers) 57 | def 
forward(self, x): 58 | return self.layer(x) 59 | 60 | class WideResNet28(nn.Module): 61 | def __init__(self, n_classes, depth=28, widen_factor=2, dropRate=0.0): 62 | super(WideResNet28, self).__init__() 63 | nChannels = [16, 16*widen_factor, 32*widen_factor, 64*widen_factor] 64 | assert((depth - 4) % 6 == 0) 65 | n = (depth - 4) / 6 66 | block = BasicBlock 67 | # 1st conv before any network block 68 | self.conv1 = nn.Conv2d(3, nChannels[0], kernel_size=3, stride=1, 69 | padding=1, bias=False) 70 | # 1st block 71 | self.block1 = NetworkBlock(n, nChannels[0], nChannels[1], block, 1, dropRate, activate_before_residual=True) 72 | # 2nd block 73 | self.block2 = NetworkBlock(n, nChannels[1], nChannels[2], block, 2, dropRate) 74 | # 3rd block 75 | self.block3 = NetworkBlock(n, nChannels[2], nChannels[3], block, 2, dropRate) 76 | # global average pooling and classifier 77 | self.bn1 = nn.BatchNorm2d(nChannels[3], momentum=0.001) 78 | self.relu = nn.LeakyReLU(negative_slope=0.1, inplace=True) 79 | self.fc = nn.Linear(nChannels[3], n_classes) 80 | self.nChannels = nChannels[3] 81 | self.n_classes = n_classes 82 | 83 | for m in self.modules(): 84 | if isinstance(m, nn.Conv2d): 85 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 86 | m.weight.data.normal_(0, math.sqrt(2. / n)) 87 | elif isinstance(m, nn.BatchNorm2d): 88 | m.weight.data.fill_(1) 89 | m.bias.data.zero_() 90 | elif isinstance(m, nn.Linear): 91 | nn.init.xavier_normal_(m.weight.data) 92 | m.bias.data.zero_() 93 | 94 | def forward(self, x): 95 | out = self.conv1(x) 96 | out = self.block1(out) 97 | out = self.block2(out) 98 | out = self.block3(out) 99 | out = self.relu(self.bn1(out)) 100 | out = F.avg_pool2d(out, 8) 101 | out = out.view(-1, self.nChannels) 102 | return self.fc(out) 103 | 104 | def embed(self, x): 105 | out = self.conv1(x) 106 | out = self.block1(out) 107 | out = self.block2(out) 108 | out = self.block3(out) 109 | out = self.relu(self.bn1(out)) 110 | out = F.avg_pool2d(out, 8) 111 | out = out.view(-1, self.nChannels) 112 | return out -------------------------------------------------------------------------------- /ssltsc/callbacks.py: -------------------------------------------------------------------------------- 1 | import os 2 | import tempfile 3 | from statistics import mean 4 | 5 | import optuna 6 | import mlflow 7 | import time 8 | import pdb 9 | from optuna import Trial 10 | from abc import ABC 11 | from pandas import DataFrame 12 | 13 | 14 | class Callback(ABC): 15 | """ 16 | Base callback class. The method naming convention is inspired by the 17 | Keras naming convention. 18 | https://keras.io/guides/writing_your_own_callbacks/ 19 | """ 20 | def on_train_batch_start(self): 21 | pass 22 | 23 | def on_train_batch_end(self, step: int): 24 | pass 25 | 26 | def on_validation_end(self, step: int, metrics: dict): 27 | """ 28 | Will be called every time in the train loop when the validation step is over. 29 | Args: 30 | step: 31 | metrics: 32 | Returns: 33 | """ 34 | pass 35 | 36 | def on_evaluation_end(self, metrics: dict): 37 | """ 38 | Will be called when an evaluation has been done. 
39 | Args: 40 | metrics: 41 | """ 42 | pass 43 | 44 | def on_train_end(self, history: DataFrame): 45 | pass 46 | 47 | 48 | class ConsoleLoggingCallback(Callback): 49 | def on_validation_end(self, step: int, metrics: dict): 50 | validate_string = 'step {} | TRAIN: loss {:0.3f} acc {:0.3f} auc {:0.3f} VAL: loss {:0.3f} acc {:0.3f} auc {:0.3f}' 51 | assert any([bool(metric in metrics) for metric in ['train_loss', 'train_accuracy', 'train_weighted_auc', 'val_loss', 'val_accuracy', 'val_weighted_auc']]), "Missing metric for logging to console." 52 | 53 | print(validate_string.format(step, metrics['train_cross_entropy'], metrics['train_accuracy'], metrics['train_weighted_auc'], metrics['val_cross_entropy'], metrics['val_accuracy'], metrics['val_weighted_auc'])) 54 | 55 | def on_evaluation_end(self, metrics: dict): 56 | print('Final test acc: {:.4f} w. Auc {:.4f} macro Auc {:.4f} XE {:.4f} microF1 {:.4f}'.format( 57 | metrics['accuracy'], metrics['weighted_auc'], metrics['macro_auc'], 58 | metrics['cross_entropy'], metrics['micro_f1'])) 59 | 60 | class MeanTeacherConsoleLoggingCallback(Callback): 61 | """ 62 | This logger callback is necessary as the Mean Teacher model both has a student and a teacher loss 63 | """ 64 | def on_validation_end(self, step: int, metrics: dict) -> bool: 65 | validate_string = 'step {} | TRAIN: student loss {:0.3f} acc {:0.3f} auc {:0.3f}, ' \ 66 | 'teacher loss {:0.3f} acc {:0.3f} auc {:0.3f}, \n | VAL: student loss {:0.3f} ' \ 67 | 'acc {:0.3f} auc {:0.3f}, teacher loss {:0.3f} acc {:0.3f} auc {:0.3f}' 68 | 69 | print(validate_string.format(step, 70 | metrics['train_student_loss'], 71 | metrics['train_student_accuracy'], 72 | metrics['train_student_weighted_auc'], 73 | metrics['train_loss'], 74 | metrics['train_accuracy'], 75 | metrics['train_weighted_auc'], 76 | metrics['val_student_loss'], 77 | metrics['val_student_accuracy'], 78 | metrics['val_student_weighted_auc'], 79 | metrics['val_loss'], 80 | metrics['val_accuracy'], 81 | metrics['val_weighted_auc'])) 82 | 83 | stop_training = False 84 | return stop_training 85 | 86 | 87 | class MlflowLoggingCallback(Callback): 88 | def on_validation_end(self, step: int, metrics: dict): 89 | mlflow.log_metrics(metrics, step=step) 90 | 91 | def on_evaluation_end(self, metrics: dict): 92 | # Append test to the metrics and log to mlflow 93 | final_metrics = {"test_" + metric: v for metric, v in metrics.items()} 94 | mlflow.log_metrics(final_metrics) 95 | 96 | # def on_train_end(self, history: DataFrame): 97 | 98 | # # We log the history of the model as a csv artifact in mlflow 99 | # # with tempfile.TemporaryDirectory() as tmp_dir: 100 | # # history_path = os.path.join(tmp_dir, "history.csv") 101 | # # history.to_csv(history_path) 102 | # # mlflow.log_artifact(history_path) 103 | 104 | 105 | class TimerCallback(Callback): 106 | def __init__(self, verbose=False, log_to_mlflow=True): 107 | self.verbose = verbose 108 | self.batch_times = [] 109 | self.log_to_ml_flow = log_to_mlflow 110 | 111 | def on_train_batch_start(self): 112 | self.start = time.time() 113 | 114 | def on_train_batch_end(self, step: int): 115 | batch_time = time.time() - self.start 116 | self.batch_times.append(batch_time) 117 | 118 | if self.verbose: 119 | print("step {} | Batch took {:.2}s".format(step, batch_time)) 120 | 121 | def on_validation_end(self, step: int, metrics: dict): 122 | if len(self.batch_times) > 0: 123 | avg_batch_times = mean(self.batch_times) 124 | 125 | if self.log_to_ml_flow: 126 | mlflow.log_metric('avg_train_batch_times', 127 | 
avg_batch_times, step=step) 128 | print("step - | Avg. train batch time: {:.2}s".format(avg_batch_times)) 129 | self.batch_times = [] 130 | 131 | class TuneCallback(Callback): 132 | def __init__(self, trial: Trial, tuning_criterion: str): 133 | self.trial = trial 134 | self.tuning_criterion = tuning_criterion 135 | 136 | def on_validation_end(self, step: int, metrics: dict): 137 | assert self.tuning_criterion in metrics.keys(), 'Your tuning criterion is not part of the tracked performance metrics' 138 | value = metrics[self.tuning_criterion] 139 | print(f'Trial {self.trial._trial_id} step {step} with value {value}') 140 | self.trial.report(metrics[self.tuning_criterion], step) 141 | 142 | if self.trial.should_prune(): 143 | raise optuna.TrialPruned() -------------------------------------------------------------------------------- /ssltsc/constants.py: -------------------------------------------------------------------------------- 1 | """ 2 | constants module 3 | 4 | stores constants used for model, architectures, trainers etc. 5 | """ 6 | 7 | ##################### 8 | # Feature Extractor 9 | 10 | FC_PARAMETERS_1 = { 11 | 'abs_energy': None, 12 | 'absolute_sum_of_changes': None, 13 | 'count_above_mean': None, 14 | 'count_below_mean': None, 15 | 'first_location_of_maximum': None, 16 | 'first_location_of_minimum': None, 17 | 'has_duplicate': None, 18 | 'has_duplicate_max': None, 19 | 'has_duplicate_min': None, 20 | 'kurtosis': None, 21 | 'last_location_of_maximum': None, 22 | 'last_location_of_minimum': None, 23 | 'length': None, 24 | 'longest_strike_above_mean': None, 25 | 'longest_strike_below_mean': None, 26 | 'maximum': None, 27 | 'minimum': None, 28 | 'mean': None, 29 | 'mean_abs_change': None, 30 | 'mean_change': None, 31 | 'mean_second_derivative_central': None, 32 | 'median': None, 33 | } 34 | 35 | FC_PARAMETERS_2 = { 36 | 'abs_energy': None, 37 | 'absolute_sum_of_changes': None, 38 | 'count_above_mean': None, 39 | 'count_below_mean': None, 40 | 'first_location_of_maximum': None, 41 | 'first_location_of_minimum': None, 42 | 'has_duplicate': None, 43 | 'has_duplicate_max': None, 44 | 'has_duplicate_min': None, 45 | 'kurtosis': None, 46 | 'last_location_of_maximum': None, 47 | 'last_location_of_minimum': None, 48 | 'length': None, 49 | 'longest_strike_above_mean': None, 50 | 'longest_strike_below_mean': None, 51 | 'maximum': None, 52 | 'minimum': None, 53 | 'mean': None, 54 | 'mean_abs_change': None, 55 | 'mean_change': None, 56 | 'mean_second_derivative_central': None, 57 | 'median': None, 58 | 'sample_entropy': None, 59 | 'skewness': None, 60 | 'standard_deviation': None, 61 | 'sum_of_reoccurring_data_points': None, 62 | 'sum_of_reoccurring_values': None, 63 | 'sum_values': None, 64 | 'variance': None, 65 | 'variance_larger_than_standard_deviation': None, 66 | } 67 | 68 | ##################### 69 | # Data Sets 70 | 71 | MANUAL_DATASETS_DICT = {'pamap2': 'PAMAP2_Dataset/', 72 | 'wisdm': 'WISDM_ar_v1.1/', 73 | 'sits': 'SITS_Dataset/', 74 | 'cifar10': 'cifar10/', 75 | 'svhn': 'svhn/', 76 | 'crop': 'Crop/', 77 | 'fordb': 'FordB/', 78 | 'electricdevices': 'ElectricDevices/', 79 | 'simulated': 'simulated/', 80 | 'sits_balanced': 'SITS_Balanced_Dataset/', 81 | 'sits_hard_balanced': 'SITS_Hard_Balanced_Dataset/'} 82 | 83 | TRAINDATA_SIZE_DICT = {'pamap2': 11451, 84 | 'wisdm': 54907, 85 | 'sits': 90000, 86 | 'crop': 7200, 87 | 'electricdevices': 8926, 88 | 'fordb': 3636, 89 | 'cifar10': 50000} 90 | 91 | # COLOR_MODEL_DICT = {'randomforest': {'color': 'dodgerblue', 'linestyle': ':'}, 92 | # 
'logisticregression': {'color': 'lightseagreen', 'linestyle': ':'}, 93 | # 'labelspreading': {'color': 'black', 'linestyle': ':'}, 94 | # 'supervised': {'color': 'blue', 'linestyle': ':'}, 95 | # 'meanteacher': {'color': 'sienna', 'linestyle': '-'}, 96 | # 'mixmatch': {'color': 'purple', 'linestyle': '-'}, 97 | # 'vat': {'color': 'orange', 'linestyle': '-'}, 98 | # 'ladder': {'color': 'forestgreen', 'linestyle': '-'},} 99 | # #800000 100 | # 'selfsupervised': {'color': '#99B181', 'linestyle': '-', 'name': 'Self-Supervised'}} 101 | 102 | # jco color palette 103 | # https://cran.r-project.org/web/packages/ggsci/vignettes/ggsci.html#non-ggplot2-graphics 104 | COLOR_MODEL_DICT = {'randomforest': {'color': '#003C67FF', 'linestyle': ':', 'name': 'Random Forest'}, 105 | 'logisticregression': {'color': '#7AA6DCFF', 'linestyle': ':', 'name': 'Logistic Regression'}, 106 | #'labelpropagation': {'color': '#3B3B3BFF', 'linestyle': ':', 'name': 'Label Propagation'}, 107 | 'supervised': {'color': '#0073C2FF', 'linestyle': ':', 'name': 'Supervised FCN'}, 108 | 'fully_supervised': {'color': 'black', 'linestyle': '--', 'name': 'Fully Labelled FCN'}, 109 | 'meanteacher': {'color': '#FFB2B2', 'linestyle': '-', 'name': 'Mean Teacher'}, 110 | 'mixmatch': {'color': '#CD534CFF', 'linestyle': '-', 'name': 'MixMatch'}, 111 | 'vat': {'color': '#EFC000FF', 'linestyle': '-', 'name': 'VAT'}, 112 | 'ladder': {'color': '#8F7700FF', 'linestyle': '-', 'name': 'Ladder'}, 113 | 'selfsupervised': {'color': '#6B8E23', 'linestyle': '-', 'name': 'Self-Supervised'}} 114 | 115 | COLOR_MODEL_DICT_SLIDES = {'supervised': {'color': '#0073C2FF', 'linestyle': ':', 'name': 'Supervised'}, 116 | 'fully_supervised': {'color': 'black', 'linestyle': '--', 'name': 'Fully Labelled'}, 117 | 'mixmatch': {'color': '#CD534CFF', 'linestyle': '-', 'name': 'Semi-Supervised'}} 118 | 119 | DATASET_NAMES_DICT = {'pamap2': 'Pamap2', 120 | 'wisdm': 'WISDM', 121 | 'sits': 'SITS', 122 | 'sits_hard_balanced': 'SITS', 123 | 'crop': 'Crop', 124 | 'fordb': 'FordB', 125 | 'electricdevices': 'Electric Devices'} 126 | 127 | SUPERVISED_BASELINE = {'pamap2': 0.98, 128 | 'wisdm': 0.995, 129 | 'sits': 0.97, 130 | 'sits_hard_balanced': 0.97, 131 | 'crop': 0.97, 132 | 'fordb': 0.85, 133 | 'electricdevices': 0.93} -------------------------------------------------------------------------------- /ssltsc/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Goschjann/ssltsc/08d6b1bf711bb1c8f19f9bfb66a98d4e423e932e/ssltsc/models/__init__.py -------------------------------------------------------------------------------- /ssltsc/models/basemodel.py: -------------------------------------------------------------------------------- 1 | """Base model class 2 | """ 3 | import pdb 4 | import numpy as np 5 | import torch 6 | import pandas as pd 7 | import os 8 | import tempfile 9 | 10 | from torch import nn, optim 11 | from abc import ABC, abstractmethod 12 | from matplotlib import pyplot as plt 13 | from torch.utils.data import DataLoader 14 | 15 | from ssltsc.models.utils import calculate_classification_metrics 16 | from ssltsc.visualization import store_reliability 17 | 18 | class BaseModel(ABC): 19 | """ABC for all deep learning models 20 | """ 21 | def __init__(self, backbone, backbone_dict, callbacks=None): 22 | self.backbone = backbone 23 | self.backbone_dict = backbone_dict 24 | self.history = pd.DataFrame() 25 | self.network = self.backbone(**self.backbone_dict) 26 | self.checkpoint_file = 
tempfile.NamedTemporaryFile() 27 | self.callbacks = callbacks if callbacks else [] 28 | self.es_step = 0 29 | if torch.cuda.is_available(): 30 | self.network.to(torch.device('cuda')) 31 | 32 | @abstractmethod 33 | def train(self, 34 | opt_dict, 35 | data_dict, 36 | model_params, 37 | exp_params, 38 | optimizer=optim.Adam): 39 | pass 40 | 41 | @abstractmethod 42 | def _validate_one_dataset(self): 43 | pass 44 | 45 | def validate(self, step, hp_dict, train_dataloader: DataLoader, val_dataloader: DataLoader = None, 46 | verbose=True): 47 | """ 48 | This method will validate two data_loaders (ie. train and val). It uses the _validate_one_dataset() 49 | for the actual metric and loss calculations. 50 | Args: 51 | step: 52 | The current step in the training process. 53 | hp_dict: 54 | dict of hyperparams we want to log each step 55 | train_dataloader: 56 | 57 | val_dataloader: 58 | verbose: 59 | plot_distribution: 60 | 61 | Returns: 62 | A history dict that contains metrics and losses and hparams. 63 | """ 64 | history_data = hp_dict 65 | history_data['step'] = int(step) 66 | for part, dataloader in {"train": train_dataloader, "val": val_dataloader}.items(): 67 | metrics = self._validate_one_dataset(dataloader, step) 68 | 69 | # Rename metrics for logging 70 | metrics = {part + "_" + metric: value for metric, value in metrics.items()} 71 | 72 | # Add data to history 73 | history_data.update(metrics) 74 | 75 | if not len(self.history.columns): # If the columns names are not set then set them 76 | self.history = self.history.reindex(columns=history_data.keys()) 77 | 78 | self.history = self.history.append(history_data, ignore_index=True) 79 | 80 | for callback in self.callbacks: 81 | callback.on_validation_end(step, history_data) 82 | 83 | return history_data 84 | 85 | 86 | def predict(self, data_loader): 87 | self.network.eval() 88 | 89 | yhat_list = [] 90 | y_true_list = [] 91 | 92 | for batch_idx, (X, Y) in enumerate(data_loader): 93 | if torch.cuda.is_available(): 94 | X = X.to(torch.device('cuda')) 95 | Yhat = self.network(X).detach().cpu() 96 | # weird checking due to deep label prop algorithm which also outputs certainty weights 97 | Y_out = Y.detach().cpu() if len(Y)!= 2 else Y[0].detach().cpu() 98 | 99 | yhat_list.append(Yhat) 100 | y_true_list.append(Y_out) 101 | 102 | Yhat_logits = torch.cat(yhat_list, dim=0) 103 | Y_out = torch.cat(y_true_list, dim=0) 104 | Yhat = Yhat_logits.softmax(1) 105 | return Yhat.numpy(), Yhat_logits.numpy(), Y_out.numpy() 106 | 107 | def embed(self, gen): 108 | 109 | def embed_batch(model, x): 110 | """DOKUMENTATION! 
111 | """ 112 | for layer in model.children(): 113 | if layer._get_name() == 'Linear': 114 | break 115 | x = layer(x) 116 | return x 117 | 118 | self.network.eval() 119 | for batch_idx, (X, Y) in enumerate(gen): 120 | print('{}/{}'.format(batch_idx, len(gen))) 121 | if batch_idx == 0: 122 | V = embed_batch(model=self.network, x=X.to(torch.device('cuda'))).detach().cpu() 123 | # catch case where weights are added to train gen 124 | if len(Y) == 2: 125 | Y_out = Y[0].detach().cpu() 126 | else: 127 | Y_out = Y.detach().cpu() 128 | else: 129 | V = torch.cat((V, embed_batch(model=self.network, x=X.to(torch.device('cuda'))).detach().cpu()), dim=0) 130 | if len(Y) == 2: 131 | Y_out = torch.cat((Y_out, Y[0].detach().cpu()), dim=0) 132 | else: 133 | Y_out = torch.cat((Y_out, Y.detach().cpu()), dim=0) 134 | return V.numpy(), Y_out.numpy() 135 | 136 | def evaluate(self, data_loader: DataLoader, early_stopping: bool = False, plot_reliability: bool = False, model_name: str = None) -> dict: 137 | """ 138 | This method will calculate the metrics on a given data_loader. 139 | Ie. the losses are not calculated in this method 140 | Args: 141 | data_loader: 142 | DataLoader to calculate metrics on 143 | Returns: 144 | A dict of metrics 145 | """ 146 | if early_stopping: 147 | self.network = self.load_checkpoint() 148 | yhat_prob, _, y = self.predict(data_loader=data_loader) 149 | metrics = calculate_classification_metrics(yhat_prob, y) 150 | 151 | for callback in self.callbacks: 152 | callback.on_evaluation_end(metrics) 153 | 154 | if plot_reliability: 155 | store_reliability(y=y, yhat_prob=yhat_prob, model_name=model_name) 156 | 157 | return metrics 158 | 159 | def save_checkpoint(self, path: str = '', step: int = 1, verbose: bool = False): 160 | checkpoint = {'network': self.network, 161 | 'state_dict': self.network.state_dict(), 162 | 'step': step} 163 | # store checkpoint in random temp file (avoid issues training models in parallel) 164 | torch.save(checkpoint, self.checkpoint_file.name) 165 | print(f'Stored checkpoint at step {step}') 166 | 167 | def load_checkpoint(self, path: str = ''): 168 | checkpoint = torch.load(self.checkpoint_file.name) 169 | network = checkpoint['network'] 170 | # store early stopping step 171 | self.es_step = checkpoint['step'] 172 | print(f"Load Best Model from Step {self.es_step}") 173 | 174 | # overwrite network params with that of the checkpoint 175 | network.load_state_dict(checkpoint['state_dict']) 176 | for parameter in network.parameters(): 177 | parameter.requires_grad = False 178 | network.eval() 179 | 180 | # clear checkpoint file 181 | self.checkpoint_file.close() 182 | return network -------------------------------------------------------------------------------- /ssltsc/models/fixmatch.py: -------------------------------------------------------------------------------- 1 | """Implementation of supervised baseline model 2 | """ 3 | import math 4 | import mlflow 5 | import os 6 | import pdb 7 | import shutil 8 | import tempfile 9 | import torch 10 | from tqdm import tqdm 11 | from torch.utils.data import DataLoader 12 | from torch.nn.functional import cross_entropy 13 | 14 | from .basemodel import BaseModel 15 | from .utils import calculate_classification_metrics, get_cosine_schedule_with_warmup, \ 16 | interleave, de_interleave, accuracy, AverageMeter 17 | 18 | class Fixmatch(BaseModel): 19 | """Train backbone architecture supervised only as supervised baseline 20 | for ssl experiments 21 | """ 22 | def __init__(self, backbone, backbone_dict, callbacks=None): 23 
| super().__init__(backbone=backbone, backbone_dict=backbone_dict, callbacks=callbacks) 24 | 25 | def load_checkpoint(self): 26 | checkpoint = torch.load(self.checkpoint_file.name) 27 | network = checkpoint['model'] 28 | self.es_step = checkpoint['step'] 29 | 30 | if self.use_ema: 31 | network.load_state_dict(checkpoint['ema_state_dict']) 32 | else: 33 | network.load_state_dict(checkpoint['state_dict']) 34 | for parameter in network.parameters(): 35 | parameter.requires_grad = False 36 | network.eval() 37 | 38 | self.checkpoint_file.close() 39 | return network 40 | 41 | def save_checkpoint(self, step): 42 | model_to_save = self.network.module if hasattr(self.network, "module") else self.network 43 | if self.use_ema: 44 | ema_to_save = self.ema_model.ema.module if hasattr(self.ema_model.ema, "module") else self.ema_model.ema 45 | 46 | checkpoint= { 47 | 'step': step, 48 | 'model': model_to_save, 49 | 'state_dict': model_to_save.state_dict(), 50 | 'ema_state_dict': ema_to_save.state_dict() if self.use_ema else None, 51 | } 52 | 53 | torch.save(checkpoint, self.checkpoint_file.name) 54 | print(f'Stored checkpoint at step {step}') 55 | 56 | def predict(self, data_loader): 57 | device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu") 58 | 59 | if self.use_ema: 60 | eval_model = self.ema_model.ema 61 | else: 62 | eval_model = self.network 63 | 64 | yhat_list = [] 65 | y_true_list = [] 66 | with torch.no_grad(): 67 | for _, (X, Y) in enumerate(data_loader): 68 | eval_model.eval() 69 | 70 | X = X.to(device) 71 | Yhat = eval_model(X).detach().cpu() 72 | Y_out = Y.detach().cpu() 73 | 74 | yhat_list.append(Yhat) 75 | y_true_list.append(Y_out) 76 | 77 | Yhat_logits = torch.cat(yhat_list, dim=0) 78 | Y_out = torch.cat(y_true_list, dim=0) 79 | Yhat = Yhat_logits.softmax(1) 80 | return Yhat.numpy(), Yhat_logits.numpy(), Y_out.numpy() 81 | 82 | def _validate_one_dataset(self, data_loader: DataLoader, step: int): 83 | yhat_prob, yhat_logits, y = self.predict(data_loader) 84 | metrics = calculate_classification_metrics(yhat_prob, y) 85 | 86 | metrics['loss'] = cross_entropy(torch.tensor(yhat_logits), torch.tensor(y).long()).item() / len(y) 87 | 88 | return metrics 89 | 90 | def train(self, 91 | opt_dict, 92 | data_dict, 93 | model_params, 94 | exp_params, 95 | optimizer=torch.optim.Adam): 96 | """train the model for n_steps 97 | """ 98 | device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu") 99 | self.use_ema = model_params['use_ema'] 100 | 101 | # optimizer and lr scheduler 102 | no_decay = ['bias', 'bn'] 103 | grouped_parameters = [ 104 | { 105 | 'params': [p for n, p in self.network.named_parameters() if not any(nd in n for nd in no_decay)], 106 | 'weight_decay': model_params['weight_decay'] 107 | }, 108 | { 109 | 'params': [p for n, p in self.network.named_parameters() if any(nd in n for nd in no_decay)], 110 | 'weight_decay': 0.0 111 | } 112 | ] 113 | optimizer = torch.optim.SGD(grouped_parameters, lr=model_params['lr'], momentum=0.9, nesterov=True) 114 | if exp_params['lr_scheduler'] == 'cosine': 115 | scheduler = get_cosine_schedule_with_warmup(optimizer, model_params['warmup_epochs'], exp_params['n_steps']) 116 | else: 117 | scheduler = None 118 | 119 | # ema model 120 | if self.use_ema: 121 | from .utils import ModelEMA 122 | self.ema_model = ModelEMA(self.network, model_params['ema_decay'], device) 123 | else: 124 | self.ema_model = None 125 | 126 | self.network.zero_grad() 127 | 128 | scaler = torch.cuda.amp.GradScaler() 129 | for step in 
tqdm(range(0, exp_params['n_steps'])): 130 | self.network.train() 131 | 132 | for cb in self.callbacks: 133 | cb.on_train_batch_start() 134 | 135 | try: 136 | inputs_x, targets_x = labelled_iter.next() 137 | except: 138 | labelled_iter = iter(data_dict['train_gen_l']) 139 | inputs_x, targets_x = labelled_iter.next() 140 | try: 141 | (inputs_u_w, inputs_u_s), _ = unlabelled_iter.next() 142 | except: 143 | unlabelled_iter = iter(data_dict['train_gen_ul']) 144 | (inputs_u_w, inputs_u_s), _ = unlabelled_iter.next() 145 | 146 | batch_size = inputs_x.shape[0] 147 | mu = inputs_u_w.shape[0] // batch_size 148 | 149 | with torch.cuda.amp.autocast(): 150 | inputs = interleave(torch.cat((inputs_x, inputs_u_w, inputs_u_s)), 2*mu+1).to(device) 151 | targets_x = targets_x.to(device) 152 | logits = self.network(inputs) 153 | logits = de_interleave(logits, 2*mu+1) 154 | logits_x = logits[:batch_size] 155 | logits_u_w, logits_u_s = logits[batch_size:].chunk(2) 156 | del logits 157 | 158 | Lx = cross_entropy(logits_x, targets_x, reduction='mean') 159 | train_acc = accuracy(logits_x, targets_x, topk=(1,))[0] 160 | 161 | pseudo_label = torch.softmax(logits_u_w.detach()/model_params['temp'], dim=-1) 162 | max_probs, targets_u = torch.max(pseudo_label, dim=-1) 163 | mask = max_probs.ge(model_params['threshold']).float() 164 | Lu = (cross_entropy(logits_u_s, targets_u, reduction='none') * mask).mean() 165 | 166 | loss = Lx + model_params['lambda_u'] * Lu 167 | 168 | scaler.scale(loss).backward() 169 | scaler.step(optimizer) 170 | scaler.update() 171 | if scheduler is not None: 172 | scheduler.step() 173 | lr = scheduler.get_last_lr()[0] 174 | else: 175 | lr = optimizer.param_groups[0]['lr'] 176 | 177 | if self.use_ema: 178 | self.ema_model.update(self.network) 179 | self.network.zero_grad() 180 | 181 | for cb in self.callbacks: 182 | cb.on_train_batch_end(step=step) 183 | 184 | if step % exp_params['val_steps'] == 0 and step > 0: 185 | best_metric = 0.0 if len(self.history) == 0 else max(self.history[exp_params['early_stopping_metric']]) 186 | metrics = self.validate(step=step, 187 | hp_dict={'lr': lr}, 188 | train_dataloader=data_dict['train_gen_val'], 189 | val_dataloader=data_dict['val_gen']) 190 | 191 | if exp_params['early_stopping'] and metrics[exp_params['early_stopping_metric']] > best_metric: 192 | self.save_checkpoint(step=step) -------------------------------------------------------------------------------- /ssltsc/models/losses.py: -------------------------------------------------------------------------------- 1 | """ 2 | losses used in sslts 3 | """ 4 | import numpy as np 5 | import torch 6 | import pdb 7 | import sklearn as sk 8 | 9 | from torch.nn import functional as F 10 | from numba import jit 11 | from torch.autograd import Function 12 | 13 | 14 | def mixup_cross_entropy(input_logits, targets): 15 | """cross entropy over 1hot-encoded (non binary) labels 16 | as required by mixup'ed labels 17 | """ 18 | loss = - torch.mean(torch.sum(targets * torch.log(input_logits.softmax(1)), 1)) 19 | return loss 20 | 21 | 22 | def softmax_mse_loss(input_logits, target_logits): 23 | """takes softmax on both sides and returns MSE loss 24 | Returns the sum over all examples. 
25 | 26 | Arguments: 27 | input_logits -- student logits 28 | target_logits -- 'true' teacher logits 29 | 30 | Returns: 31 | loss -- the mse consistency loss 32 | """ 33 | assert input_logits.size() == target_logits.size() 34 | input_softmax = input_logits.softmax(1) 35 | target_softmax = target_logits.softmax(1) 36 | num_classes = input_logits.size()[1] 37 | loss = F.mse_loss(input_softmax, target_softmax, reduction='sum') / num_classes 38 | return loss 39 | 40 | 41 | def entropy_loss(y_ul): 42 | """Entropy regularization for VAT 43 | """ 44 | p = F.softmax(y_ul, dim=1) 45 | return -(p * F.log_softmax(y_ul, dim=1)).sum(dim=1).mean(dim=0) 46 | 47 | 48 | 49 | ########## 50 | # Soft dtw loss as proposed by cuturi et al 2017 51 | # implement by https://github.com/Sleepwalking/pytorch-softdtw 52 | ########## 53 | 54 | 55 | @jit(nopython = True) 56 | def compute_softdtw(D, gamma): 57 | B = D.shape[0] 58 | N = D.shape[1] 59 | M = D.shape[2] 60 | R = np.ones((B, N + 2, M + 2)) * np.inf 61 | R[:, 0, 0] = 0 62 | for k in range(B): 63 | for j in range(1, M + 1): 64 | for i in range(1, N + 1): 65 | r0 = -R[k, i - 1, j - 1] / gamma 66 | r1 = -R[k, i - 1, j] / gamma 67 | r2 = -R[k, i, j - 1] / gamma 68 | rmax = max(max(r0, r1), r2) 69 | rsum = np.exp(r0 - rmax) + np.exp(r1 - rmax) + np.exp(r2 - rmax) 70 | softmin = - gamma * (np.log(rsum) + rmax) 71 | R[k, i, j] = D[k, i - 1, j - 1] + softmin 72 | return R 73 | 74 | @jit(nopython = True) 75 | def compute_softdtw_backward(D_, R, gamma): 76 | B = D_.shape[0] 77 | N = D_.shape[1] 78 | M = D_.shape[2] 79 | D = np.zeros((B, N + 2, M + 2)) 80 | E = np.zeros((B, N + 2, M + 2)) 81 | D[:, 1:N + 1, 1:M + 1] = D_ 82 | E[:, -1, -1] = 1 83 | R[:, : , -1] = -np.inf 84 | R[:, -1, :] = -np.inf 85 | R[:, -1, -1] = R[:, -2, -2] 86 | for k in range(B): 87 | for j in range(M, 0, -1): 88 | for i in range(N, 0, -1): 89 | a0 = (R[k, i + 1, j] - R[k, i, j] - D[k, i + 1, j]) / gamma 90 | b0 = (R[k, i, j + 1] - R[k, i, j] - D[k, i, j + 1]) / gamma 91 | c0 = (R[k, i + 1, j + 1] - R[k, i, j] - D[k, i + 1, j + 1]) / gamma 92 | a = np.exp(a0) 93 | b = np.exp(b0) 94 | c = np.exp(c0) 95 | E[k, i, j] = E[k, i + 1, j] * a + E[k, i, j + 1] * b + E[k, i + 1, j + 1] * c 96 | return E[:, 1:N + 1, 1:M + 1] 97 | 98 | class _SoftDTW(Function): 99 | @staticmethod 100 | def forward(ctx, D, gamma): 101 | dev = D.device 102 | dtype = D.dtype 103 | gamma = torch.Tensor([gamma]).to(dev).type(dtype) 104 | D_ = D.detach().cpu().numpy() 105 | g_ = gamma.item() 106 | R = torch.Tensor(compute_softdtw(D_, g_)).to(dev).type(dtype) 107 | ctx.save_for_backward(D, R, gamma) 108 | return R[:, -2, -2] 109 | 110 | @staticmethod 111 | def backward(ctx, grad_output): 112 | dev = grad_output.device 113 | dtype = grad_output.dtype 114 | D, R, gamma = ctx.saved_tensors 115 | D_ = D.detach().cpu().numpy() 116 | R_ = R.detach().cpu().numpy() 117 | g_ = gamma.item() 118 | E = torch.Tensor(compute_softdtw_backward(D_, R_, g_)).to(dev).type(dtype) 119 | return grad_output.view(-1, 1, 1).expand_as(E) * E, None 120 | 121 | 122 | class SoftDTW(torch.nn.Module): 123 | def __init__(self, gamma=1.0, normalize=False): 124 | super(SoftDTW, self).__init__() 125 | self.normalize = normalize 126 | self.gamma = gamma 127 | self.func_dtw = _SoftDTW.apply 128 | 129 | def calc_distance_matrix(self, x, y): 130 | n = x.size(1) 131 | m = y.size(1) 132 | d = x.size(2) 133 | x = x.unsqueeze(2).expand(-1, n, m, d) 134 | y = y.unsqueeze(1).expand(-1, n, m, d) 135 | dist = torch.pow(x - y, 2).sum(3) 136 | return dist 137 | 138 | def 
forward(self, x, y): 139 | assert len(x.shape) == len(y.shape) 140 | squeeze = False 141 | if len(x.shape) < 3: 142 | x = x.unsqueeze(0) 143 | y = y.unsqueeze(0) 144 | squeeze = True 145 | if self.normalize: 146 | D_xy = self.calc_distance_matrix(x, y) 147 | out_xy = self.func_dtw(D_xy, self.gamma) 148 | D_xx = self.calc_distance_matrix(x, x) 149 | out_xx = self.func_dtw(D_xx, self.gamma) 150 | D_yy = self.calc_distance_matrix(y, y) 151 | out_yy = self.func_dtw(D_yy, self.gamma) 152 | # distance 153 | result = out_xy - (1 / 2) * (out_xx + out_yy) 154 | else: 155 | D_xy = self.calc_distance_matrix(x, y) 156 | out_xy = self.func_dtw(D_xy, self.gamma) 157 | # discrepancy 158 | result = out_xy 159 | return result.squeeze(0) if squeeze else result 160 | 161 | 162 | def rbf_kernel_safe(X, Y=None, gamma=None): 163 | """Different rbf_kernel function which avoids returning of 0.0 distances 164 | Important for label prop/ spreading algorithms as semi-supervised baseline 165 | """ 166 | X, Y = sk.metrics.pairwise.check_pairwise_arrays(X, Y) 167 | if gamma is None: 168 | gamma = 1.0 / X.shape[1] 169 | 170 | K = sk.metrics.pairwise.euclidean_distances(X, Y, squared=True) 171 | K = K * (-1 * gamma) 172 | K -= K.max() 173 | np.exp(K, K) 174 | return K -------------------------------------------------------------------------------- /ssltsc/models/meanteacher.py: -------------------------------------------------------------------------------- 1 | """Code for Mean teacher model 2 | """ 3 | import torch.autograd 4 | import numpy as np 5 | import pandas as pd 6 | import datetime 7 | 8 | import torch 9 | import os 10 | import pdb 11 | import datetime 12 | import time 13 | import cProfile 14 | import torch.nn.functional as F 15 | 16 | from itertools import cycle 17 | from torch import nn, optim 18 | from torch.optim.lr_scheduler import CyclicLR, CosineAnnealingLR 19 | from sklearn.metrics import log_loss 20 | from matplotlib import pyplot as plt 21 | from torch.utils.data import DataLoader 22 | 23 | from .utils import ema_update, SigmoidScheduler, rampup 24 | from .losses import softmax_mse_loss 25 | from .basemodel import BaseModel 26 | from ssltsc.models.utils import calculate_classification_metrics 27 | from ssltsc.visualization import store_reliability 28 | 29 | torch.set_default_dtype(torch.float32) 30 | 31 | class MeanTeacher(BaseModel): 32 | """Mean Teacher model class 33 | 34 | Args: 35 | backbone: {nn.Module} 36 | 37 | """ 38 | def __init__(self, backbone, backbone_dict, callbacks=None): 39 | super().__init__(backbone=backbone, backbone_dict=backbone_dict, callbacks=callbacks) 40 | self.student = backbone(**backbone_dict) 41 | self.teacher = backbone(**backbone_dict) 42 | self.network = None # The network will be set throughout the train loop 43 | if torch.cuda.is_available(): 44 | self.student.to(torch.device('cuda')) 45 | self.teacher.to(torch.device('cuda')) 46 | 47 | def _validate_one_dataset(self, data_loader: DataLoader, step: int): 48 | """ 49 | Helper method that 50 | Args: 51 | data_loader: 52 | DataLoader to calculate metrics and losses on. 53 | step: 54 | The step in the trainin loop. This is useds for the loss calculation. 55 | 56 | Returns: 57 | The dict of metrics and the average total loss and average 58 | reconstructions losses over the batches. 
59 | """ 60 | all_metrics = {} 61 | for submodel in ['student', 'teacher']: 62 | yhat_prob, yhat_logits, y = self.predict(data_loader, which=submodel) 63 | # calculate general metrics 64 | metrics = calculate_classification_metrics(yhat_prob, y) 65 | 66 | # calculate the 'originally' reduced loss 67 | logloss = nn.CrossEntropyLoss(weight=None, reduction='sum') 68 | 69 | metrics['loss'] = logloss(torch.tensor(yhat_logits), torch.tensor(y)).item() / len(data_loader.dataset) 70 | if submodel == 'teacher': 71 | all_metrics.update({k: v for k, v in metrics.items()}) 72 | elif submodel == 'student': 73 | all_metrics.update({submodel + '_' + k: v for k, v in metrics.items()}) 74 | 75 | return all_metrics 76 | 77 | def train(self, 78 | opt_dict, 79 | data_dict, 80 | model_params, 81 | exp_params, 82 | optimizer=optim.Adam): 83 | 84 | # objective function 85 | # ignore all -1 labels in the loss computation 86 | objective_sup = nn.CrossEntropyLoss(reduction='sum', ignore_index=-1) 87 | optimizer = optimizer(self.student.parameters(), **opt_dict) 88 | 89 | if torch.cuda.is_available(): 90 | self.student.to(torch.device('cuda')) 91 | self.teacher.to(torch.device('cuda')) 92 | 93 | # detach teacher from gradient flow 94 | for param in self.teacher.parameters(): 95 | param.detach_() 96 | 97 | if exp_params['lr_scheduler'] == 'cosine': 98 | scheduler = CosineAnnealingLR(optimizer=optimizer, 99 | eta_min=0.0, 100 | T_max=exp_params['n_steps'] * 1.2) 101 | elif exp_params['lr_scheduler'] == 'sigmoid': 102 | scheduler = SigmoidScheduler(optimizer=optimizer, 103 | rampup_length=exp_params['rampup_length']) 104 | else: 105 | scheduler = None 106 | 107 | self.student.train() 108 | self.teacher.train() 109 | 110 | track_class_loss, track_cons_loss = 0, 0 111 | 112 | scaler = torch.cuda.amp.GradScaler() 113 | 114 | for step in range(exp_params['n_steps']): 115 | try: 116 | (X_stud, X_teach), Y = next(train_gen_iter) 117 | except: 118 | train_gen_iter = iter(data_dict['train_gen_l']) 119 | (X_stud, X_teach), Y = next(train_gen_iter) 120 | 121 | with torch.cuda.amp.autocast(): 122 | if torch.cuda.is_available(): 123 | X_stud = X_stud.to(torch.device('cuda')) 124 | X_teach = X_teach.to(torch.device('cuda')) 125 | Y = Y.to(torch.device('cuda')) 126 | 127 | yhat_all_stud = self.student(X_stud) 128 | with torch.no_grad(): 129 | yhat_all_teach = self.teacher(X_teach) 130 | minibatch_size = len(Y) 131 | 132 | # objective_sup discards -1 labeled data 133 | loss_sup = objective_sup(yhat_all_stud, Y) / minibatch_size 134 | # combine losses 135 | beta = model_params['max_w'] * rampup(current=step, 136 | rampup_length=model_params['rampup_length']) 137 | # mse over the predictions on all samples 138 | # softmax happens inside the loss 139 | # consistency loss 140 | loss_cons = beta * softmax_mse_loss(yhat_all_stud, yhat_all_teach) / minibatch_size 141 | loss = loss_sup + loss_cons 142 | track_class_loss += loss_sup.item() 143 | track_cons_loss += loss_cons.item() 144 | 145 | # update student 146 | optimizer.zero_grad() 147 | scaler.scale(loss).backward() 148 | scaler.step(optimizer) 149 | scaler.update() 150 | # update teacher via exponential moving average 151 | ema_update(student=self.student, 152 | teacher=self.teacher, 153 | alpha=model_params['alpha_ema'], 154 | verbose=False) 155 | 156 | if scheduler is not None: 157 | scheduler.step() 158 | lr = scheduler.get_last_lr()[0] 159 | else: 160 | lr = model_params['lr'] 161 | # validation 162 | if step % exp_params['val_steps'] == 0 and step > 0: 163 | best_metric = 0.0 if 
len(self.history) == 0 else max(self.history[exp_params['early_stopping_metric']]) 164 | metrics = self.validate(step=step, 165 | hp_dict={'beta': beta, 'lr': lr}, 166 | train_dataloader=data_dict['train_gen_val'], 167 | val_dataloader=data_dict['val_gen']) 168 | 169 | # early stopping 170 | self.network = self.teacher 171 | if exp_params['early_stopping'] and metrics[exp_params['early_stopping_metric']] > best_metric: 172 | self.save_checkpoint(step=step, verbose=True) 173 | 174 | # Training is over 175 | for callback in self.callbacks: 176 | callback.on_train_end(self.history) 177 | 178 | def evaluate(self, data_loader: DataLoader, early_stopping: bool = False, which: str = 'teacher', plot_reliability: bool =False, model_name: str = 'meanteacher') -> dict: 179 | """ 180 | This method will calculate the metrics on a given data_loader. 181 | Ie. the losses are not calculated in this method 182 | Args: 183 | data_loader: 184 | DataLoader to calculate metrics on 185 | Returns: 186 | A dict of metrics 187 | """ 188 | self.network = self.teacher if which == 'teacher' else self.student 189 | if early_stopping: 190 | self.network = self.load_checkpoint() 191 | yhat_prob, _, y = self.predict(data_loader=data_loader) 192 | metrics = calculate_classification_metrics(yhat_prob, y) 193 | 194 | for callback in self.callbacks: 195 | callback.on_evaluation_end(metrics) 196 | 197 | if plot_reliability: 198 | store_reliability(y=y, yhat_prob=yhat_prob, model_name=model_name) 199 | 200 | return metrics 201 | 202 | def predict(self, data_loader: DataLoader, which='student'): 203 | self.network = self.student if which == 'student' else self.teacher 204 | # inherit predict method from ABC class 205 | preds = super().predict(data_loader=data_loader) 206 | self.network = None 207 | return preds 208 | 209 | def print_arch(self): 210 | print(self.student) 211 | 212 | 213 | # timing decorator 214 | def timeit(method): 215 | def timed(*args, **kw): 216 | ts = time.time() 217 | result = method(*args, **kw) 218 | te = time.time() 219 | print('%r %2.2f ms' % (method.__name__, (te - ts) * 1000)) 220 | return result 221 | return timed 222 | 223 | @timeit 224 | def printer(): 225 | for i in range(1000): 226 | a = i**2 - 98 227 | print('done') 228 | -------------------------------------------------------------------------------- /ssltsc/models/model_factory.py: -------------------------------------------------------------------------------- 1 | from ssltsc.models.ladder import LadderNet 2 | from ssltsc.models.supervised import Supervised 3 | from ssltsc.models.selfsupervised import SelfSupervised 4 | from ssltsc.models.mixmatch import MixMatch 5 | from ssltsc.models.meanteacher import MeanTeacher 6 | from ssltsc.models.vat import VAT 7 | from ssltsc.models.fixmatch import Fixmatch 8 | 9 | MODEL_DICT = {'supervised': Supervised, 10 | 'vat': VAT, 11 | 'mixmatch': MixMatch, 12 | 'meanteacher': MeanTeacher, 13 | 'ladder': LadderNet, 14 | 'selfsupervised': SelfSupervised, 15 | 'fixmatch': Fixmatch} 16 | 17 | 18 | def model_factory(model_name, backbone, backbone_dict, callbacks): 19 | """Create a model instance 20 | 21 | Args: 22 | model_name (str): name of the model 23 | backbone (BaseModel): backbone class 24 | backbone_dict (dict): backbone dictionary to instantiate above class 25 | callbacks (list): list of callbacks 26 | 27 | Returns: 28 | nn.model: model instance 29 | """ 30 | return MODEL_DICT[model_name](backbone=backbone, 31 | backbone_dict=backbone_dict, 32 | callbacks=callbacks) 33 | 
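# Usage sketch (illustrative only): the backbone class and its keyword arguments
# below are placeholder choices borrowed from the test suite, not a prescribed setup;
# data_dict, model_params and exp_params are assumed to be prepared elsewhere
# (e.g. by the experiment scripts driven by the config files).
#
#     from ssltsc.architectures.fcn import LadderFCN
#     model = model_factory(model_name='meanteacher',
#                           backbone=LadderFCN,
#                           backbone_dict={'channels': 6, 'n_classes': 19},
#                           callbacks=[])
#     model.train(opt_dict={'lr': 1e-3}, data_dict=data_dict,
#                 model_params=model_params, exp_params=exp_params)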
-------------------------------------------------------------------------------- /ssltsc/models/selfsupervised.py: -------------------------------------------------------------------------------- 1 | """Implementation of the self-supervised model by Jawed et al. 2020 2 | """ 3 | import torch.autograd 4 | import pandas as pd 5 | import time 6 | import torch 7 | import pdb 8 | import numpy as np 9 | 10 | from torch import nn, optim 11 | from torch.optim.lr_scheduler import CosineAnnealingLR 12 | from tslearn.barycenters import softdtw_barycenter 13 | from .losses import mixup_cross_entropy 14 | from .basemodel import BaseModel 15 | from torch.utils.data import DataLoader 16 | 17 | from .utils import calculate_classification_metrics 18 | 19 | class SelfSupervised(BaseModel): 20 | """Train backbone architecture supervised only as supervised baseline 21 | for ssl experiments 22 | """ 23 | def __init__(self, backbone, backbone_dict, callbacks=None): 24 | super().__init__(backbone=backbone, backbone_dict=backbone_dict, callbacks=callbacks) 25 | 26 | def _validate_one_dataset(self, data_loader: DataLoader, step: int): 27 | """ 28 | Helper method that 29 | Args: 30 | data_loader: 31 | DataLoader to calculate metrics and losses on. 32 | step: 33 | The step in the trainin loop. This is useds for the loss calculation. 34 | 35 | Returns: 36 | The dict of metrics and the average total loss and average 37 | reconstructions losses over the batches. 38 | """ 39 | yhat_prob, yhat_logits, y = self.predict(data_loader) 40 | # calculate general metrics 41 | metrics = calculate_classification_metrics(yhat_prob, y) 42 | 43 | # calculate the 'originally' reduced loss 44 | logloss = nn.CrossEntropyLoss(weight=None, reduction='sum') 45 | 46 | metrics['loss'] = logloss(torch.tensor(yhat_logits), torch.tensor(y)).item() / len(data_loader.dataset) 47 | 48 | return metrics 49 | 50 | def train(self, 51 | opt_dict, 52 | data_dict, 53 | model_params, 54 | exp_params, 55 | optimizer=optim.Adam): 56 | """train the model for n_steps 57 | """ 58 | 59 | assert data_dict['train_gen_forecast'] is not None, "You need to give me a forecasting data loader" 60 | 61 | # objective functions for supervised and self-supervised loss 62 | objective_sup = nn.CrossEntropyLoss(reduction='mean') 63 | 64 | objective_forecast = nn.MSELoss(reduction='mean') 65 | 66 | optimizer = optimizer(self.network.parameters(), **opt_dict) 67 | 68 | if exp_params['lr_scheduler'] == 'cosine': 69 | scheduler = CosineAnnealingLR(optimizer=optimizer, 70 | eta_min=0.0, 71 | T_max=exp_params['n_steps'] * 1.2) 72 | else: 73 | scheduler = None 74 | 75 | scaler = torch.cuda.amp.GradScaler() 76 | 77 | train_tracking_loss = train_cl_loss = train_fc_loss = 0.0 78 | for step in range(exp_params['n_steps']): 79 | self.network.train() 80 | 81 | for cb in self.callbacks: 82 | cb.on_train_batch_start() 83 | 84 | # get the classification data 85 | try: 86 | X, Y = next(train_gen_classification_iter) 87 | except: 88 | train_gen_classification_iter = iter(data_dict['train_gen_l']) 89 | X, Y = next(train_gen_classification_iter) 90 | 91 | # get the forecasting data 92 | try: 93 | X_fc, Y_fc = next(train_gen_forecast_iter) 94 | except: 95 | train_gen_forecast_iter = iter(data_dict['train_gen_forecast']) 96 | X_fc, Y_fc = next(train_gen_forecast_iter) 97 | 98 | optimizer.zero_grad() 99 | 100 | with torch.cuda.amp.autocast(): 101 | if torch.cuda.is_available(): 102 | X = X.to(torch.device('cuda')) 103 | Y = Y.to(torch.device('cuda')) 104 | X_fc = X_fc.to(torch.device('cuda')) 105 | 
Y_fc = Y_fc.to(torch.device('cuda')) 106 | 107 | Yhat_cl, Yhat_fc = self.network.forward_train(X, X_fc) 108 | 109 | loss_cl = objective_sup(Yhat_cl, Y) 110 | loss_fc = objective_forecast(Yhat_fc, Y_fc) 111 | loss = loss_cl + model_params['lambda'] * loss_fc 112 | 113 | # log losses 114 | train_tracking_loss += loss.item() 115 | train_cl_loss += loss_cl.item() 116 | train_fc_loss += loss_fc.item() 117 | 118 | scaler.scale(loss).backward() 119 | scaler.step(optimizer) 120 | scaler.update() 121 | if scheduler is not None: 122 | scheduler.step() 123 | lr = scheduler.get_last_lr()[0] 124 | else: 125 | lr = optimizer.param_groups[0]['lr'] 126 | 127 | for cb in self.callbacks: 128 | cb.on_train_batch_end(step=step) 129 | 130 | if step % exp_params['val_steps'] == 0 and step > 0: 131 | best_metric = 0.0 if len(self.history) == 0 else max(self.history[exp_params['early_stopping_metric']]) 132 | print(f'Step {step}, loss {round(loss.item(), 5)}, class loss {round(loss_cl.item(), 5)}, forecast loss {round(loss_fc.item(), 5)}') 133 | metrics = self.validate(step=step, 134 | hp_dict={'lr': lr, 135 | 'train_tracking_loss_cl': train_cl_loss / exp_params['val_steps'], 136 | 'train_tracking_loss_fc': train_fc_loss / exp_params['val_steps'], 137 | 'train_tracking_loss': train_tracking_loss / exp_params['val_steps']}, 138 | train_dataloader=data_dict['train_gen_val'], 139 | val_dataloader=data_dict['val_gen']) 140 | 141 | # early stopping 142 | if exp_params['early_stopping'] and metrics[exp_params['early_stopping_metric']] > best_metric: 143 | self.save_checkpoint(step=step, verbose=True) 144 | 145 | train_tracking_loss = train_cl_loss = train_fc_loss = 0.0 146 | 147 | # Training is over 148 | for callback in self.callbacks: 149 | callback.on_train_end(self.history) 150 | -------------------------------------------------------------------------------- /ssltsc/models/supervised.py: -------------------------------------------------------------------------------- 1 | """Implementation of supervised baseline model 2 | """ 3 | import torch.autograd 4 | import pandas as pd 5 | import time 6 | import torch 7 | import pdb 8 | import numpy as np 9 | 10 | from torch import nn, optim 11 | from torch.optim.lr_scheduler import CosineAnnealingLR 12 | from tslearn.barycenters import softdtw_barycenter 13 | from .losses import mixup_cross_entropy 14 | from .basemodel import BaseModel 15 | from torch.utils.data import DataLoader 16 | 17 | from .utils import calculate_classification_metrics 18 | 19 | class Supervised(BaseModel): 20 | """Train backbone architecture supervised only as supervised baseline 21 | for ssl experiments 22 | """ 23 | def __init__(self, backbone, backbone_dict, callbacks=None): 24 | super().__init__(backbone=backbone, backbone_dict=backbone_dict, callbacks=callbacks) 25 | 26 | def _indices_to_one_hot(self, data, n_classes): 27 | """Convert an iterable of indices to one-hot encoded labels 28 | Args: 29 | data: {np.array} output array with the respective class 30 | n_classes: {int} number of classes to one-hot encoded 31 | Returns: 32 | {np.array} a one-hot encoded array 33 | """ 34 | targets = data.reshape(-1) 35 | return torch.eye(n_classes)[targets] 36 | 37 | def _mixup(self, x1, x2, y1, y2, alpha=0.75, dtw=False, shuffle=False): 38 | """Mixup of two data points 39 | yields an interpolated mixture of both input samples 40 | """ 41 | # shuffle (x2, y2) if x1=x2 42 | if torch.all(y1==y2).item(): 43 | rand_idx = np.random.choice(a=np.arange(len(y1)), size=len(y1), replace=False) 44 | x2 = x2[rand_idx, ] 
45 |             y2 = y2[rand_idx]
46 |         beta = np.random.beta(alpha, alpha)
47 |         beta = max([beta, 1 - beta])
48 |         if dtw:
49 |             x = torch.empty(x1.shape)
50 |             w1 = max([beta, 1 - beta])
51 |             w = [w1, 1 - w1]
52 |             for i in range(x.shape[0]):
53 |                 x[i, 0, :] = torch.tensor(softdtw_barycenter(X=[x1[i, 0, :].cpu(), x2[i, 0, :].cpu()], weights=w)[:, 0])
54 |             y = beta * y1 + (1 - beta) * y2
55 |             return x.to(torch.device('cuda')), y
56 |         else:
57 |             x = beta * x1 + (1 - beta) * x2
58 |             y = beta * y1 + (1 - beta) * y2
59 |             return x, y
60 | 
61 |     def _validate_one_dataset(self, data_loader: DataLoader, step: int):
62 |         """
63 |         Helper method that computes validation metrics on a given data loader.
64 |         Args:
65 |             data_loader:
66 |                 DataLoader to calculate metrics and losses on.
67 |             step:
68 |                 The step in the training loop. This is used for the loss calculation.
69 | 
70 |         Returns:
71 |             A dict of metrics including the average cross-entropy loss
72 |             over the dataset.
73 |         """
74 |         yhat_prob, yhat_logits, y = self.predict(data_loader)
75 |         # calculate general metrics
76 |         metrics = calculate_classification_metrics(yhat_prob, y)
77 | 
78 |         # calculate the 'originally' reduced loss
79 |         logloss = nn.CrossEntropyLoss(weight=None, reduction='sum')
80 | 
81 |         metrics['loss'] = logloss(torch.tensor(yhat_logits), torch.tensor(y).long()).item() / len(data_loader.dataset)
82 | 
83 |         return metrics
84 | 
85 |     def train(self,
86 |               opt_dict,
87 |               data_dict,
88 |               model_params,
89 |               exp_params,
90 |               optimizer=optim.Adam):
91 |         """train the model for n_steps
92 |         """
93 |         # objective functions for both losses
94 |         # different loss if mixup training:
95 |         objective_sup = nn.CrossEntropyLoss(reduction='mean', ignore_index=-1)
96 | 
97 |         optimizer = optimizer(self.network.parameters(), **opt_dict)
98 | 
99 |         if exp_params['lr_scheduler'] == 'cosine':
100 |             scheduler = CosineAnnealingLR(optimizer=optimizer,
101 |                                           eta_min=0.0,
102 |                                           T_max=exp_params['n_steps'] * 1.2)
103 |         else:
104 |             scheduler = None
105 | 
106 |         scaler = torch.cuda.amp.GradScaler()
107 | 
108 |         for step in range(exp_params['n_steps']):
109 |             self.network.train()
110 | 
111 |             for cb in self.callbacks:
112 |                 cb.on_train_batch_start()
113 | 
114 |             try:
115 |                 X, Y = next(train_gen_iter)
116 |             except:
117 |                 train_gen_iter = iter(data_dict['train_gen_l'])
118 |                 X, Y = next(train_gen_iter)
119 | 
120 |             if model_params['mixup']:
121 |                 # labels to 1hot vectors
122 |                 Y = self._indices_to_one_hot(data=Y, n_classes=data_dict['train_gen_l'].dataset.nclasses)  # assumes the labelled train loader's dataset exposes `nclasses`
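                # mixup step (see _mixup above): draw beta ~ Beta(alpha, alpha) and
                # interpolate inputs and one-hot labels, X = beta * X1 + (1 - beta) * X2,
                # Y = beta * Y1 + (1 - beta) * Y2; passing x1=x2 triggers the internal shuffle.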
123 | X, Y = self._mixup(x1=X, x2=X, y1=Y, y2=Y, shuffle=True) 124 | optimizer.zero_grad() 125 | 126 | # casts operations to mixed precision 127 | with torch.cuda.amp.autocast(): 128 | if torch.cuda.is_available(): 129 | X = X.to(torch.device('cuda')) 130 | Y = Y.to(torch.device('cuda')) 131 | 132 | Yhat = self.network(X) 133 | if model_params['mixup']: 134 | loss = mixup_cross_entropy(Yhat, Y.long()) 135 | else: 136 | loss = objective_sup(Yhat, Y.long()) 137 | 138 | # scales the loss, and calls backward() to create scaled gradients 139 | scaler.scale(loss).backward() 140 | # unscales gradients and calls optimizer.step() 141 | scaler.step(optimizer) 142 | # updates the scale for next iteration 143 | scaler.update() 144 | 145 | if scheduler is not None: 146 | scheduler.step() 147 | lr = scheduler.get_last_lr()[0] 148 | else: 149 | lr = optimizer.param_groups[0]['lr'] 150 | 151 | for cb in self.callbacks: 152 | cb.on_train_batch_end(step=step) 153 | 154 | if step % exp_params['val_steps'] == 0 and step > 0: 155 | best_metric = 0.0 if len(self.history) == 0 else max(self.history[exp_params['early_stopping_metric']]) 156 | metrics = self.validate(step=step, 157 | hp_dict={'lr': lr}, 158 | train_dataloader=data_dict['train_gen_val'], 159 | val_dataloader=data_dict['val_gen']) 160 | 161 | # early stopping 162 | if exp_params['early_stopping'] and metrics[exp_params['early_stopping_metric']] > best_metric: 163 | self.save_checkpoint(step=step, verbose=True) 164 | 165 | # Training is over 166 | for callback in self.callbacks: 167 | callback.on_train_end(self.history) 168 | -------------------------------------------------------------------------------- /ssltsc/models/utils.py: -------------------------------------------------------------------------------- 1 | """Utility functions for submodule 'models' 2 | """ 3 | from copy import deepcopy 4 | import math 5 | import numpy as np 6 | import pandas as pd 7 | import pdb 8 | import torch 9 | 10 | from sklearn.metrics import log_loss, roc_auc_score, f1_score 11 | from uncertainty_metrics.numpy import ece 12 | 13 | def ema_update(student, teacher, alpha=0.9, verbose=False): 14 | """Update a teacher model based on the exponential moving average 15 | of its weights and that of the current studen model. 
16 | 
17 |     Controlled by alpha \\in [0, 1] with
18 |     * alpha -> 1: teacher = past teacher
19 |     * alpha -> 0: teacher = student, std SGD training
20 |     Args:
21 |         student: the student model
22 |         teacher: the teacher model
23 |         alpha: ema alpha rate
24 |         verbose: {bool} for checking: with alpha = 0.0 this should print True
25 |             only as weights from both models should be equal
26 |     """
27 |     for teacher_param, student_param in zip(teacher.parameters(), student.parameters()):
28 |         # alpha * theta'_t-1 + (1-a) * theta_t
29 |         teacher_param.data.mul_(alpha).add_(student_param.data, alpha=1 - alpha)
30 |         if verbose:
31 |             print(teacher_param.data.equal(student_param.data))
32 | 
33 | 
34 | class SigmoidScheduler:
35 |     """Sigmoid rampup for the learning rate as used in the
36 |     mean teacher implementation
37 |     """
38 |     def __init__(self, optimizer, rampup_length):
39 |         self.optimizer = optimizer
40 |         self.rampup_length = rampup_length
41 |         self.counter = 0
42 |         self.init_lr = optimizer.param_groups[0]['lr']
43 |         self.last_lr = 0.0
44 | 
45 |     def step(self):
46 |         self.optimizer.param_groups[0]['lr'] = self.init_lr * rampup(self.counter, self.rampup_length)
47 |         self.counter += 1
48 |         self.last_lr = self.optimizer.param_groups[0]['lr']
49 | 
50 |     def get_last_lr(self):
51 |         return [self.last_lr]
52 | 
53 | 
54 | def rampup(current, rampup_length):
55 |     """sigmoid rampup
56 |     """
57 |     if current < rampup_length:
58 |         p = max(0.0, float(current)) / float(rampup_length)
59 |         p = 1.0 - p
60 |         return float(np.exp(-p * p * 5.0))
61 |     else:
62 |         return 1.0
63 | 
64 | 
65 | def linear_rampup(step, rampup_length=10):
66 |     """linear rampup factor for the mixmatch model
67 |     step = current step
68 |     rampup_length = amount of steps till final weight
69 |     """
70 |     if rampup_length == 0:
71 |         return 1.0
72 |     else:
73 |         return float(np.clip(step / rampup_length, 0, 1))
74 | 
75 | 
76 | def calculate_classification_metrics(pred_prob_y, true_y) -> dict:
77 |     """
78 |     Wrapper to calculate all kinds of classification metrics
79 |     which are then passed to the (mlflow) logger
80 |     Args:
81 |         pred_prob_y: predicted class probabilities, shape (n_samples, n_classes)
82 |         true_y: ground-truth labels; -1 marks unlabelled samples, which are ignored
83 |     Returns:
84 |         A dictionary of metrics.
85 | """ 86 | assert pred_prob_y[:, 0].shape == true_y.shape 87 | idx_labelled = np.where(true_y != -1)[0] 88 | pred_prob_y = pred_prob_y[idx_labelled] 89 | true_y = true_y[idx_labelled] 90 | yhat_hard = pred_prob_y.argmax(axis=1) 91 | 92 | # catch the binary case 93 | if pred_prob_y.shape[1] == 2: 94 | pred_prob_y = pred_prob_y[:, 1] 95 | metrics = {} 96 | # explicitly add list of possible labels in case of too small batch sizes 97 | # catch binary case as well 98 | labels = np.arange(pred_prob_y.shape[1]) if len(pred_prob_y.shape) > 1 else np.arange(2) 99 | metrics['ece'] = ece(labels=true_y, probs=pred_prob_y, num_bins=30) 100 | metrics['accuracy'] = sum(yhat_hard == true_y) / len(true_y) 101 | metrics['cross_entropy'] = log_loss(y_true=true_y, y_pred=pred_prob_y, labels=labels) 102 | metrics['weighted_auc'] = roc_auc_score(y_true=true_y, y_score=pred_prob_y, average='weighted', multi_class='ovr', labels=labels) 103 | metrics['macro_auc'] = roc_auc_score(y_true=true_y, y_score=pred_prob_y, average='macro', multi_class='ovo', labels=labels) 104 | metrics['macro_f1'] = f1_score(y_true=true_y, y_pred=yhat_hard, average='macro', labels=labels) 105 | metrics['micro_f1'] = f1_score(y_true=true_y, y_pred=yhat_hard, average='micro', labels=labels) 106 | metrics['weighted_f1'] = f1_score(y_true=true_y, y_pred=yhat_hard, average='weighted', labels=labels) 107 | 108 | return metrics 109 | 110 | 111 | def get_cosine_schedule_with_warmup(optimizer, 112 | num_warmup_steps, 113 | num_training_steps, 114 | num_cycles=7./16., 115 | last_epoch=-1): 116 | def _lr_lambda(current_step): 117 | if current_step < num_warmup_steps: 118 | return float(current_step) / float(max(1, num_warmup_steps)) 119 | no_progress = float(current_step - num_warmup_steps) / \ 120 | float(max(1, num_training_steps - num_warmup_steps)) 121 | return max(0., math.cos(math.pi * num_cycles * no_progress)) 122 | 123 | return torch.optim.lr_scheduler.LambdaLR(optimizer, _lr_lambda, last_epoch) 124 | 125 | 126 | class ModelEMA(object): 127 | def __init__(self, model, ema_decay, device): 128 | self.ema = deepcopy(model) 129 | self.ema.to(device) 130 | self.ema.eval() 131 | self.decay = ema_decay 132 | self.ema_has_module = hasattr(self.ema, 'module') 133 | # Fix EMA. https://github.com/valencebond/FixMatch_pytorch thank you! 134 | self.param_keys = [k for k, _ in self.ema.named_parameters()] 135 | self.buffer_keys = [k for k, _ in self.ema.named_buffers()] 136 | for p in self.ema.parameters(): 137 | p.requires_grad_(False) 138 | 139 | def update(self, model): 140 | needs_module = hasattr(model, 'module') and not self.ema_has_module 141 | with torch.no_grad(): 142 | msd = model.state_dict() 143 | esd = self.ema.state_dict() 144 | for k in self.param_keys: 145 | if needs_module: 146 | j = 'module.' + k 147 | else: 148 | j = k 149 | model_v = msd[j].detach() 150 | ema_v = esd[k] 151 | esd[k].copy_(ema_v * self.decay + (1. - self.decay) * model_v) 152 | 153 | for k in self.buffer_keys: 154 | if needs_module: 155 | j = 'module.' 
+ k 156 | else: 157 | j = k 158 | esd[k].copy_(msd[j]) 159 | 160 | 161 | def interleave(x, size): 162 | s = list(x.shape) 163 | return x.reshape([-1, size] + s[1:]).transpose(0, 1).reshape([-1] + s[1:]) 164 | 165 | 166 | def de_interleave(x, size): 167 | s = list(x.shape) 168 | return x.reshape([size, -1] + s[1:]).transpose(0, 1).reshape([-1] + s[1:]) 169 | 170 | 171 | def accuracy(output, target, topk=(1,)): 172 | """Computes the precision@k for the specified values of k""" 173 | maxk = max(topk) 174 | batch_size = target.size(0) 175 | 176 | _, pred = output.topk(maxk, 1, True, True) 177 | pred = pred.t() 178 | correct = pred.eq(target.reshape(1, -1).expand_as(pred)) 179 | 180 | res = [] 181 | for k in topk: 182 | correct_k = correct[:k].reshape(-1).float().sum(0) 183 | res.append(correct_k.mul_(1.0 / batch_size)) 184 | return res 185 | 186 | 187 | class AverageMeter(object): 188 | """Computes and stores the average and current value 189 | Imported from https://github.com/pytorch/examples/blob/master/imagenet/main.py#L247-L262 190 | """ 191 | 192 | def __init__(self): 193 | self.reset() 194 | 195 | def reset(self): 196 | self.val = 0 197 | self.avg = 0 198 | self.sum = 0 199 | self.count = 0 200 | 201 | def update(self, val, n=1): 202 | self.val = val 203 | self.sum += val * n 204 | self.count += n 205 | self.avg = self.sum / self.count 206 | -------------------------------------------------------------------------------- /ssltsc/postprocessing.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import os 3 | import pdb 4 | from ssltsc import visualization 5 | 6 | def get_mlflow_results(mlflow_id, path='mlruns/'): 7 | """collect and summarize the mlflow results for one experiment 8 | 9 | Args: 10 | mlflow_id (int): mlflow id 11 | path (str, optional): path to the mlruns folder. Defaults to 'mlruns/'. 12 | """ 13 | path = f'{path}{mlflow_id}/' 14 | 15 | # filter for folders that start with those 32length mlflow hashes 16 | runs = [run for run in os.listdir(path) if len(run) == 32 and not run.startswith('performance')] 17 | 18 | dict_list = [] 19 | for run in runs: 20 | # read params 21 | param_dict = {param: open(f'{path}{run}/params/{param}').read() for param in os.listdir(f'{path}{run}/params/')} 22 | # read (only test) metrics 23 | metric_dict = {metric: float(open(f'{path}{run}/metrics/{metric}').read().split(" ")[1]) for metric in os.listdir(f'{path}{run}/metrics/') if metric.startswith('test')} 24 | # read tags 25 | tag_dict = {tag: open(f'{path}{run}/tags/{tag}').read() for tag in os.listdir(f'{path}{run}/tags/') if tag not in os.listdir(f'{path}{run}/params/')} 26 | # combine all dicts in one large dict 27 | final_dict = {**param_dict, **metric_dict, **tag_dict} 28 | dict_list.append(final_dict) 29 | final_frame = pd.DataFrame(dict_list) 30 | final_frame.to_csv(f'{path}results.csv', index=False) 31 | print(f'Concatenated and stored results from {len(runs)} runs') 32 | 33 | def visualize_experiment(mlflow_id, path='mlruns/'): 34 | """create boxplots for all runs in one experiment. 35 | Collects and stores results on the fly if not already done. 36 | 37 | Args: 38 | mlflow_id (int): mlflow id 39 | path (str, optional): path to the mlruns folder. Defaults to 'mlruns/'. 
40 | """ 41 | #if not os.path.exists(f'{path}{mlflow_id}/results.csv'): 42 | get_mlflow_results(mlflow_id=mlflow_id, path=path) 43 | # visualization.visualize_results_boxplot(mlflow_id=mlflow_id, storage_path=path) 44 | visualization.visualize_results_lineplot(mlflow_id=mlflow_id, storage_path=path) 45 | -------------------------------------------------------------------------------- /tests/cifar10/X_test.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Goschjann/ssltsc/08d6b1bf711bb1c8f19f9bfb66a98d4e423e932e/tests/cifar10/X_test.npy -------------------------------------------------------------------------------- /tests/cifar10/X_train.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Goschjann/ssltsc/08d6b1bf711bb1c8f19f9bfb66a98d4e423e932e/tests/cifar10/X_train.npy -------------------------------------------------------------------------------- /tests/cifar10/Y_test.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Goschjann/ssltsc/08d6b1bf711bb1c8f19f9bfb66a98d4e423e932e/tests/cifar10/Y_test.npy -------------------------------------------------------------------------------- /tests/cifar10/Y_train.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Goschjann/ssltsc/08d6b1bf711bb1c8f19f9bfb66a98d4e423e932e/tests/cifar10/Y_train.npy -------------------------------------------------------------------------------- /tests/pamap2/X_train.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Goschjann/ssltsc/08d6b1bf711bb1c8f19f9bfb66a98d4e423e932e/tests/pamap2/X_train.npy -------------------------------------------------------------------------------- /tests/pamap2/Y_train.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Goschjann/ssltsc/08d6b1bf711bb1c8f19f9bfb66a98d4e423e932e/tests/pamap2/Y_train.npy -------------------------------------------------------------------------------- /tests/test_architectures.py: -------------------------------------------------------------------------------- 1 | from unittest import TestCase 2 | 3 | from torch.utils.data import DataLoader 4 | 5 | from dl4d.datasets.pamap import PAMAP2 6 | from dl4d.datasets.cifar10 import Cifar10 7 | from ssltsc.architectures.convlarge import ConvLarge, ConvLargeDecoder 8 | from ssltsc.architectures.ladder import Ladder 9 | from ssltsc.architectures.fcn import LadderFCN, LadderFCNDecoder 10 | 11 | import os 12 | import torch 13 | 14 | PROJECT_ROOT_PATH = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) 15 | DATA_ROOT = os.path.join(PROJECT_ROOT_PATH, 'tests') 16 | 17 | 18 | class TestImageArchitectures(TestCase): 19 | batch_size = 12 20 | 21 | cifar10_path = os.path.join(PROJECT_ROOT_PATH, "tests", "cifar10/") 22 | pamap2_path = os.path.join(PROJECT_ROOT_PATH, 'tests', 'pamap2/') 23 | 24 | cifar10_classes = 10 25 | cifar_channels = 3 26 | height = width = 32 27 | 28 | pamap2_classes = 19 29 | pamap2_channels = 6 30 | pamap2_length = 100 31 | 32 | pamap_dataloader = DataLoader(dataset=PAMAP2(root=DATA_ROOT, part='train'), batch_size=batch_size) 33 | one_pamap2_batch, _ = next(iter(pamap_dataloader)) 34 | 35 | cifar_dataloader = DataLoader(dataset=Cifar10(root=DATA_ROOT, part='train'), 36 | 
batch_size=batch_size) 37 | one_cifar10_batch, _ = next(iter(cifar_dataloader)) 38 | 39 | 40 | def setUp(self): 41 | if torch.cuda.is_available(): 42 | self.one_cifar10_batch = self.one_cifar10_batch.to(torch.device('cuda')) 43 | self.one_pamap2_batch = self.one_pamap2_batch.to(torch.device('cuda')) 44 | 45 | def test_conv_large_forward_pass(self): 46 | self.assertEqual(self.one_cifar10_batch.shape, (self.batch_size, self.cifar_channels, self.height, self.width)) 47 | 48 | # Make one forward pass and check that the outcome has the right shapes 49 | laddernet_architecture = ConvLarge(n_classes=self.cifar10_classes, channels=self.cifar_channels) 50 | 51 | out = laddernet_architecture(self.one_cifar10_batch) 52 | 53 | self.assertEqual((self.batch_size, 10), out.shape) 54 | 55 | def test_conv_large_ladder_forward_pass(self): 56 | self.assertEqual(self.one_cifar10_batch.shape, (self.batch_size, self.cifar_channels, self.height, self.width)) 57 | 58 | # Make one forward pass and check that the outcome has the right shapes 59 | laddernet_architecture = Ladder(encoder_architecture=ConvLarge, 60 | decoder_architecture=ConvLargeDecoder, 61 | n_classes=self.cifar10_classes, 62 | channels=self.cifar_channels, 63 | ) 64 | laddernet_architecture.train() 65 | out, hidden_reps = laddernet_architecture(self.one_cifar10_batch) 66 | 67 | self.assertEqual((self.batch_size, 10), out.shape) 68 | self.assertListEqual([13]*4, [len(h_reps) for h_reps in hidden_reps.values()]) 69 | 70 | # Test the batch statistics shapes of the clean encoder hidden representations 71 | for m, std in zip(hidden_reps['batch_means'], hidden_reps['batch_std']): 72 | self.assertEqual((), tuple(m.shape)) 73 | self.assertEqual((), tuple(std.shape)) 74 | 75 | # Test the shapes of the hidden representations 76 | for z, z_hat in zip(hidden_reps['zs'], reversed(hidden_reps['hat_zs'])): 77 | self.assertEqual(z.shape, z_hat.shape) 78 | 79 | def test_conv_large_ladder_forward_pass_with_subset_of_ladders(self): 80 | 81 | # Counting from the top to bottom. This list say we need the top 82 | # ladder only ie. we only need one layer from the decoder 83 | ladders = [False] * 12 + [True] 84 | laddernet_architecture = Ladder(encoder_architecture=ConvLarge, 85 | decoder_architecture=ConvLargeDecoder, 86 | n_classes=self.cifar10_classes, 87 | channels=self.cifar_channels, 88 | ladders=ladders 89 | ) 90 | laddernet_architecture.train() 91 | out, hidden_reps = laddernet_architecture(self.one_cifar10_batch) 92 | 93 | # Assert that we get correct length list back 94 | self.assertListEqual([13]*4, [len(h_reps) for h_reps in hidden_reps.values()]) 95 | 96 | # Assert that everything other than the weighted loss is None. Here 97 | # we count from the top to button. 
98 | self.assertListEqual([True] + [False] * 12, [hat_z is not None for hat_z in hidden_reps['hat_zs']]) 99 | 100 | def test_fcn_forward_pass(self): 101 | 102 | self.assertEqual(self.one_pamap2_batch.shape, (self.batch_size, self.pamap2_channels, self.pamap2_length)) 103 | 104 | # Make one forward pass and check that the outcome has the right shapes 105 | architecture = LadderFCN(channels=self.pamap2_channels, n_classes=self.pamap2_classes) 106 | 107 | out = architecture(self.one_pamap2_batch) 108 | 109 | self.assertEqual((self.batch_size, self.pamap2_classes), out.shape) 110 | 111 | def test_fcn_ladder_forward_pass(self): 112 | self.assertEqual(self.one_pamap2_batch.shape, (self.batch_size, self.pamap2_channels, self.pamap2_length)) 113 | 114 | # Make one forward pass and check that the outcome has the right shapes 115 | laddernet_architecture = Ladder(encoder_architecture=LadderFCN, 116 | decoder_architecture=LadderFCNDecoder, 117 | n_classes=self.pamap2_classes, 118 | channels=self.pamap2_channels, 119 | length=self.pamap2_length) 120 | laddernet_architecture.train() 121 | out, hidden_reps = laddernet_architecture(self.one_pamap2_batch) 122 | 123 | self.assertEqual((self.batch_size, 19), out.shape) 124 | self.assertListEqual([5]*4, [len(h_reps) for h_reps in hidden_reps.values()]) 125 | 126 | # Test the batch statistics shapes of the clean encoder hidden representations 127 | for m, std in zip(hidden_reps['batch_means'], hidden_reps['batch_std']): 128 | self.assertEqual((), tuple(m.shape)) 129 | self.assertEqual((), tuple(std.shape)) 130 | 131 | # Test the shapes of the hidden representations 132 | for z, z_hat in zip(hidden_reps['zs'], reversed(hidden_reps['hat_zs'])): 133 | self.assertEqual(z.shape, z_hat.shape) 134 | 135 | def test_fcn_ladder_forward_pass_with_subset_of_ladders(self): 136 | 137 | # Counting from the top to bottom. This list say we need the top 138 | # ladder only ie. we only need one layer from the decoder 139 | ladders = [False] * 4 + [True] 140 | laddernet_architecture = Ladder(encoder_architecture=LadderFCN, 141 | decoder_architecture=LadderFCNDecoder, 142 | n_classes=self.pamap2_classes, 143 | channels=self.pamap2_channels, 144 | length=self.pamap2_length, 145 | ladders=ladders 146 | ) 147 | laddernet_architecture.train() 148 | out, hidden_reps = laddernet_architecture(self.one_pamap2_batch) 149 | 150 | # Assert that we get correct length list back 151 | self.assertListEqual([5]*4, [len(h_reps) for h_reps in hidden_reps.values()]) 152 | 153 | # Assert that everything other than the weighted loss is None. Here 154 | # we count from the top to button. 155 | self.assertListEqual([True] + [False] * 4, [hat_z is not None for hat_z in hidden_reps['hat_zs']]) -------------------------------------------------------------------------------- /tests/utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | PROJECT_ROOT_PATH = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) --------------------------------------------------------------------------------
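As a quick, self-contained illustration of the utilities above, the sketch below exercises `ema_update` and `rampup` from `ssltsc/models/utils.py` on a toy `nn.Linear` stand-in rather than one of the repository's backbone architectures; it assumes the `ssltsc` package has been installed into the active environment.

    import torch
    from torch import nn
    from ssltsc.models.utils import ema_update, rampup

    # two identical modules standing in for student and teacher backbones
    student = nn.Linear(4, 2)
    teacher = nn.Linear(4, 2)

    # With alpha=0.0 the teacher weights are overwritten by the student weights
    # (plain SGD training); alpha close to 1 keeps more of the past teacher.
    ema_update(student=student, teacher=teacher, alpha=0.0, verbose=True)  # prints True once per parameter tensor

    # Sigmoid ramp-up of the consistency weight: near 0 at step 0, 1.0 once the
    # ramp-up length is reached.
    print([round(rampup(current=s, rampup_length=10), 3) for s in (0, 5, 10, 20)])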