├── models ├── __init__.py ├── NTXent.py ├── backbones.py ├── builder.py └── builder_utils.py ├── .gitignore ├── figures └── FreRA.png ├── requirements.txt ├── data_preprocess ├── base_loader.py ├── data_preprocess_uea.py ├── motionsense_raw_preprocess.py ├── preprocess_uea.py ├── data_preprocess_ms.py ├── data_preprocess_utils.py ├── data_preprocess_ucr.py ├── data_preprocess_fd.py ├── data_preprocess_shar.py ├── data_preprocess_ucihar.py └── data_preprocess_wisdm.py ├── autoaug └── fourier.py ├── README.md ├── data_loaders.py ├── augmentations.py └── main_FreRA.py /models/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | .idea/* -------------------------------------------------------------------------------- /figures/FreRA.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tian0426/FreRA/HEAD/figures/FreRA.png -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | scikit-learn==0.22.1 2 | torch==1.11.0 3 | torchvision==0.12.0 4 | einops==0.3.2 5 | pickle5==0.0.11 6 | numpy==1.21.2 7 | fitlog 8 | requests 9 | matplotlib 10 | seaborn -------------------------------------------------------------------------------- /data_preprocess/base_loader.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from torch.utils.data import Dataset 3 | 4 | class base_loader(Dataset): 5 | def __init__(self, samples, labels, domains): 6 | self.samples = samples 7 | self.labels = labels 8 | self.domains = domains 9 | 10 | def __getitem__(self, index): 11 | sample, target, domain = self.samples[index], self.labels[index], self.domains[index] 12 | return sample, target, domain 13 | 14 | def __len__(self): 15 | return len(self.samples) 16 | 17 | -------------------------------------------------------------------------------- /autoaug/fourier.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.nn.parameter import Parameter 3 | from torch.nn.modules.module import Module 4 | 5 | class FreRA(Module): 6 | def __init__(self, len_sw, device=None, dtype=None) -> None: 7 | super(FreRA,self).__init__() 8 | print('Initializing FreRA') 9 | factory_kwargs = {'device': device, 'dtype': None} 10 | 11 | n_fourier_comp = len_sw //2 + 1 12 | self.weight = Parameter(torch.empty((n_fourier_comp, 2), **factory_kwargs)) 13 | self.reset_parameters() 14 | 15 | def get_sampling(self, weight, temperature=0.1, bias=0.0): 16 | 17 | if self.training: 18 | bias = bias + 0.0001 # If bias is 0, we run into problems 19 | eps = (bias - (1 - bias)) * torch.rand(weight.size()) + (1 - bias) 20 | gate_inputs = torch.log(eps) - torch.log(1 - eps) 21 | gate_inputs = gate_inputs.cuda() 22 | gate_inputs = (gate_inputs + weight) / temperature # todo adaptive temperature 23 | para = torch.sigmoid(gate_inputs) 24 | return para 25 | else: 26 | return torch.sigmoid(weight) 27 | 28 | 29 | def reset_parameters(self) -> None: 30 | # init.kaiming_uniform_(self.weight, a=math.sqrt(5)) 31 | torch.nn.init.normal_(self.weight, mean=0.0, std=0.10) 32 | def forward(self,x, temperature): 33 | para = self.get_sampling(self.weight, temperature=temperature) 34 | self.para = para 35 | 36 | noise_para = self.weight.detach().clone() * (-1) 37 | noise_para[noise_para < max(0, noise_para[:, 0].mean())] = 0.0 38 | scaling_factor = 1.0 / noise_para[:, 0][noise_para[:, 0] != 0].mean() 39 | 40 | x_ft = torch.fft.rfft(x, dim=-2) 41 | x_ft = x_ft * torch.unsqueeze(para[:, 0] + noise_para[:, 0]*scaling_factor, -1) 42 | aug = torch.fft.irfft(x_ft, n=x.shape[-2], dim=-2) 43 | 44 | return aug -------------------------------------------------------------------------------- /models/NTXent.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | 4 | class NTXentLoss(torch.nn.Module): 5 | 6 | def __init__(self, device, batch_size, temperature=0.1, use_cosine_similarity=True): 7 | super(NTXentLoss, self).__init__() 8 | self.batch_size = batch_size 9 | self.temperature = temperature 10 | print(self.temperature) 11 | self.device = device 12 | self.softmax = torch.nn.Softmax(dim=-1) 13 | self.mask_samples_from_same_repr = self._get_correlated_mask().type(torch.bool) 14 | self.similarity_function = self._get_similarity_function(use_cosine_similarity) 15 | 16 | def _get_similarity_function(self, use_cosine_similarity): 17 | if use_cosine_similarity: 18 | self._cosine_similarity = torch.nn.CosineSimilarity(dim=-1) 19 | return self._cosine_simililarity 20 | else: 21 | return self._dot_simililarity 22 | 23 | def _get_correlated_mask(self): 24 | diag = np.eye(2 * self.batch_size) 25 | l1 = np.eye((2 * self.batch_size), 2 * self.batch_size, k=-self.batch_size) 26 | l2 = np.eye((2 * self.batch_size), 2 * self.batch_size, k=self.batch_size) 27 | mask = torch.from_numpy((diag + l1 + l2)) 28 | mask = (1 - mask).type(torch.bool) 29 | return mask.to(self.device) 30 | 31 | @staticmethod 32 | def _dot_simililarity(x, y): 33 | v = torch.tensordot(x.unsqueeze(1), y.T.unsqueeze(0), dims=2) 34 | # x shape: (N, 1, C) 35 | # y shape: (1, C, 2N) 36 | # v shape: (N, 2N) 37 | return v 38 | 39 | def _cosine_simililarity(self, x, y): 40 | # x shape: (N, 1, C) 41 | # y shape: (1, 2N, C) 42 | # v shape: (N, 2N) 43 | v = self._cosine_similarity(x.unsqueeze(1), y.unsqueeze(0)) 44 | return v 45 | 46 | def forward(self, zis, zjs): 47 | representations = torch.cat([zjs, zis], dim=0) 48 | 49 | similarity_matrix = self.similarity_function(representations, representations) 50 | 51 | # filter out the scores from the positive samples 52 | l_pos = torch.diag(similarity_matrix, self.batch_size) 53 | r_pos = torch.diag(similarity_matrix, -self.batch_size) 54 | self.positives = torch.cat([l_pos, r_pos]).view(2 * self.batch_size, 1) 55 | 56 | self.negatives = similarity_matrix[self.mask_samples_from_same_repr].view(2 * self.batch_size, -1) 57 | 58 | logits = torch.cat((self.positives, self.negatives), dim=1) 59 | logits /= self.temperature 60 | 61 | labels = torch.zeros(2 * self.batch_size).to(self.device).long() 62 | 63 | return logits, labels 64 | -------------------------------------------------------------------------------- /models/backbones.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | 4 | class FCN(nn.Module): 5 | def __init__(self, dataset, n_channels, n_classes, out_channels=128, backbone=True): 6 | super(FCN, self).__init__() 7 | 8 | self.backbone = backbone 9 | 10 | kernel_size, stride = 8, 1 11 | 12 | self.conv_block1 = nn.Sequential( 13 | nn.Conv1d(n_channels, 32, kernel_size=kernel_size, stride=stride, bias=False, padding=int(kernel_size / 2)), 14 | nn.BatchNorm1d(32), 15 | nn.ReLU(), 16 | nn.MaxPool1d(kernel_size=2, stride=2, padding=1), 17 | nn.Dropout(0.35)) 18 | self.conv_block2 = nn.Sequential( 19 | nn.Conv1d(32, 64, kernel_size=kernel_size, stride=stride, bias=False, padding=int(kernel_size / 2)), 20 | nn.BatchNorm1d(64), 21 | nn.ReLU(), 22 | nn.MaxPool1d(kernel_size=2, stride=2, padding=1)) 23 | self.conv_block3 = nn.Sequential(nn.Conv1d(64, out_channels, kernel_size=kernel_size, stride=stride, bias=False, 24 | padding=int(kernel_size / 2)), 25 | nn.BatchNorm1d(out_channels), 26 | nn.ReLU(), 27 | nn.MaxPool1d(kernel_size=2, stride=2, padding=1)) 28 | 29 | if dataset == 'ucihar': # ucihar 30 | self.out_len = 18 31 | elif dataset == 'wisdm': # wisdm 32 | self.out_len = 27 33 | elif dataset == 'ms': # and n_classes == 6: # ms 34 | self.out_len = 27 35 | elif dataset == 'fm': # fm 36 | self.out_len = 8 37 | elif dataset == 'FaceDetection': 38 | self.out_len = 10 39 | elif dataset == 'HandMovementDirection': 40 | self.out_len = 52 41 | elif dataset == 'Heartbeat': 42 | self.out_len = 53 43 | elif dataset == 'Libras': 44 | self.out_len = 8 45 | 46 | 47 | self.out_channels = out_channels 48 | self.out_dim = self.out_len * self.out_channels 49 | 50 | if backbone == False: 51 | self.logits = nn.Linear(self.out_dim, n_classes) 52 | 53 | def forward(self, x_in, return_feature=False): 54 | if len(x_in.shape) == 2: 55 | x_in = x_in.unsqueeze(-1) 56 | x_in = x_in.permute(0, 2, 1) 57 | x = self.conv_block1(x_in) 58 | x = self.conv_block2(x) 59 | x = self.conv_block3(x) 60 | 61 | if self.backbone: 62 | return x 63 | else: 64 | x_flat = x.reshape(x.shape[0], -1) 65 | logits = self.logits(x_flat) 66 | if return_feature: 67 | return logits, x_flat 68 | else: 69 | return logits 70 | -------------------------------------------------------------------------------- /data_preprocess/data_preprocess_uea.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.utils.data import DataLoader 3 | from torch.utils.data import Dataset 4 | import os 5 | import numpy as np 6 | from sklearn.model_selection import train_test_split 7 | from data_preprocess.data_preprocess_utils import get_sample_weights, train_test_val_split 8 | from data_preprocess.base_loader import base_loader 9 | 10 | class data_loader_uea(base_loader): 11 | def __init__(self, samples, labels, domains): 12 | super(data_loader_uea, self).__init__(samples, labels, domains) 13 | 14 | def data_generator(args): 15 | data_path = './data/'+args.dataset+'/' 16 | 17 | train_dataset = torch.load(os.path.join(data_path, args.dataset+"_train.pt")) 18 | x_train, y_train = train_dataset["samples"], train_dataset["labels"] 19 | test_dataset = torch.load(os.path.join(data_path, args.dataset+"_test.pt")) 20 | x_test, y_test = test_dataset["samples"], test_dataset["labels"] 21 | 22 | if isinstance(x_train, np.ndarray): 23 | x_train, x_test = torch.from_numpy(x_train), torch.from_numpy(x_test) 24 | y_train, y_test = torch.from_numpy(y_train).long(), torch.from_numpy(y_test).long() 25 | 26 | d_train = np.full(x_train.shape[0], 0) 27 | d_test = np.full(x_test.shape[0], 0) 28 | 29 | x_all = np.concatenate((x_train, x_test), axis=0) 30 | y_all = np.concatenate((y_train, y_test)) 31 | d_all = np.concatenate((d_train, d_test)) 32 | 33 | x_win_train, x_win_val, x_win_test, \ 34 | y_win_train, y_win_val, y_win_test, \ 35 | d_win_train, d_win_val, d_win_test = train_test_val_split(x_all, y_all, d_all, split_ratio=args.split_ratio) 36 | 37 | print(x_win_train.shape, x_win_val.shape, x_win_test.shape) 38 | 39 | unique_y, counts_y = np.unique(y_win_train, return_counts=True) 40 | print('y_train label distribution: ', dict(zip(unique_y, counts_y))) 41 | weights = 100.0 / torch.Tensor(counts_y) 42 | print('weights of sampler: ', weights) 43 | weights = weights.double() 44 | sample_weights = get_sample_weights(y_win_train, weights) 45 | sampler = torch.utils.data.sampler.WeightedRandomSampler(weights=sample_weights, num_samples=len(sample_weights), 46 | replacement=True) 47 | 48 | train_set_r = data_loader_uea(x_win_train, y_win_train, d_win_train) 49 | train_loader_r = DataLoader(train_set_r, batch_size=args.batch_size, shuffle=False, drop_last=True, sampler=sampler) 50 | val_set_r = data_loader_uea(x_win_val, y_win_val, d_win_val) 51 | val_loader_r = DataLoader(val_set_r, batch_size=args.batch_size, shuffle=False) 52 | test_set_r = data_loader_uea(x_test, y_test, d_test) 53 | test_loader_r = DataLoader(test_set_r, batch_size=args.batch_size, shuffle=False) 54 | 55 | return [train_loader_r], val_loader_r, test_loader_r 56 | 57 | def prep_uea(args): 58 | if args.cases == 'random': 59 | return data_generator(args) -------------------------------------------------------------------------------- /models/builder.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from random import sample 4 | from models.backbones import FCN 5 | from models.NTXent import * 6 | from models.builder_utils import * 7 | import copy 8 | 9 | class SimCLR(nn.Module): 10 | def __init__(self, device, dataset, n_feature, batch_size, base_encoder, dim=128, T=0.1): 11 | super(SimCLR, self).__init__() 12 | 13 | if base_encoder == 'FCN': 14 | self.encoder_q = FCN(dataset, n_channels=n_feature, n_classes=dim, backbone=False) 15 | dim_mlp = self.encoder_q.logits.weight.shape[1] 16 | self.encoder_q.logits = nn.Sequential(nn.Linear(dim_mlp, dim_mlp), nn.ReLU(), self.encoder_q.logits) 17 | 18 | 19 | 20 | self.NTXentLoss = NTXentLoss(device=device, batch_size=batch_size, temperature=T) 21 | 22 | def forward(self, im_q, im_k): 23 | z1 = self.encoder_q(im_q) 24 | z2 = self.encoder_q(im_k) 25 | 26 | z1 = nn.functional.normalize(z1, dim=1) 27 | z2 = nn.functional.normalize(z2, dim=1) 28 | 29 | logits, labels = self.NTXentLoss(z1, z2) 30 | 31 | return logits, labels, z1, z2 32 | 33 | class BYOL(nn.Module): 34 | def __init__( 35 | self, 36 | DEVICE, 37 | base_encoder, 38 | dataset, 39 | n_feature, 40 | window_size, 41 | hidden_layer = -1, 42 | projection_size = 128, 43 | moving_average = 0.99, 44 | use_momentum = True, 45 | ): 46 | super().__init__() 47 | 48 | if base_encoder == 'FCN': 49 | self.encoder_q = FCN(dataset, n_channels=n_feature, n_classes=projection_size, backbone=False) 50 | 51 | dim_mlp = self.encoder_q.logits.weight.shape[1] 52 | self.encoder_q = NetWrapper(self.encoder_q, projection_size, dim_mlp, DEVICE=DEVICE, layer=hidden_layer) 53 | 54 | self.use_momentum = use_momentum 55 | self.target_encoder = None 56 | self.target_ema_updater = EMA(moving_average) 57 | 58 | self.online_predictor = Predictor(model='byol', dim=projection_size, pred_dim=projection_size) 59 | 60 | self.to(DEVICE) 61 | 62 | # send a mock image tensor to instantiate singleton parameters 63 | self.forward(torch.randn(2, window_size, n_feature, device=DEVICE), 64 | torch.randn(2, window_size, n_feature, device=DEVICE)) 65 | 66 | @singleton('target_encoder') 67 | def _get_target_encoder(self): 68 | target_encoder = copy.deepcopy(self.encoder_q) 69 | for p in target_encoder.parameters(): 70 | p.requires_grad = False 71 | return target_encoder 72 | 73 | def reset_moving_average(self): 74 | del self.target_encoder 75 | self.target_encoder = None 76 | 77 | def update_moving_average(self): 78 | assert self.target_encoder is not None, 'target encoder has not been created yet' 79 | update_moving_average(self.target_ema_updater, self.target_encoder, self.encoder_q) 80 | 81 | def forward( 82 | self, 83 | im_q, 84 | im_k, 85 | ): 86 | assert not (self.training and im_q.shape[0] == 1), 'you must have greater than 1 sample when training, due to the batchnorm in the projection layer' 87 | 88 | online_proj_one, lat1 = self.encoder_q(im_q) 89 | online_proj_two, lat2 = self.encoder_q(im_k) 90 | 91 | online_pred_one = self.online_predictor(online_proj_one) 92 | online_pred_two = self.online_predictor(online_proj_two) 93 | 94 | with torch.no_grad(): 95 | target_encoder = self._get_target_encoder() if self.use_momentum else self.encoder_q 96 | target_proj_one, _ = target_encoder(im_q) 97 | target_proj_two, _ = target_encoder(im_k) 98 | target_proj_one.detach_() 99 | target_proj_two.detach_() 100 | 101 | return online_pred_one, online_pred_two, target_proj_one.detach(), target_proj_two.detach() 102 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## FreRA: A Frequency-Refined Augmentation for Contrastive Learning on Time Series Classification 2 | 3 | Code for KDD 2025 paper "FreRA: A Frequency-Refined Augmentation for Contrastive Learning on Time Series Classification". 4 | 5 | ## Abstract 6 | 7 | >Contrastive learning has emerged as a competent approach for unsupervised representation learning. However, the design of an optimal augmentation strategy, although crucial for contrastive learning, is less explored for time series classification tasks. Existing predefined time-domain augmentation methods are primarily adopted from vision and are not specific to time series data. Consequently, this cross-modality incompatibility may distort the semantically relevant information of time series by introducing mismatched patterns into the data. To address this limitation, we present a novel perspective from the frequency domain and identify three advantages for downstream classification: 1) the frequency component naturally encodes global features, 2) the orthogonal nature of the Fourier basis allows easier isolation and independent modifications of critical and unimportant information, and 3) a compact set of frequency components can preserve semantic integrity. To fully utilize the three properties, we propose the lightweight yet effective Frequency Refined Augmentation (FreRA) tailored for time series contrastive learning on classification tasks, which can be seamlessly integrated with contrastive learning frameworks in a plug-and-play manner. Specifically, FreRA automatically separates critical and unimportant frequency components. Accordingly, we propose semantic-aware Identity Modification and semantic-agnostic Self-adaptive Modification to protect semantically relevant information in the critical frequency components and infuse variance into the unimportant ones respectively. Theoretically, we prove that FreRA generates semantic-preserving views. Empirically, we conduct extensive experiments on two benchmark datasets including UCR and UEA archives, as well as five large-scale datasets on diverse applications. FreRA consistently outperforms ten leading baselines on time series classification, anomaly detection, and transfer learning tasks, demonstrating superior capabilities in contrastive representation learning and generalization in transfer learning scenarios across diverse datasets. 8 | 9 | ![overview](figures/FreRA.png) 10 | 11 | ## Environment Setup 12 | Build an environment with Anaconda to install required packages. 13 | ``` 14 | conda create -n FreRA python=3.8.3 15 | conda activate FreRA 16 | pip install -r requirements.txt 17 | ``` 18 | 19 | ## Models 20 | The following models are provided under `./models/`. 21 | - contrastive models: SimCLR, BYOL ```./models/builder.py``` 22 | - backbone encoder: FCN ```./models/backbones.py``` 23 | 24 | ## Main Functions 25 | - ```main_FreRA.py``` 26 | 27 | ## Datasets 28 | Datasets can be downloaded from the following websites to folder `./data/` and the datasets will be pre-processed automatically by our codes under `./data_preprocess/`. 29 | - [UCIHAR](https://archive.ics.uci.edu/ml/datasets/human+activity+recognition+using+smartphones) 30 | - [WISDM](https://www.cis.fordham.edu/wisdm/dataset.php) 31 | - [MotionSense](https://github.com/mmalekzadeh/motion-sense) 32 | - [SHAR](http://www.sal.disco.unimib.it/technologies/unimib-shar/) 33 | - [Fault Diagnosis](https://mb.uni-paderborn.de/kat/datacenter) 34 | - [UEA Archive](https://timeseriesclassification.com/dataset.php) 35 | - [UCR Archive](https://www.cs.ucr.edu/~eamonn/time_series_data_2018/) 36 | 37 | ## Usage 38 | Scripts for using the proposed FreRA model. 39 | ```angular2html 40 | python main_FreRA.py --f_aug_mode 'FreRA' --l1_weight 0.003 --framework 'simclr' --dataset 'ucihar' --lr 0.01 --f_lr 0.001 --batch_size 128 --epochs 200 --temperature 0.2 --f_temperature 0.1 --gpu 0 41 | python main_FreRA.py --f_aug_mode 'FreRA' --l1_weight 0.003 --framework 'simclr' --dataset 'wisdm' --lr 0.01 --f_lr 0.001 --batch_size 128 --epochs 200 --temperature 0.2 --f_temperature 0.1 --gpu 0 42 | python main_FreRA.py --f_aug_mode 'FreRA' --l1_weight 0.003 --framework 'simclr' --dataset 'ms' --lr 0.01 --f_lr 0.001 --batch_size 128 --epochs 200 --temperature 0.2 --f_temperature 0.1 --gpu 0 43 | ``` 44 | -------------------------------------------------------------------------------- /data_preprocess/motionsense_raw_preprocess.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import re 3 | import os 4 | import pandas as pd 5 | import numpy as np 6 | import pickle as cp 7 | 8 | __author__ = "C. I. Tang" 9 | __copyright__ = "Copyright (C) 2020 C. I. Tang" 10 | 11 | """ 12 | Based on work of Tang et al.: https://arxiv.org/abs/2011.11542 13 | Contact: cit27@cl.cam.ac.uk 14 | License: GNU General Public License v3.0 15 | This program is free software: you can redistribute it and/or modify 16 | it under the terms of the GNU General Public License as published by 17 | the Free Software Foundation, either version 3 of the License, or 18 | (at your option) any later version. 19 | This program is distributed in the hope that it will be useful, 20 | but WITHOUT ANY WARRANTY; without even the implied warranty of 21 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 22 | GNU General Public License for more details. 23 | You should have received a copy of the GNU General Public License 24 | along with this program. If not, see . 25 | """ 26 | 27 | 28 | def process_motion_sense_accelerometer_files(accelerometer_data_folder_path): 29 | """ 30 | Preprocess the accelerometer files of the MotionSense dataset into the 'user-list' format 31 | Data files can be found at https://github.com/mmalekzadeh/motion-sense/tree/master/data 32 | Parameters: 33 | accelerometer_data_folder_path (str): 34 | the path to the folder containing the data files (unzipped) 35 | e.g. motionSense/B_Accelerometer_data/ 36 | the trial folders should be directly inside it (e.g. motionSense/B_Accelerometer_data/dws_1/) 37 | Return: 38 | 39 | user_datsets (dict of {user_id: [(sensor_values, activity_labels)]}) 40 | the processed dataset in a dictionary, of type {user_id: [(sensor_values, activity_labels)]} 41 | the keys of the dictionary is the user_id (participant id) 42 | the values of the dictionary are lists of (sensor_values, activity_labels) pairs 43 | sensor_values are 2D numpy array of shape (length, channels=3) 44 | activity_labels are 1D numpy array of shape (length) 45 | each pair corresponds to a separate trial 46 | (i.e. time is not contiguous between pairs, which is useful for making sliding windows, where it is easy to separate trials) 47 | """ 48 | 49 | # label_set = {} 50 | user_datasets = {} 51 | all_trials_folders = sorted(glob.glob(accelerometer_data_folder_path + "/*")) 52 | 53 | # Loop through every trial folder 54 | for trial_folder in all_trials_folders: 55 | trial_name = os.path.split(trial_folder)[-1] 56 | 57 | # label of the trial is given in the folder name, separated by underscore 58 | label = trial_name.split("_")[0] 59 | # label_set[label] = True 60 | print(trial_folder) 61 | 62 | # Loop through files for every user of the trail 63 | for trial_user_file in sorted(glob.glob(trial_folder + "/*.csv")): 64 | 65 | # use regex to match the user id 66 | user_id_match = re.search(r'(?P[0-9]+)\.csv', os.path.split(trial_user_file)[-1]) 67 | if user_id_match is not None: 68 | user_id = int(user_id_match.group('user_id')) 69 | 70 | # Read file 71 | user_trial_dataset = pd.read_csv(trial_user_file) 72 | user_trial_dataset.dropna(how="any", inplace=True) 73 | 74 | # Extract the x, y, z channels 75 | values = user_trial_dataset[['attitude.roll', 'attitude.pitch', 'attitude.yaw', 'gravity.x', 'gravity.y', 'gravity.z', 'rotationRate.x', 'rotationRate.y', 'rotationRate.z', 'userAcceleration.x', 'userAcceleration.y', 'userAcceleration.z']].values 76 | 77 | # the label is the same during the entire trial, so it is repeated here to pad to the same length as the values 78 | labels = np.repeat(label, values.shape[0]) 79 | 80 | if user_id not in user_datasets: 81 | user_datasets[user_id] = [] 82 | user_datasets[user_id].append((values, labels)) 83 | else: 84 | print("[ERR] User id not found", trial_user_file) 85 | 86 | return user_datasets 87 | 88 | # accelerometer_data_folder_path = 'data/MotionSense/' 89 | accelerometer_data_folder_path = 'data/MotionSense/A_DeviceMotion_data/' 90 | user_datasets = process_motion_sense_accelerometer_files(accelerometer_data_folder_path) 91 | with open(accelerometer_data_folder_path + 'motion_sense_user_split.pkl', 'wb') as f: 92 | cp.dump({ 93 | 'user_split': user_datasets, 94 | }, f) -------------------------------------------------------------------------------- /data_preprocess/preprocess_uea.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pickle 3 | import numpy as np 4 | import torch 5 | from tqdm import tqdm 6 | # from sktime.utils.data_io import load_from_arff_to_dataframe 7 | from sktime.datasets import load_from_arff_to_dataframe 8 | from sklearn.preprocessing import LabelEncoder 9 | 10 | # original UEA(0,1,2) [instances, length, features/channels] 11 | # UEA(0,1,2) --> later will be permuted in dataloader-->get UEA(0,2,1) [instances, features/channels, length] 12 | 13 | DATA_DIR = '../data' 14 | def mkdir_if_not_exists(loc, file=False): 15 | loc_ = os.path.dirname(loc) if file else loc 16 | if not os.path.exists(loc): 17 | os.makedirs(loc_, exist_ok=True) 18 | 19 | def create_torch_data(train_file, test_file): 20 | # Get arff format 21 | train_data, train_labels = load_from_arff_to_dataframe(train_file) 22 | test_data, test_labels = load_from_arff_to_dataframe(test_file) 23 | 24 | def convert_data(data): 25 | # Expand the series to numpy 26 | data_expand = data.applymap(lambda x: x.values).values 27 | # Single array, then to tensor 28 | data_numpy = np.stack([np.vstack(x).T for x in data_expand]) 29 | tensor_data = torch.Tensor(data_numpy) 30 | return tensor_data 31 | 32 | train_data, test_data = convert_data(train_data), convert_data(test_data) 33 | 34 | # Encode labels as often given as strings 35 | encoder = LabelEncoder().fit(train_labels) 36 | train_labels, test_labels = encoder.transform(train_labels), encoder.transform(test_labels) 37 | train_labels, test_labels = torch.Tensor(train_labels), torch.Tensor(test_labels) 38 | 39 | return train_data, test_data, train_labels, test_labels 40 | def save_pickle(obj, filename, protocol=4, create_folder=True): 41 | if create_folder: 42 | mkdir_if_not_exists(filename, file=True) 43 | 44 | # Save 45 | with open(filename, 'wb') as file: 46 | pickle.dump(obj, file, protocol=protocol) 47 | 48 | def convert_all_files(dataset='uea'): 49 | """ Convert all files from a given /raw/{subfolder} into torch data to be stored in /interim. """ 50 | assert dataset in ['uea', 'ucr'] 51 | arff_folder = DATA_DIR + '/Multivariate_arff' 52 | 53 | # Time for a big for loop 54 | for ds_name in ['DuckDuckGeese', 'FaceDetection', 'HandMovementDirection', 'Handwriting', 'Heartbeat', 'Libras', 'LSST', 'RacketSports', 'SpokenArabicDigits']: # ['Epilepsy', 'FingerMovements', 'ECG5000', ]: 55 | # File locations 56 | print(f'ds_name:{ds_name}') 57 | train_file = arff_folder + '/{}/{}_TRAIN.arff'.format(ds_name, ds_name) 58 | test_file = arff_folder + '/{}/{}_TEST.arff'.format(ds_name, ds_name) 59 | 60 | # Ready save dir 61 | save_dir = DATA_DIR + '/{}/'.format(ds_name) 62 | print(f'save_dir:{save_dir}') 63 | # If files don't exist, skip. 64 | if os.path.isdir(save_dir) == False: 65 | os.makedirs(save_dir) 66 | if any([x.split('/')[-1] not in os.listdir(arff_folder + '/{}'.format(ds_name)) for x in (train_file, test_file)]): 67 | if ds_name not in ['Images', 'Descriptions']: 68 | print('No files found for folder: {}'.format(ds_name)) 69 | continue 70 | # elif os.path.isdir(save_dir): 71 | # print('Files already exist for: {}'.format(ds_name)) 72 | # continue 73 | else: 74 | train_data, test_data, train_labels, test_labels = create_torch_data(train_file, test_file) 75 | 76 | dat_dict = dict() 77 | dat_dict["samples"] = train_data 78 | dat_dict["labels"] = train_labels 79 | torch.save(dat_dict, save_dir+ds_name+"_train.pt") 80 | 81 | dat_dict = dict() 82 | dat_dict["samples"] = test_data 83 | dat_dict["labels"] = test_labels 84 | torch.save(dat_dict, save_dir+ds_name+"_test.pt") 85 | print(train_data.shape, test_data.shape) 86 | 87 | # # Compile train and test data together 88 | # data = torch.cat([train_data, test_data]) 89 | # labels = torch.cat([train_labels, test_labels]) 90 | # 91 | # # Save original train test indexes in case we wish to use original splits 92 | # original_idxs = (np.arange(0, train_data.size(0)), np.arange(train_data.size(0), data.size(0))) 93 | 94 | # # Save data 95 | # save_pickle(data, save_dir + '/data.pkl') 96 | # save_pickle(labels, save_dir + '/labels.pkl') 97 | # save_pickle(original_idxs, save_dir + '/original_idxs.pkl') 98 | 99 | 100 | if __name__ == '__main__': 101 | dataset = 'uea' 102 | convert_all_files(dataset) -------------------------------------------------------------------------------- /data_preprocess/data_preprocess_ms.py: -------------------------------------------------------------------------------- 1 | ''' 2 | # Data Pre-processing on UEA datasets. 3 | # 4 | # ''' 5 | 6 | import os 7 | import numpy as np 8 | from torch.utils.data import Dataset, DataLoader 9 | from torchvision import transforms 10 | import torch 11 | import pickle as cp 12 | from data_preprocess.data_preprocess_utils import get_sample_weights, train_test_val_split, opp_sliding_window, normalize 13 | from data_preprocess.base_loader import base_loader 14 | 15 | 16 | class data_loader_uea(base_loader): 17 | def __init__(self, samples, labels, domains): 18 | super(data_loader_uea, self).__init__(samples, labels, domains) 19 | 20 | def apply_label_map(y, label_map): 21 | y_mapped = [] 22 | for l in y: 23 | y_mapped.append(label_map.get(l)) 24 | return np.array(y_mapped) 25 | 26 | def get_windows_dataset_from_user_list_format(user_datasets, window_size=200, shift=100): 27 | user_dataset_windowed = {} 28 | label_list = ['sit', 'std', 'wlk', 'ups', 'dws', 'jog'] # no null class 29 | label_map = dict([(l, i) for i, l in enumerate(label_list)]) 30 | 31 | for user_id in user_datasets: 32 | x = [] 33 | y = [] 34 | 35 | # Loop through each trail of each user 36 | for v, l in user_datasets[user_id]: 37 | # print(l) 38 | l = apply_label_map(l, label_map) 39 | # print(l) 40 | v_sw, l_sw = opp_sliding_window(v, l, window_size, shift) 41 | 42 | if len(v_sw) > 0: 43 | x.append(v_sw) 44 | y.append(l_sw) 45 | # print(f"Data: {v_sw.shape}, Labels: {l_sw.shape}") 46 | 47 | # combine all trials 48 | user_dataset_windowed[user_id] = (np.concatenate(x), np.concatenate(y).squeeze()) 49 | 50 | x = [] 51 | y = [] 52 | d = [] 53 | for user_id in user_dataset_windowed: 54 | 55 | v, l = user_dataset_windowed[user_id] 56 | x.append(v) 57 | y.append(l) 58 | d.append(np.full(len(l), user_id)) 59 | 60 | x = np.concatenate(x) 61 | y = np.concatenate(y).squeeze() 62 | d = np.concatenate(d).squeeze() 63 | 64 | return x, y, d 65 | 66 | def prep_ms_random(args, sw, ss): 67 | # with open('data/MotionSense/motion_sense_user_split.pkl', 'rb') as f: 68 | with open('data/MotionSense/A_DeviceMotion_data/motion_sense_user_split.pkl', 'rb') as f: 69 | dataset_dict = cp.load(f) 70 | user_datasets = dataset_dict['user_split'] 71 | 72 | x, y, d = get_windows_dataset_from_user_list_format(user_datasets, window_size=sw, shift=ss) 73 | x = normalize(x) 74 | print(x.shape, y.shape, d.shape) 75 | 76 | x_train, x_val, x_test, \ 77 | y_train, y_val, y_test, \ 78 | d_train, d_val, d_test = train_test_val_split(x, y, d, split_ratio=args.split_ratio) 79 | 80 | unique_y, counts_y = np.unique(y_train, return_counts=True) 81 | _, dataset_len_sw, dataset_n_feature = x_train.shape 82 | print('y_train label distribution: ', dict(zip(unique_y, counts_y))) 83 | weights = 100.0 / torch.Tensor(counts_y) 84 | print('weights of sampler: ', weights) 85 | weights = weights.double() 86 | sample_weights = get_sample_weights(y_train, weights) 87 | sampler = torch.utils.data.sampler.WeightedRandomSampler(weights=sample_weights, num_samples=len(sample_weights), 88 | replacement=True) 89 | 90 | # transform = transforms.Compose([ 91 | # transforms.ToTensor(), 92 | # transforms.Normalize(mean=(0, 0, 0, 0, 0, 0, 0, 0, 0), std=(1, 1, 1, 1, 1, 1, 1, 1, 1)) 93 | # ]) 94 | 95 | print(y_train.shape, y_val.shape, y_test.shape) 96 | 97 | print(x_train.shape, x_val.shape, x_test.shape) 98 | train_set_r = data_loader_uea(x_train, y_train, d_train) 99 | train_loader_r = DataLoader(train_set_r, batch_size=args.batch_size, shuffle=False, drop_last=True, sampler=sampler) 100 | val_set_r = data_loader_uea(x_val, y_val, d_val) 101 | val_loader_r = DataLoader(val_set_r, batch_size=args.batch_size, shuffle=False) 102 | test_set_r = data_loader_uea(x_test, y_test, d_test) 103 | test_loader_r = DataLoader(test_set_r, batch_size=args.batch_size, shuffle=False) 104 | 105 | return [train_loader_r], val_loader_r, test_loader_r 106 | 107 | 108 | 109 | 110 | def prep_ms(args, SLIDING_WINDOW_LEN=200, SLIDING_WINDOW_STEP=100): 111 | # todo: to check whether uea dataset belongs to subject or random 112 | if args.cases == 'random': 113 | return prep_ms_random(args, SLIDING_WINDOW_LEN, SLIDING_WINDOW_STEP) 114 | # elif args.cases == 'subject': 115 | # return prep_domains_ucihar_subject(args, SLIDING_WINDOW_LEN, SLIDING_WINDOW_STEP) 116 | # elif args.cases == 'subject_large': 117 | # return prep_domains_ucihar_subject_large(args, SLIDING_WINDOW_LEN, SLIDING_WINDOW_STEP) 118 | elif args.cases == '': 119 | pass 120 | else: 121 | return 'Error! Unknown args.cases!\n' -------------------------------------------------------------------------------- /data_loaders.py: -------------------------------------------------------------------------------- 1 | from data_preprocess import data_preprocess_ucihar 2 | from data_preprocess import data_preprocess_shar 3 | from data_preprocess import data_preprocess_wisdm 4 | from data_preprocess import data_preprocess_ms 5 | from data_preprocess import data_preprocess_uea 6 | from data_preprocess import data_preprocess_ucr 7 | 8 | uea_list = ['ArticularyWordRecognition','AtrialFibrillation','BasicMotions','CharacterTrajectories','Cricket','DuckDuckGeese','EigenWorms','Epilepsy','ERing','EthanolConcentration','FaceDetection','FingerMovements','HandMovementDirection','Handwriting','Heartbeat','JapaneseVowels','Libras','LSST','MotorImagery','NATOPS','PEMS-SF','PenDigits','PhonemeSpectra','RacketSports','SelfRegulationSCP1','SelfRegulationSCP2','SpokenArabicDigits','StandWalkJump','UWaveGestureLibrary','InsectWingbeat'] 9 | ucr_list = ['MoteStrain', 'ScreenType', 'MelbournePedestrian', 'RefrigerationDevices', 'PigArtPressure', 'SemgHandSubjectCh2', 'Car', 'HandOutlines', 'NonInvasiveFetalECGThorax2', 'FreezerRegularTrain', 'ArrowHead', 'FreezerSmallTrain', 'ECG200', 'ChlorineConcentration', 'CricketZ', 'CricketX', 'EOGHorizontalSignal', 'DiatomSizeReduction', 'Herring', 'Missing_value_and_variable_length_datasets_adjusted', 'SonyAIBORobotSurface2', 'PickupGestureWiimoteZ', 'ACSF1', 'EOGVerticalSignal', 'Rock', 'FiftyWords', 'ShakeGestureWiimoteZ', 'Symbols', 'ECGFiveDays', 'ProximalPhalanxTW', 'ProximalPhalanxOutlineAgeGroup', 'SyntheticControl', 'Wafer', 'Worms', 'BME', 'MiddlePhalanxTW', 'InsectWingbeatSound', 'UWaveGestureLibraryX', 'Coffee', 'TwoPatterns', 'ShapeletSim', 'Crop', 'AllGestureWiimoteY', 'PigAirwayPressure', 'Meat', 'StarLightCurves', 'UWaveGestureLibraryY', 'PhalangesOutlinesCorrect', 'DistalPhalanxOutlineCorrect', 'Earthquakes', 'CBF', 'Chinatown', 'AllGestureWiimoteZ', 'LargeKitchenAppliances', 'SmoothSubspace', 'GestureMidAirD2', 'MiddlePhalanxOutlineAgeGroup', 'ShapesAll', 'Computers', 'TwoLeadECG', 'DistalPhalanxTW', 'GestureMidAirD3', 'Lightning2', 'ProximalPhalanxOutlineCorrect', 'Plane', 'FacesUCR', 'DodgerLoopGame', 'ItalyPowerDemand', 'CinCECGTorso', 'GunPoint', 'MixedShapesSmallTrain', 'Fungi', 'MiddlePhalanxOutlineCorrect', 'Adiac', 'Phoneme', 'ElectricDevices', 'CricketY', 'NonInvasiveFetalECGThorax1', 'UWaveGestureLibraryZ', 'Yoga', 'BeetleFly', 'Fish', 'ToeSegmentation2', 'MedicalImages', 'Trace', 'GunPointAgeSpan', 'Beef', 'MixedShapesRegularTrain', 'SonyAIBORobotSurface1', 'FaceFour', 'PLAID', 'GesturePebbleZ2', 'OliveOil', 'ToeSegmentation1', 'SemgHandGenderCh2', 'FordB', 'Strawberry', 'Lightning7', 'UWaveGestureLibraryAll', 'InsectEPGSmallTrain', 'SwedishLeaf', 'BirdChicken', 'HouseTwenty', 'FordA', 'DistalPhalanxOutlineAgeGroup', 'InlineSkate', 'SmallKitchenAppliances', 'PigCVP', 'Mallat', 'GestureMidAirD1', 'WormsTwoClass', 'ECG5000', 'GunPointOldVersusYoung', 'Haptics', 'DodgerLoopDay', 'PowerCons', 'EthanolLevel', 'GunPointMaleVersusFemale', 'UMD', 'DodgerLoopWeekend', 'Ham', 'Wine', 'SemgHandMovementCh2', 'FaceAll', 'GesturePebbleZ1', 'AllGestureWiimoteX', 'OSULeaf', 'InsectEPGRegularTrain', 'WordSynonyms', 'MelbournePedestrian', 'PickupGestureWiimoteZ', 'ShakeGestureWiimoteZ', 'AllGestureWiimoteY', 'AllGestureWiimoteZ', 'GestureMidAirD2', 'GestureMidAirD3', 'DodgerLoopGame', 'PLAID', 'GesturePebbleZ2', 'GestureMidAirD1', 'DodgerLoopDay', 'DodgerLoopWeekend', 'GesturePebbleZ1', 'AllGestureWiimoteX'] 10 | 11 | def setup_dataloaders(args): 12 | if args.dataset == 'ucihar': 13 | args.n_feature = 9 14 | args.len_sw = 128 15 | args.n_class = 6 16 | if args.cases not in ['subject', 'subject_large']: 17 | args.target_domain == '0' 18 | train_loaders, val_loader, test_loader = data_preprocess_ucihar.prep_ucihar(args, SLIDING_WINDOW_LEN=args.len_sw, SLIDING_WINDOW_STEP=int( args.len_sw * 0.5)) 19 | 20 | elif args.dataset == 'shar': 21 | args.n_feature = 3 22 | args.len_sw = 151 23 | args.n_class = 17 24 | if args.cases not in ['subject', 'subject_large']: 25 | args.target_domain == '1' 26 | train_loaders, val_loader, test_loader = data_preprocess_shar.prep_shar(args, SLIDING_WINDOW_LEN=args.len_sw, SLIDING_WINDOW_STEP=int(args.len_sw * 0.5)) 27 | 28 | elif args.dataset == 'ms': 29 | # args.dataset = 'MotionSenseHAR' 30 | args.n_feature = 12 31 | args.len_sw = 200 32 | args.n_class = 6 33 | if args.cases not in ['subject', 'subject_large']: 34 | args.target_domain == '1' 35 | train_loaders, val_loader, test_loader = data_preprocess_ms.prep_ms(args, SLIDING_WINDOW_LEN=args.len_sw, SLIDING_WINDOW_STEP=int(args.len_sw * 0.5)) 36 | 37 | elif args.dataset == 'wisdm': 38 | args.n_feature = 3 39 | args.len_sw = 200 40 | args.n_class = 6 41 | if args.cases not in ['subject', 'subject_large']: 42 | args.target_domain == '1' 43 | train_loaders, val_loader, test_loader = data_preprocess_wisdm.prep_wisdm(args, SLIDING_WINDOW_LEN=args.len_sw, SLIDING_WINDOW_STEP=int(args.len_sw * 0.5)) 44 | 45 | if args.dataset in uea_list: 46 | train_loaders, val_loader, test_loader = data_preprocess_uea.prep_uea(args) 47 | 48 | if args.dataset in ucr_list: 49 | train_loaders, val_loader, test_loader = data_preprocess_ucr.prep_ucr(args) 50 | 51 | 52 | return train_loaders[0], val_loader, test_loader -------------------------------------------------------------------------------- /models/builder_utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | # from .MMB import * 4 | 5 | class Classifier(nn.Module): 6 | def __init__(self, bb_dim, n_classes): 7 | super(Classifier, self).__init__() 8 | 9 | self.classifier = nn.Linear(bb_dim, n_classes) 10 | 11 | def forward(self, x): 12 | out = self.classifier(x) 13 | 14 | return out 15 | 16 | 17 | class Projector(nn.Module): 18 | def __init__(self, model, bb_dim, prev_dim, dim): 19 | super(Projector, self).__init__() 20 | if model == 'SimCLR': 21 | self.projector = nn.Sequential(nn.Linear(bb_dim, prev_dim), 22 | nn.ReLU(inplace=True), 23 | nn.Linear(prev_dim, dim)) 24 | elif model == 'byol': 25 | self.projector = nn.Sequential(nn.Linear(bb_dim, prev_dim, bias=False), 26 | nn.BatchNorm1d(prev_dim), 27 | nn.ReLU(inplace=True), 28 | nn.Linear(prev_dim, dim, bias=False), 29 | nn.BatchNorm1d(dim, affine=False)) 30 | else: 31 | raise NotImplementedError 32 | 33 | def forward(self, x): 34 | x = self.projector(x) 35 | return x 36 | 37 | 38 | class Predictor(nn.Module): 39 | def __init__(self, model, dim, pred_dim): 40 | super(Predictor, self).__init__() 41 | if model == 'SimCLR': 42 | pass 43 | elif model == 'byol': 44 | self.predictor = nn.Sequential(nn.Linear(dim, pred_dim), 45 | nn.BatchNorm1d(pred_dim), 46 | nn.ReLU(inplace=True), 47 | nn.Linear(pred_dim, dim)) 48 | else: 49 | raise NotImplementedError 50 | 51 | def forward(self, x): 52 | x = self.predictor(x) 53 | return x 54 | 55 | class EMA(): 56 | def __init__(self, beta): 57 | super().__init__() 58 | self.beta = beta 59 | 60 | def update_average(self, old, new): 61 | if old is None: 62 | return new 63 | return old * self.beta + (1 - self.beta) * new 64 | 65 | 66 | def update_moving_average(ema_updater, ma_model, current_model): 67 | for current_params, ma_params in zip(current_model.parameters(), ma_model.parameters()): 68 | old_weight, up_weight = ma_params.data, current_params.data 69 | ma_params.data = ema_updater.update_average(old_weight, up_weight) 70 | 71 | 72 | from functools import wraps 73 | 74 | 75 | def singleton(cache_key): 76 | def inner_fn(fn): 77 | @wraps(fn) 78 | def wrapper(self, *args, **kwargs): 79 | instance = getattr(self, cache_key) 80 | if instance is not None: 81 | return instance 82 | 83 | instance = fn(self, *args, **kwargs) 84 | setattr(self, cache_key, instance) 85 | return instance 86 | 87 | return wrapper 88 | 89 | return inner_fn 90 | 91 | 92 | # a wrapper class for the base neural network 93 | # will manage the interception of the hidden layer output 94 | # and pipe it into the projector and predictor nets 95 | 96 | class NetWrapper(nn.Module): 97 | def __init__(self, net, projection_size, projection_hidden_size, DEVICE, layer=-2): 98 | super().__init__() 99 | self.net = net 100 | self.layer = layer 101 | self.DEVICE = DEVICE 102 | 103 | self.projector = None 104 | self.projection_size = projection_size 105 | self.projection_hidden_size = projection_hidden_size 106 | 107 | self.hidden = {} 108 | self.hook_registered = False 109 | 110 | def _find_layer(self): 111 | children = [*self.net.children()] 112 | print('children[self.layer]:', children[self.layer]) 113 | return children[self.layer] 114 | return None 115 | 116 | def _hook(self, _, input, output): 117 | device = input[0].device 118 | self.hidden[device] = output.reshape(output.shape[0], -1) 119 | 120 | def _register_hook(self): 121 | layer = self._find_layer() 122 | assert layer is not None, f'hidden layer ({self.layer}) not found' 123 | handle = layer.register_forward_hook(self._hook) 124 | self.hook_registered = True 125 | 126 | @singleton('projector') 127 | def _get_projector(self, hidden): 128 | _, dim = hidden.shape 129 | projector = Projector(model='byol', bb_dim=dim, prev_dim=self.projection_hidden_size, dim=self.projection_size) 130 | return projector.to(hidden) 131 | 132 | def get_representation(self, x): 133 | 134 | if self.layer == -1: 135 | return self.net(x) 136 | 137 | if not self.hook_registered: 138 | self._register_hook() 139 | 140 | self.hidden.clear() 141 | _ = self.net(x) 142 | hidden = self.hidden[x.device] 143 | self.hidden.clear() 144 | 145 | assert hidden is not None, f'hidden layer {self.layer} never emitted an output' 146 | return hidden 147 | 148 | def forward(self, x): 149 | representation = self.get_representation(x) 150 | 151 | if len(representation.shape) == 3: 152 | representation = representation.reshape(representation.shape[0], -1) 153 | 154 | projector = self._get_projector(representation) 155 | projection = projector(representation) 156 | return projection, representation 157 | 158 | -------------------------------------------------------------------------------- /data_preprocess/data_preprocess_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from numpy.lib.stride_tricks import as_strided as ast 3 | # from dataclasses import dataclass 4 | from sklearn.model_selection import train_test_split 5 | 6 | # @dataclass 7 | # class Params: 8 | # x: float 9 | # y: float 10 | # z: float 11 | 12 | 13 | def train_test_val_split(x_win_all, y_win_all, d_win_all, split_ratio=0.8): 14 | # split all data into train and test 15 | x_win_train, x_win_test, y_win_train, y_win_test, d_win_train, d_win_test = \ 16 | train_test_split(x_win_all, y_win_all, d_win_all, test_size=split_ratio, random_state=0) 17 | 18 | # split train into train and validation with the same ratio 19 | x_win_train, x_win_val, y_win_train, y_win_val, d_win_train, d_win_val = \ 20 | train_test_split(x_win_train, y_win_train, d_win_train, test_size=split_ratio, random_state=0) 21 | 22 | return x_win_train, x_win_val, x_win_test, \ 23 | y_win_train, y_win_val, y_win_test, \ 24 | d_win_train, d_win_val, d_win_test 25 | 26 | 27 | def onehot_to_label(y_onehot): 28 | a = np.argwhere(y_onehot == 1) 29 | return a[:, -1] 30 | 31 | def get_sample_weights(y, weights): 32 | ''' 33 | to assign weights to each sample 34 | ''' 35 | label_unique = np.unique(y) 36 | sample_weights = [] 37 | for val in y: 38 | idx = np.where(label_unique == val) 39 | sample_weights.append(weights[idx]) 40 | return sample_weights 41 | 42 | 43 | def normalize(x): 44 | """Normalizes all sensor channels by mean substraction, 45 | dividing by the standard deviation and by 2. 46 | 47 | :param x: numpy integer matrix 48 | Sensor data 49 | :return: 50 | Normalized sensor data 51 | """ 52 | x = np.array(x, dtype=np.float32) 53 | m = np.mean(x, axis=0) 54 | x -= m 55 | std = np.std(x, axis=0) 56 | std += 0.000001 57 | 58 | x /= std 59 | return x 60 | 61 | def find_label(labels): 62 | # find the label of a sw given the labels of frames of a sliding window 63 | if np.bincount(labels)[np.argmax(np.bincount(labels))] < len(labels) * 0.5: 64 | return 0 # class 0 will be removed 65 | else: 66 | return np.argmax(np.bincount(labels)) 67 | 68 | def opp_sliding_window_w_d(data_x, data_y, d, ws, ss): # window size, step size 69 | data_x = sliding_window(data_x, (ws, data_x.shape[1]), (ss, 1)) 70 | data_y = np.asarray([[find_label(i)] for i in sliding_window(data_y, ws, ss)]) 71 | data_d = np.asarray([[find_label(i)] for i in sliding_window(d, ws, ss)]) 72 | return data_x.astype(np.float32), data_y.reshape(len(data_y)).astype(np.uint8), data_d.reshape(len(data_d)).astype(np.uint8) 73 | 74 | 75 | def sliding_window(a, ws, ss=None, flatten=True): 76 | ''' 77 | Return a sliding window over a in any number of dimensions 78 | 79 | Parameters: 80 | a - an n-dimensional numpy array 81 | ws - an int (a is 1D) or tuple (a is 2D or greater) representing the size 82 | of each dimension of the window 83 | ss - an int (a is 1D) or tuple (a is 2D or greater) representing the 84 | amount to slide the window in each dimension. If not specified, it 85 | defaults to ws. 86 | flatten - if True, all slices are flattened, otherwise, there is an 87 | extra dimension for each dimension of the input. 88 | 89 | Returns 90 | an array containing each n-dimensional window from a 91 | ''' 92 | 93 | if None is ss: 94 | # ss was not provided. the windows will not overlap in any direction. 95 | ss = ws 96 | ws = norm_shape(ws) 97 | ss = norm_shape(ss) 98 | 99 | # convert ws, ss, and a.shape to numpy arrays so that we can do math in every 100 | # dimension at once. 101 | ws = np.array(ws) 102 | ss = np.array(ss) 103 | shape = np.array(a.shape) 104 | 105 | # ensure that ws, ss, and a.shape all have the same number of dimensions 106 | ls = [len(shape), len(ws), len(ss)] 107 | if 1 != len(set(ls)): 108 | raise ValueError( \ 109 | 'a.shape, ws and ss must all have the same length. They were %s' % str(ls)) 110 | 111 | # ensure that ws is smaller than a in every dimension 112 | if np.any(ws > shape): 113 | raise ValueError( \ 114 | 'ws cannot be larger than a in any dimension.\ 115 | a.shape was %s and ws was %s' % (str(a.shape), str(ws))) 116 | 117 | newshape = norm_shape(((shape - ws) // ss) + 1) 118 | # the shape of the strided array will be the number of slices in each dimension 119 | # plus the shape of the window (tuple addition) 120 | newshape += norm_shape(ws) 121 | # the strides tuple will be the array's strides multiplied by step size, plus 122 | # the array's strides (tuple addition) 123 | newstrides = norm_shape(np.array(a.strides) * ss) + a.strides 124 | strided = ast(a, shape=newshape, strides=newstrides) 125 | if not flatten: 126 | return strided 127 | 128 | # Collapse strided so that it has one more dimension than the window. I.e., 129 | # the new array is a flat list of slices. 130 | meat = len(ws) if ws.shape else 0 131 | firstdim = (np.product(newshape[:-meat]),) if ws.shape else () 132 | dim = firstdim + (newshape[-meat:]) 133 | # remove any dimensions with size 1 134 | # commented by hangwei 135 | # dim = filter(lambda i: i != 1, dim) 136 | return strided.reshape(dim) 137 | 138 | def norm_shape(shape): 139 | ''' 140 | Normalize numpy array shapes so they're always expressed as a tuple, 141 | even for one-dimensional shapes. 142 | 143 | Parameters 144 | shape - an int, or a tuple of ints 145 | 146 | Returns 147 | a shape tuple 148 | ''' 149 | try: 150 | i = int(shape) 151 | return (i,) 152 | except TypeError: 153 | # shape was not a number 154 | pass 155 | 156 | try: 157 | t = tuple(shape) 158 | return t 159 | except TypeError: 160 | # shape was not iterable 161 | pass 162 | 163 | raise TypeError('shape must be an int, or a tuple of ints') 164 | 165 | def opp_sliding_window(data_x, data_y, ws, ss): # window size, step size 166 | data_x = sliding_window(data_x, (ws, data_x.shape[1]), (ss, 1)) 167 | data_y = np.asarray([[find_label(i)] for i in sliding_window(data_y, ws, ss)]) 168 | return data_x.astype(np.float32), data_y.reshape(len(data_y)).astype(np.uint8) 169 | -------------------------------------------------------------------------------- /data_preprocess/data_preprocess_ucr.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import torch 4 | import pandas as pd 5 | import math 6 | import random 7 | from datetime import datetime 8 | import pickle 9 | from scipy.io.arff import loadarff 10 | from sklearn.preprocessing import StandardScaler, MinMaxScaler 11 | 12 | import torch 13 | from torch.utils.data import DataLoader 14 | from torch.utils.data import Dataset 15 | import os 16 | import numpy as np 17 | from sklearn.model_selection import train_test_split 18 | from data_preprocess.data_preprocess_utils import get_sample_weights, train_test_val_split 19 | from data_preprocess.base_loader import base_loader 20 | 21 | ucr_list = ['MoteStrain', 'ScreenType', 'MelbournePedestrian', 'RefrigerationDevices', 'PigArtPressure', 'SemgHandSubjectCh2', 'Car', 'HandOutlines', 'NonInvasiveFetalECGThorax2', 'FreezerRegularTrain', 'ArrowHead', 'FreezerSmallTrain', 'ECG200', 'ChlorineConcentration', 'CricketZ', 'CricketX', 'EOGHorizontalSignal', 'DiatomSizeReduction', 'Herring', 'Missing_value_and_variable_length_datasets_adjusted', 'SonyAIBORobotSurface2', 'PickupGestureWiimoteZ', 'ACSF1', 'EOGVerticalSignal', 'Rock', 'FiftyWords', 'ShakeGestureWiimoteZ', 'Symbols', 'ECGFiveDays', 'ProximalPhalanxTW', 'ProximalPhalanxOutlineAgeGroup', 'SyntheticControl', 'Wafer', 'Worms', 'BME', 'MiddlePhalanxTW', 'InsectWingbeatSound', 'UWaveGestureLibraryX', 'Coffee', 'TwoPatterns', 'ShapeletSim', 'Crop', 'AllGestureWiimoteY', 'PigAirwayPressure', 'Meat', 'StarLightCurves', 'UWaveGestureLibraryY', 'PhalangesOutlinesCorrect', 'DistalPhalanxOutlineCorrect', 'Earthquakes', 'CBF', 'Chinatown', 'AllGestureWiimoteZ', 'LargeKitchenAppliances', 'SmoothSubspace', 'GestureMidAirD2', 'MiddlePhalanxOutlineAgeGroup', 'ShapesAll', 'Computers', 'TwoLeadECG', 'DistalPhalanxTW', 'GestureMidAirD3', 'Lightning2', 'ProximalPhalanxOutlineCorrect', 'Plane', 'FacesUCR', 'DodgerLoopGame', 'ItalyPowerDemand', 'CinCECGTorso', 'GunPoint', 'MixedShapesSmallTrain', 'Fungi', 'MiddlePhalanxOutlineCorrect', 'Adiac', 'Phoneme', 'ElectricDevices', 'CricketY', 'NonInvasiveFetalECGThorax1', 'UWaveGestureLibraryZ', 'Yoga', 'BeetleFly', 'Fish', 'ToeSegmentation2', 'MedicalImages', 'Trace', 'GunPointAgeSpan', 'Beef', 'MixedShapesRegularTrain', 'SonyAIBORobotSurface1', 'FaceFour', 'PLAID', 'GesturePebbleZ2', 'OliveOil', 'ToeSegmentation1', 'SemgHandGenderCh2', 'FordB', 'Strawberry', 'Lightning7', 'UWaveGestureLibraryAll', 'InsectEPGSmallTrain', 'SwedishLeaf', 'BirdChicken', 'HouseTwenty', 'FordA', 'DistalPhalanxOutlineAgeGroup', 'InlineSkate', 'SmallKitchenAppliances', 'PigCVP', 'Mallat', 'GestureMidAirD1', 'WormsTwoClass', 'ECG5000', 'GunPointOldVersusYoung', 'Haptics', 'DodgerLoopDay', 'PowerCons', 'EthanolLevel', 'GunPointMaleVersusFemale', 'UMD', 'DodgerLoopWeekend', 'Ham', 'Wine', 'SemgHandMovementCh2', 'FaceAll', 'GesturePebbleZ1', 'AllGestureWiimoteX', 'OSULeaf', 'InsectEPGRegularTrain', 'WordSynonyms', 'MelbournePedestrian', 'PickupGestureWiimoteZ', 'ShakeGestureWiimoteZ', 'AllGestureWiimoteY', 'AllGestureWiimoteZ', 'GestureMidAirD2', 'GestureMidAirD3', 'DodgerLoopGame', 'PLAID', 'GesturePebbleZ2', 'GestureMidAirD1', 'DodgerLoopDay', 'DodgerLoopWeekend', 'GesturePebbleZ1', 'AllGestureWiimoteX'] 22 | 23 | class data_loader_ucr(base_loader): 24 | def __init__(self, samples, labels, domains): 25 | super(data_loader_ucr, self).__init__(samples, labels, domains) 26 | 27 | def load_UCR(dataset,use_fft=False): 28 | train_file = os.path.join('./data/UCR', dataset, dataset + "_TRAIN.tsv") 29 | test_file = os.path.join('./data/UCR', dataset, dataset + "_TEST.tsv") 30 | train_df = pd.read_csv(train_file, sep='\t', header=None) 31 | test_df = pd.read_csv(test_file, sep='\t', header=None) 32 | train_array = np.array(train_df) 33 | test_array = np.array(test_df) 34 | 35 | # Move the labels to {0, ..., L-1} 36 | labels = np.unique(train_array[:, 0]) 37 | transform = {} 38 | for i, l in enumerate(labels): 39 | transform[l] = i 40 | 41 | train = train_array[:, 1:].astype(np.float64) 42 | train_labels = np.vectorize(transform.get)(train_array[:, 0]) 43 | test = test_array[:, 1:].astype(np.float64) 44 | test_labels = np.vectorize(transform.get)(test_array[:, 0]) 45 | 46 | # Normalization for non-normalized data 47 | # To keep the amplitude information, we do not normalize values over 48 | # individual time series, but on the whole dataset 49 | if dataset in [ 50 | 'AllGestureWiimoteX', 51 | 'AllGestureWiimoteY', 52 | 'AllGestureWiimoteZ', 53 | 'BME', 54 | 'Chinatown', 55 | 'Crop', 56 | 'EOGHorizontalSignal', 57 | 'EOGVerticalSignal', 58 | 'Fungi', 59 | 'GestureMidAirD1', 60 | 'GestureMidAirD2', 61 | 'GestureMidAirD3', 62 | 'GesturePebbleZ1', 63 | 'GesturePebbleZ2', 64 | 'GunPointAgeSpan', 65 | 'GunPointMaleVersusFemale', 66 | 'GunPointOldVersusYoung', 67 | 'HouseTwenty', 68 | 'InsectEPGRegularTrain', 69 | 'InsectEPGSmallTrain', 70 | 'MelbournePedestrian', 71 | 'PickupGestureWiimoteZ', 72 | 'PigAirwayPressure', 73 | 'PigArtPressure', 74 | 'PigCVP', 75 | 'PLAID', 76 | 'PowerCons', 77 | 'Rock', 78 | 'SemgHandGenderCh2', 79 | 'SemgHandMovementCh2', 80 | 'SemgHandSubjectCh2', 81 | 'ShakeGestureWiimoteZ', 82 | 'SmoothSubspace', 83 | 'UMD' 84 | ]: 85 | mean = np.nanmean(train) 86 | std = np.nanstd(train) 87 | train = (train - mean) / std 88 | test = (test - mean) / std 89 | 90 | return train[..., np.newaxis], train_labels, test[..., np.newaxis], test_labels 91 | 92 | def data_generator(args): 93 | x_train, y_train, x_test, y_test = load_UCR(args.dataset) 94 | 95 | x_train, x_test = torch.Tensor(x_train), torch.Tensor(x_test) 96 | if torch.isnan(x_train).sum() > 0: 97 | x_train = torch.nan_to_num(x_train, nan=0.0) 98 | if torch.isnan(x_test).sum() > 0: 99 | x_test = torch.nan_to_num(x_test, nan=0.0) 100 | 101 | unique_y, counts_y = np.unique(y_train, return_counts=True) 102 | 103 | args.n_feature = x_train.shape[-1] 104 | args.len_sw = x_train.shape[-2] 105 | args.n_class = len(unique_y) 106 | 107 | train_set_r = data_loader_ucr(x_train, y_train, y_train) 108 | train_loader_r = DataLoader(train_set_r, batch_size=args.batch_size, shuffle=True, drop_last=True) # , sampler=sampler) 109 | val_set_r = data_loader_ucr(x_test, y_test, y_test) 110 | val_loader_r = DataLoader(val_set_r, batch_size=args.batch_size, shuffle=False) 111 | test_set_r = data_loader_ucr(x_test, y_test, y_test) 112 | test_loader_r = DataLoader(test_set_r, batch_size=args.batch_size, shuffle=False) 113 | 114 | return [train_loader_r], None, test_loader_r 115 | 116 | 117 | def prep_ucr(args): 118 | if args.cases == 'random': 119 | return data_generator(args) 120 | 121 | 122 | -------------------------------------------------------------------------------- /data_preprocess/data_preprocess_fd.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | import os 4 | from torch.utils.data import Dataset, DataLoader 5 | from data_preprocess.base_loader import base_loader 6 | from data_preprocess.data_preprocess_utils import get_sample_weights, train_test_val_split 7 | from sklearn.model_selection import StratifiedShuffleSplit 8 | 9 | def load_domain_data(domain_idx): 10 | data_dir = './data/FD/' 11 | filename = domain_idx +'.pt' 12 | print(filename) 13 | if os.path.isfile(data_dir + filename) == True: 14 | data = torch.load(data_dir + filename) 15 | x = data['x'] 16 | y = data['y'] 17 | else: 18 | for domain in ['a', 'b', 'c', 'd']: 19 | all_x, all_y = None, None 20 | for pre in ['train', 'val', 'test']: 21 | filename = pre + '_' + domain + '.pt' 22 | data = torch.load('./data/FD/' + filename) 23 | x = data['samples'] 24 | y = data['labels'] 25 | print(filename, x.shape, y.shape) 26 | all_x = torch.cat([all_x, x], axis=0) if all_x is not None else x 27 | all_y = torch.cat([all_y, y], axis=0) if all_y is not None else y 28 | unique_y, counts_y = np.unique(all_y, return_counts=True) 29 | # print(x[0, :10]) 30 | print(all_x.shape, all_y.shape) 31 | print('y_train label distribution: ', dict(zip(unique_y, counts_y))) 32 | torch.save({'x': all_x, 'y': all_y}, './data/FD/' + domain + '.pt') 33 | data = torch.load(data_dir + domain + '.pt') 34 | x = data['x'] 35 | y = data['y'] 36 | # print({'a': 0, 'b': 1, 'c': 2, 'd': 3}[domain_idx]) 37 | d = torch.Tensor(np.full(y.shape, {'a': 0, 'b': 1, 'c': 2, 'd': 3}[domain_idx], dtype=int)) 38 | print(x.shape, y.shape, d.shape) 39 | unique_y, counts_y = np.unique(y, return_counts=True) 40 | print('y label distribution: ', dict(zip(unique_y, counts_y))) 41 | return x, y, d 42 | 43 | def load_domain_data_bd(domain_idx='bd'): 44 | if domain_idx != 'bd': 45 | return 'Error! Domain idx should be bd\n' 46 | data_dir = './data/FD/' 47 | filename = domain_idx + '.pt' 48 | if os.path.isfile(data_dir + filename) == True: 49 | data = torch.load(data_dir + filename) 50 | x = data['x'] 51 | y = data['y'] 52 | else: 53 | all_x, all_y = None, None 54 | for domain in ['b', 'd']: 55 | filename = domain +'.pt' 56 | print(filename) 57 | if os.path.isfile(data_dir + filename) == True: 58 | data = torch.load(data_dir + filename) 59 | x = data['x'] 60 | y = data['y'] 61 | sp = StratifiedShuffleSplit(n_splits=1, test_size=0.5, random_state=0) 62 | for selected_index, _ in sp.split(x, y): 63 | x_selected, y_selected = x[selected_index], y[selected_index] 64 | all_x = torch.cat([all_x, x_selected], axis=0) if all_x is not None else x_selected 65 | all_y = torch.cat([all_y, y_selected], axis=0) if all_y is not None else y_selected 66 | unique_y, counts_y = np.unique(all_y, return_counts=True) 67 | print(all_x.shape, all_y.shape) 68 | print('y_train label distribution: ', dict(zip(unique_y, counts_y))) 69 | torch.save({'x': all_x, 'y': all_y}, './data/FD/' + domain_idx + '.pt') 70 | data = torch.load(data_dir + domain_idx + '.pt') 71 | x = data['x'] 72 | y = data['y'] 73 | d = torch.Tensor(np.full(y.shape, {'a': 0, 'bd': 1, 'c': 2}[domain_idx], dtype=int)) 74 | print(x.shape, y.shape, d.shape) 75 | unique_y, counts_y = np.unique(y, return_counts=True) 76 | print('y label distribution: ', dict(zip(unique_y, counts_y))) 77 | return x, y, d 78 | 79 | class data_loader_fd(base_loader): 80 | def __init__(self, samples, labels, domains): 81 | super(data_loader_fd, self).__init__(samples, labels, domains) 82 | 83 | def prep_domains_fd_comb(args, SLIDING_WINDOW_LEN=0, SLIDING_WINDOW_STEP=0): 84 | # note: for fd dataset with total 4 domains, 85 | source_domain_list = ['a', 'bd', 'c'] 86 | 87 | source_domain_list.remove(args.target_domain) 88 | 89 | # source domain data prep 90 | x_win_all, y_win_all, d_win_all = np.array([]), np.array([]), np.array([]) 91 | for source_domain in source_domain_list: 92 | print('source_domain:', source_domain) 93 | 94 | if source_domain == 'bd': 95 | x, y, d = load_domain_data_bd(source_domain) 96 | else: 97 | x, y, d = load_domain_data(source_domain) 98 | 99 | x = x.reshape(-1, 5120, 1) 100 | print(" ..after sliding window: inputs {0}, targets {1}".format(x.shape, y.shape)) 101 | 102 | x_win_all = np.concatenate((x_win_all, x), axis=0) if x_win_all.size else x 103 | y_win_all = np.concatenate((y_win_all, y), axis=0) if y_win_all.size else y 104 | d_win_all = np.concatenate((d_win_all, d), axis=0) if d_win_all.size else d 105 | 106 | unique_y, counts_y = np.unique(y_win_all, return_counts=True) 107 | print('y_train label distribution: ', dict(zip(unique_y, counts_y))) 108 | weights = 100.0 / torch.Tensor(counts_y) 109 | print('weights of sampler: ', weights) 110 | weights = weights.double() 111 | 112 | sample_weights = get_sample_weights(y_win_all, weights) 113 | 114 | sampler = torch.utils.data.sampler.WeightedRandomSampler(weights=sample_weights, num_samples=len(sample_weights), replacement=True) 115 | 116 | data_set = data_loader_fd(x_win_all, y_win_all, d_win_all) 117 | source_loader = DataLoader(data_set, batch_size=args.batch_size, shuffle=False, drop_last=True, sampler=sampler) 118 | print('source_loader batch: ', len(source_loader)) 119 | source_loaders = [source_loader] 120 | 121 | # target domain data prep 122 | print('target_domain:', args.target_domain) 123 | if args.target_domain == 'bd': 124 | x, y, d = load_domain_data_bd(args.target_domain) 125 | else: 126 | x, y, d = load_domain_data(args.target_domain) 127 | 128 | x = x.reshape(-1, 5120, 1) 129 | 130 | print(" ..after sliding window: inputs {0}, targets {1}".format(x.shape, y.shape)) 131 | 132 | data_set = data_loader_fd(x, y, d) 133 | target_loader = DataLoader(data_set, batch_size=args.batch_size, shuffle=False) 134 | print('target_loader batch: ', len(target_loader)) 135 | return source_loaders, None, target_loader 136 | 137 | def prep_domains_fd_random(args, SLIDING_WINDOW_LEN=0, SLIDING_WINDOW_STEP=0): 138 | # note: for fd dataset with total 4 domains, 139 | source_domain_list = ['a', 'b', 'd', 'c'] 140 | 141 | # source domain data prep 142 | x_win_all, y_win_all, d_win_all = np.array([]), np.array([]), np.array([]) 143 | for source_domain in source_domain_list: 144 | print('source_domain:', source_domain) 145 | 146 | if source_domain == 'bd': 147 | x, y, d = load_domain_data_bd(source_domain) 148 | else: 149 | x, y, d = load_domain_data(source_domain) 150 | 151 | x = x.reshape(-1, 5120, 1) 152 | print(" ..after sliding window: inputs {0}, targets {1}".format(x.shape, y.shape)) 153 | 154 | x_win_all = np.concatenate((x_win_all, x), axis=0) if x_win_all.size else x 155 | y_win_all = np.concatenate((y_win_all, y), axis=0) if y_win_all.size else y 156 | d_win_all = np.concatenate((d_win_all, d), axis=0) if d_win_all.size else d 157 | 158 | x_win_train, x_win_val, x_win_test, \ 159 | y_win_train, y_win_val, y_win_test, \ 160 | d_win_train, d_win_val, d_win_test = train_test_val_split(x_win_all, y_win_all, d_win_all, 161 | split_ratio=args.split_ratio) 162 | 163 | unique_y, counts_y = np.unique(y_win_train, return_counts=True) 164 | print('y_train label distribution: ', dict(zip(unique_y, counts_y))) 165 | weights = 100.0 / torch.Tensor(counts_y) 166 | print('weights of sampler: ', weights) 167 | weights = weights.double() 168 | sample_weights = get_sample_weights(y_win_train, weights) 169 | sampler = torch.utils.data.sampler.WeightedRandomSampler(weights=sample_weights, num_samples=len(sample_weights), replacement=True) 170 | 171 | train_set_r = data_loader_fd(x_win_train, y_win_train, d_win_train) 172 | train_loader_r = DataLoader(train_set_r, batch_size=args.batch_size, shuffle=False, drop_last=True, sampler=sampler) 173 | val_set_r = data_loader_fd(x_win_val, y_win_val, d_win_val) 174 | val_loader_r = DataLoader(val_set_r, batch_size=args.batch_size, shuffle=False) 175 | test_set_r = data_loader_fd(x_win_test, y_win_test, d_win_test) 176 | test_loader_r = DataLoader(test_set_r, batch_size=args.batch_size, shuffle=False) 177 | 178 | return [train_loader_r], val_loader_r, test_loader_r 179 | 180 | def prep_eeg(args, SLIDING_WINDOW_LEN=0, SLIDING_WINDOW_STEP=0): 181 | if args.cases == 'rich_comb': 182 | return prep_domains_fd_comb(args, SLIDING_WINDOW_LEN, SLIDING_WINDOW_STEP) 183 | if args.cases == 'random': 184 | return prep_domains_fd_random(args, SLIDING_WINDOW_LEN, SLIDING_WINDOW_STEP) 185 | elif args.cases == '': 186 | pass 187 | else: 188 | return 'Error!\n' 189 | -------------------------------------------------------------------------------- /data_preprocess/data_preprocess_shar.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Data Pre-processing on SHAR dataset. 3 | 4 | ''' 5 | 6 | import os 7 | import numpy as np 8 | from torch.utils.data import Dataset, DataLoader 9 | import torch 10 | import pickle as cp 11 | from data_preprocess.data_preprocess_utils import get_sample_weights, train_test_val_split, normalize 12 | import scipy.io 13 | from data_preprocess.base_loader import base_loader 14 | 15 | torch.manual_seed(10) 16 | 17 | def load_domain_data(domain_idx): 18 | """ to load all the data from the specific domain with index domain_idx 19 | :param domain_idx: index of a single domain 20 | :return: X and y data of the entire domain 21 | """ 22 | data_dir = './data/UniMiB-SHAR/' 23 | saved_filename = 'shar_domain_' + domain_idx + '_wd.data' # "wd": with domain label 24 | 25 | if os.path.isfile(data_dir + saved_filename) == True: 26 | data = np.load(data_dir + saved_filename, allow_pickle=True) 27 | X = data[0][0] 28 | y = data[0][1] 29 | d = data[0][2] 30 | else: 31 | str_folder = './data/UniMiB-SHAR/data/' 32 | data_all = scipy.io.loadmat(str_folder + 'acc_data.mat') 33 | y_id_all = scipy.io.loadmat(str_folder + 'acc_labels.mat') 34 | y_id_all = y_id_all['acc_labels'] # (11771, 3) 35 | 36 | X_all = data_all['acc_data'] # data: (11771, 453) 37 | y_all = y_id_all[:, 0] - 1 # to map the labels to [0, 16] 38 | id_all = y_id_all[:, 1] 39 | 40 | print('\nProcessing domain {0} files...\n'.format(domain_idx)) 41 | 42 | target_idx = np.where(id_all == int(domain_idx)) 43 | X = X_all[target_idx] 44 | y = y_all[target_idx] 45 | 46 | domain_idx_map = {'1':0, '2':1, '3':2, '5':3} 47 | domain_idx_int = domain_idx_map[domain_idx] 48 | 49 | d = np.full(y.shape, domain_idx_int, dtype=int) 50 | 51 | print('\nProcessing domain {0} files | X: {1} y: {2} d:{3} \n'.format(domain_idx, X.shape, y.shape, d.shape)) 52 | obj = [(X, y, d)] 53 | f = open(os.path.join(data_dir, saved_filename), 'wb') 54 | cp.dump(obj, f, protocol=cp.HIGHEST_PROTOCOL) 55 | f.close() 56 | return X, y, d 57 | 58 | def load_domain_data_large(domain_idx): 59 | """ to load all the data from the specific domain 60 | :param domain_idx: 61 | :return: X and y data of the entire domain 62 | """ 63 | data_dir = './data/UniMiB-SHAR/' 64 | saved_filename = 'shar_domain_' + domain_idx + '_wd.data' # with domain label 65 | 66 | if os.path.isfile(data_dir + saved_filename) == True: 67 | data = np.load(data_dir + saved_filename, allow_pickle=True) 68 | X = data[0][0] 69 | y = data[0][1] 70 | d = data[0][2] 71 | else: 72 | str_folder = './data/UniMiB-SHAR/data/' 73 | data_all = scipy.io.loadmat(str_folder + 'acc_data.mat') 74 | y_id_all = scipy.io.loadmat(str_folder + 'acc_labels.mat') 75 | y_id_all = y_id_all['acc_labels'] # (11771, 3) 76 | 77 | X_all = data_all['acc_data'] # data: (11771, 453) 78 | y_all = y_id_all[:, 0] - 1 # to map the labels to [0, 16] 79 | id_all = y_id_all[:, 1] 80 | 81 | print('\nProcessing domain {0} files...\n'.format(domain_idx)) 82 | 83 | target_idx = np.where(id_all == int(domain_idx)) 84 | X = X_all[target_idx] 85 | y = y_all[target_idx] 86 | # note: to change domain ID 87 | # source_domain_list = ['1', '2', '3', '5', '6', '9', 88 | # '11', '13', '14', '15', '16', '17', '19', '20', 89 | # '21', '22', '23', '24', '25', '29'] 90 | domain_idx_map = {'1':0, '2':1, '3':2, '5':3, '6':4, '9':5, 91 | '11':6, '13':7, '14':8, '15':9, '16':10, '17':11, '19':12, '20':13, 92 | '21':14, '22':15, '23':16, '24':17, '25':18, '29':19} 93 | domain_idx_int = domain_idx_map[domain_idx] 94 | 95 | d = np.full(y.shape, domain_idx_int, dtype=int) 96 | 97 | print('\nProcessing domain {0} files | X: {1} y: {2} d:{3} \n'.format(domain_idx, X.shape, y.shape, d.shape)) 98 | 99 | obj = [(X, y, d)] 100 | f = open(os.path.join(data_dir, saved_filename), 'wb') 101 | cp.dump(obj, f, protocol=cp.HIGHEST_PROTOCOL) 102 | f.close() 103 | return X, y, d 104 | 105 | 106 | class data_loader_shar(base_loader): 107 | def __init__(self, samples, labels, domains): 108 | super(data_loader_shar, self).__init__(samples, labels, domains) 109 | 110 | # def __getitem__(self, index): 111 | # sample, target, domain = self.samples[index], self.labels[index], self.domains[index] 112 | # sample = normalize(sample) 113 | # return sample, target, domain 114 | 115 | 116 | def prep_domains_shar_subject(args, SLIDING_WINDOW_LEN=0, SLIDING_WINDOW_STEP=0): 117 | 118 | # info: for SHAR dataset, the following domains have incomplete classes: 4,7,8,10 119 | source_domain_list = ['1', '2', '3', '5'] 120 | source_domain_list.remove(args.target_domain) 121 | 122 | # source domain data prep 123 | x_win_all, y_win_all, d_win_all = np.array([]), np.array([]), np.array([]) 124 | for source_domain in source_domain_list: 125 | print('source_domain:', source_domain) 126 | x, y, d = load_domain_data(source_domain) 127 | 128 | x = x.reshape(-1, 151, 3) 129 | print(" ..after sliding window: inputs {0}, targets {1}".format(x.shape, y.shape)) 130 | 131 | x_win_all = np.concatenate((x_win_all, x), axis=0) if x_win_all.size else x 132 | y_win_all = np.concatenate((y_win_all, y), axis=0) if y_win_all.size else y 133 | d_win_all = np.concatenate((d_win_all, d), axis=0) if d_win_all.size else d 134 | 135 | unique_y, counts_y = np.unique(y_win_all, return_counts=True) 136 | print('y_train label distribution: ', dict(zip(unique_y, counts_y))) 137 | weights = 100.0 / torch.Tensor(counts_y) 138 | print('weights of sampler: ', weights) 139 | weights = weights.double() 140 | 141 | sample_weights = get_sample_weights(y_win_all, weights) 142 | 143 | sampler = torch.utils.data.sampler.WeightedRandomSampler(weights=sample_weights, 144 | num_samples=len(sample_weights), replacement=True) 145 | 146 | data_set = data_loader_shar(x_win_all, y_win_all, d_win_all) 147 | source_loader = DataLoader(data_set, batch_size=args.batch_size, shuffle=True, drop_last=True) # , sampler=sampler) 148 | print('source_loader batch: ', len(source_loader)) 149 | source_loaders = [source_loader] 150 | 151 | # target domain data prep 152 | print('target_domain:', args.target_domain) 153 | x, y, d = load_domain_data(args.target_domain) 154 | 155 | x = x.reshape(-1, 151, 3) 156 | 157 | print(" ..after sliding window: inputs {0}, targets {1}".format(x.shape, y.shape)) 158 | 159 | unique_y, counts_y = np.unique(y, return_counts=True) 160 | print('y_train label distribution: ', dict(zip(unique_y, counts_y))) 161 | weights = 100.0 / torch.Tensor(counts_y) 162 | print('weights of sampler: ', weights) 163 | 164 | data_set = data_loader_shar(x, y, d) 165 | # shuffle is forced to be False when sampler is available 166 | target_loader = DataLoader(data_set, batch_size=args.batch_size, shuffle=False) 167 | print('target_loader batch: ', len(target_loader)) 168 | return source_loaders, None, target_loader 169 | 170 | 171 | def prep_domains_shar_subject_large(args, SLIDING_WINDOW_LEN=0, SLIDING_WINDOW_STEP=0): 172 | # note: for SHAR dataset with total 30 domains, 173 | # note: for SHAR dataset, the following domains have incomplete classes: 4, 7, 8, 10, 12, 18, 26, 27, 28, 30 174 | source_domain_list = ['1', '2', '3', '5', '6', '9', 175 | '11', '13', '14', '15', '16', '17', '19', '20', 176 | '21', '22', '23', '24', '25', '29'] 177 | 178 | source_domain_list.remove(args.target_domain) 179 | 180 | # source domain data prep 181 | x_win_all, y_win_all, d_win_all = np.array([]), np.array([]), np.array([]) 182 | for source_domain in source_domain_list: 183 | print('source_domain:', source_domain) 184 | # todo: index change of domain ID is different from smaller indices; can be combined to a function when time is more available 185 | x, y, d = load_domain_data_large(source_domain) 186 | 187 | x = x.reshape(-1, 151, 3) 188 | print(" ..after sliding window: inputs {0}, targets {1}".format(x.shape, y.shape)) 189 | 190 | x_win_all = np.concatenate((x_win_all, x), axis=0) if x_win_all.size else x 191 | y_win_all = np.concatenate((y_win_all, y), axis=0) if y_win_all.size else y 192 | d_win_all = np.concatenate((d_win_all, d), axis=0) if d_win_all.size else d 193 | 194 | unique_y, counts_y = np.unique(y_win_all, return_counts=True) 195 | print('y_train label distribution: ', dict(zip(unique_y, counts_y))) 196 | weights = 100.0 / torch.Tensor(counts_y) 197 | print('weights of sampler: ', weights) 198 | weights = weights.double() 199 | 200 | sample_weights = get_sample_weights(y_win_all, weights) 201 | 202 | sampler = torch.utils.data.sampler.WeightedRandomSampler(weights=sample_weights, num_samples=len(sample_weights), replacement=True) 203 | 204 | data_set = data_loader_shar(x_win_all, y_win_all, d_win_all) 205 | source_loader = DataLoader(data_set, batch_size=args.batch_size, shuffle=False, drop_last=True, sampler=sampler) 206 | print('source_loader batch: ', len(source_loader)) 207 | source_loaders = [source_loader] 208 | 209 | # target domain data prep 210 | print('target_domain:', args.target_domain) 211 | x, y, d = load_domain_data_large(args.target_domain) 212 | 213 | x = x.reshape(-1, 151, 3) 214 | 215 | print(" ..after sliding window: inputs {0}, targets {1}".format(x.shape, y.shape)) 216 | 217 | data_set = data_loader_shar(x, y, d) 218 | target_loader = DataLoader(data_set, batch_size=args.batch_size, shuffle=False) 219 | print('target_loader batch: ', len(target_loader)) 220 | return source_loaders, None, target_loader 221 | 222 | def prep_shar(args, SLIDING_WINDOW_LEN=0, SLIDING_WINDOW_STEP=0): 223 | if args.cases == 'subject': 224 | return prep_domains_shar_subject(args, SLIDING_WINDOW_LEN, SLIDING_WINDOW_STEP) 225 | elif args.cases == 'subject_large': 226 | return prep_domains_shar_subject_large(args, SLIDING_WINDOW_LEN, SLIDING_WINDOW_STEP) 227 | else: 228 | return 'Error!\n' 229 | 230 | -------------------------------------------------------------------------------- /data_preprocess/data_preprocess_ucihar.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Data Pre-processing on UCIHAR dataset. 3 | 4 | ''' 5 | 6 | import os 7 | import numpy as np 8 | from torch.utils.data import Dataset, DataLoader 9 | from torchvision import transforms 10 | import torch 11 | import pickle as cp 12 | from data_preprocess.data_preprocess_utils import get_sample_weights, train_test_val_split 13 | from data_preprocess.base_loader import base_loader 14 | 15 | def format_data_x(datafile): 16 | x_data = None 17 | for item in datafile: 18 | item_data = np.loadtxt(item, dtype=np.float) 19 | if x_data is None: 20 | x_data = np.zeros((len(item_data), 1)) 21 | x_data = np.hstack((x_data, item_data)) 22 | x_data = x_data[:, 1:] 23 | print('x_data.shape:', x_data.shape) 24 | X = None 25 | for i in range(len(x_data)): 26 | row = np.asarray(x_data[i, :]) 27 | row = row.reshape(9, 128).T 28 | if X is None: 29 | X = np.zeros((len(x_data), 128, 9)) 30 | X[i] = row 31 | print('X.shape:', X.shape) 32 | return X 33 | 34 | def format_data_y(datafile): 35 | data = np.loadtxt(datafile, dtype=np.int) - 1 36 | return data 37 | 38 | def load_domain_data(domain_idx): 39 | """ to load all the data from the specific domain with index domain_idx 40 | :param domain_idx: index of a single domain 41 | :return: X and y data of the entire domain 42 | """ 43 | # data_dir = './data/ucihar/' 44 | data_dir = '/home/tiantian/PCL/data/ucihar/' 45 | saved_filename = 'ucihar_domain_' + domain_idx + '_wd.data' # "wd": with domain label 46 | 47 | if os.path.isfile(data_dir + saved_filename) == True: 48 | data = np.load(data_dir + saved_filename, allow_pickle=True) 49 | X = data[0][0] 50 | y = data[0][1] 51 | d = data[0][2] 52 | else: 53 | if os.path.isdir(data_dir) == False: 54 | os.makedirs(data_dir) 55 | str_folder = './data/UCI HAR Dataset/' 56 | INPUT_SIGNAL_TYPES = [ 57 | "body_acc_x_", 58 | "body_acc_y_", 59 | "body_acc_z_", 60 | "body_gyro_x_", 61 | "body_gyro_y_", 62 | "body_gyro_z_", 63 | "total_acc_x_", 64 | "total_acc_y_", 65 | "total_acc_z_" 66 | ] 67 | str_train_files = [str_folder + 'train/' + 'Inertial Signals/' + item + 'train.txt' for item in INPUT_SIGNAL_TYPES] 68 | str_test_files = [str_folder + 'test/' + 'Inertial Signals/' + item + 'test.txt' for item in INPUT_SIGNAL_TYPES] 69 | str_train_y = str_folder + 'train/y_train.txt' 70 | str_test_y = str_folder + 'test/y_test.txt' 71 | 72 | str_train_id = str_folder + 'train/subject_train.txt' 73 | str_test_id = str_folder + 'test/subject_test.txt' 74 | 75 | X_train = format_data_x(str_train_files) 76 | X_test = format_data_x(str_test_files) 77 | Y_train = format_data_y(str_train_y) 78 | Y_test = format_data_y(str_test_y) 79 | id_train = format_data_y(str_train_id) # origin: array([ 0, 2, 4, 5, 6, 7, 10, 13, 14, 15, 16, 18, 20, 21, 22, 24, 25, 26, 27, 28, 29]) 80 | id_test = format_data_y(str_test_id) # origin: array([ 1, 3, 8, 9, 11, 12, 17, 19, 23]) 81 | 82 | X_all = np.concatenate((X_train, X_test), axis=0) 83 | y_all = np.concatenate((Y_train, Y_test), axis=0) 84 | id_all = np.concatenate((id_train, id_test), axis=0) 85 | 86 | print('\nProcessing domain {0} files...\n'.format(domain_idx)) 87 | target_idx = np.where(id_all == int(domain_idx)) 88 | X = X_all[target_idx] 89 | y = y_all[target_idx] 90 | d = np.full(y.shape, int(domain_idx), dtype=int) 91 | print('\nProcessing domain {0} files | X: {1} y: {2} d:{3} \n'.format(domain_idx, X.shape, y.shape, d.shape)) 92 | 93 | obj = [(X, y, d)] 94 | f = open(os.path.join(data_dir, saved_filename), 'wb') 95 | cp.dump(obj, f, protocol=cp.HIGHEST_PROTOCOL) 96 | f.close() 97 | return X, y, d 98 | 99 | class data_loader_ucihar(base_loader): 100 | def __init__(self, samples, labels, domains, t): 101 | super(data_loader_ucihar, self).__init__(samples, labels, domains) 102 | self.T = t 103 | 104 | def __getitem__(self, index): 105 | sample, target, domain = self.samples[index], self.labels[index], self.domains[index] 106 | sample = self.T(sample) 107 | return np.squeeze(np.transpose(sample, (1, 0, 2))), target, domain 108 | 109 | 110 | def prep_domains_ucihar_subject(args, SLIDING_WINDOW_LEN=0, SLIDING_WINDOW_STEP=0): 111 | # todo: make the domain IDs as arguments or a function with args to select the IDs (default, customized, small, etc) 112 | source_domain_list = ['0', '1', '2', '3', '4'] 113 | 114 | source_domain_list.remove(args.target_domain) 115 | 116 | # source domain data prep 117 | x_win_all, y_win_all, d_win_all = np.array([]), np.array([]), np.array([]) 118 | for source_domain in source_domain_list: 119 | print('source_domain:', source_domain) 120 | x, y, d = load_domain_data(source_domain) 121 | 122 | # n_channel should be 9, H: 1, W:128 123 | x = np.transpose(x.reshape((-1, 1, 128, 9)), (0, 2, 1, 3)) 124 | # the UCIHAR dataset is segmented by sliding window as default 125 | print(" ..after sliding window: inputs {0}, targets {1}".format(x.shape, y.shape)) 126 | 127 | x_win_all = np.concatenate((x_win_all, x), axis=0) if x_win_all.size else x 128 | y_win_all = np.concatenate((y_win_all, y), axis=0) if y_win_all.size else y 129 | d_win_all = np.concatenate((d_win_all, d), axis=0) if d_win_all.size else d 130 | 131 | unique_y, counts_y = np.unique(y_win_all, return_counts=True) 132 | print('y_train label distribution: ', dict(zip(unique_y, counts_y))) 133 | weights = 100.0 / torch.Tensor(counts_y) 134 | print('weights of sampler: ', weights) 135 | weights = weights.double() 136 | 137 | sample_weights = get_sample_weights(y_win_all, weights) 138 | 139 | sampler = torch.utils.data.sampler.WeightedRandomSampler(weights=sample_weights, 140 | num_samples=len(sample_weights), replacement=True) 141 | transform = transforms.Compose([ 142 | transforms.ToTensor(), 143 | transforms.Normalize(mean=(0, 0, 0, 0, 0, 0, 0, 0, 0), std=(1, 1, 1, 1, 1, 1, 1, 1, 1)) 144 | ]) 145 | 146 | data_set = data_loader_ucihar(x_win_all, y_win_all, d_win_all, transform) 147 | source_loader = DataLoader(data_set, batch_size=args.batch_size, shuffle=False, drop_last=True, sampler=sampler) 148 | print('source_loader batch: ', len(source_loader)) 149 | source_loaders = [source_loader] 150 | 151 | # target domain data prep 152 | print('target_domain:', args.target_domain) 153 | x, y, d = load_domain_data(args.target_domain) 154 | 155 | x = np.transpose(x.reshape((-1, 1, 128, 9)), (0, 2, 1, 3)) 156 | 157 | print(" ..after sliding window: inputs {0}, targets {1}".format(x.shape, y.shape)) 158 | 159 | data_set = data_loader_ucihar(x, y, d, transform) 160 | target_loader = DataLoader(data_set, batch_size=args.batch_size, shuffle=False) 161 | 162 | print('target_loader batch: ', len(target_loader)) 163 | return source_loaders, None, target_loader 164 | 165 | def prep_domains_ucihar_subject_large(args, SLIDING_WINDOW_LEN=0, SLIDING_WINDOW_STEP=0): 166 | source_domain_list = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13', '14', '15', '16', '17', '18', '19', '20', '21', '22', '23', '24', '25', '26', '27', '28', '29'] 167 | source_domain_list.remove(args.target_domain) 168 | 169 | # source domain data prep 170 | x_win_all, y_win_all, d_win_all = np.array([]), np.array([]), np.array([]) 171 | for source_domain in source_domain_list: 172 | print('source_domain:', source_domain) 173 | x, y, d = load_domain_data(source_domain) 174 | 175 | # n_channel should be 9, H: 1, W:128 176 | x = np.transpose(x.reshape((-1, 1, 128, 9)), (0, 2, 1, 3)) 177 | # the UCIHAR dataset is segmented by sliding window as default 178 | print(" ..after sliding window: inputs {0}, targets {1}".format(x.shape, y.shape)) 179 | 180 | x_win_all = np.concatenate((x_win_all, x), axis=0) if x_win_all.size else x 181 | y_win_all = np.concatenate((y_win_all, y), axis=0) if y_win_all.size else y 182 | d_win_all = np.concatenate((d_win_all, d), axis=0) if d_win_all.size else d 183 | 184 | unique_y, counts_y = np.unique(y_win_all, return_counts=True) 185 | print('y_train label distribution: ', dict(zip(unique_y, counts_y))) 186 | weights = 100.0 / torch.Tensor(counts_y) 187 | print('weights of sampler: ', weights) 188 | weights = weights.double() 189 | 190 | sample_weights = get_sample_weights(y_win_all, weights) 191 | 192 | sampler = torch.utils.data.sampler.WeightedRandomSampler(weights=sample_weights, num_samples=len(sample_weights), replacement=True) 193 | transform = transforms.Compose([ 194 | transforms.ToTensor(), 195 | transforms.Normalize(mean=(0, 0, 0, 0, 0, 0, 0, 0, 0), std=(1, 1, 1, 1, 1, 1, 1, 1, 1)) 196 | ]) 197 | 198 | data_set = data_loader_ucihar(x_win_all, y_win_all, d_win_all, transform) 199 | source_loader = DataLoader(data_set, batch_size=args.batch_size, shuffle=False, drop_last=True, sampler=sampler) 200 | print('source_loader batch: ', len(source_loader)) 201 | source_loaders = [source_loader] 202 | 203 | # target domain data prep 204 | print('target_domain:', args.target_domain) 205 | x, y, d = load_domain_data(args.target_domain) 206 | 207 | x = np.transpose(x.reshape((-1, 1, 128, 9)), (0, 2, 1, 3)) 208 | print(" ..after sliding window: inputs {0}, targets {1}".format(x.shape, y.shape)) 209 | 210 | data_set = data_loader_ucihar(x, y, d, transform) 211 | # todo: the batch size can be different for some ttt models, tbc 212 | target_loader = DataLoader(data_set, batch_size=args.batch_size, shuffle=False) 213 | print('target_loader batch: ', len(target_loader)) 214 | 215 | return source_loaders, None, target_loader 216 | 217 | 218 | def prep_domains_ucihar_random(args, SLIDING_WINDOW_LEN=0, SLIDING_WINDOW_STEP=0): 219 | source_domain_list = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13', '14', '15', '16', '17', '18', '19', '20', '21', '22', '23', '24', '25', '26', '27', '28', '29'] 220 | 221 | x_win_all, y_win_all, d_win_all = np.array([]), np.array([]), np.array([]) 222 | n_train, n_test, split_ratio = [], 0, 0.0 223 | 224 | for source_domain in source_domain_list: 225 | # print('source_domain:', source_domain) 226 | x_win, y_win, d_win = load_domain_data(source_domain) 227 | 228 | # n_channel should be 9, H: 1, W:128 229 | x_win = np.transpose(x_win.reshape((-1, 1, 128, 9)), (0, 2, 1, 3)) 230 | # print(" ..after sliding window: inputs {0}, targets {1}".format(x_win.shape, y_win.shape)) 231 | 232 | x_win_all = np.concatenate((x_win_all, x_win), axis=0) if x_win_all.size else x_win 233 | y_win_all = np.concatenate((y_win_all, y_win), axis=0) if y_win_all.size else y_win 234 | d_win_all = np.concatenate((d_win_all, d_win), axis=0) if d_win_all.size else d_win 235 | n_train.append(x_win.shape[0]) 236 | 237 | x_win_train, x_win_val, x_win_test, \ 238 | y_win_train, y_win_val, y_win_test, \ 239 | d_win_train, d_win_val, d_win_test = train_test_val_split(x_win_all, y_win_all, d_win_all, split_ratio=args.split_ratio) 240 | 241 | print(x_win_train.shape) 242 | unique_y, counts_y = np.unique(y_win_train, return_counts=True) 243 | print('y_train label distribution: ', dict(zip(unique_y, counts_y))) 244 | weights = 100.0 / torch.Tensor(counts_y) 245 | print('weights of sampler: ', weights) 246 | weights = weights.double() 247 | sample_weights = get_sample_weights(y_win_train, weights) 248 | sampler = torch.utils.data.sampler.WeightedRandomSampler(weights=sample_weights, num_samples=len(sample_weights), replacement=True) 249 | 250 | transform = transforms.Compose([ 251 | transforms.ToTensor(), 252 | transforms.Normalize(mean=(0, 0, 0, 0, 0, 0, 0, 0, 0), std=(1, 1, 1, 1, 1, 1, 1, 1, 1)) 253 | ]) 254 | train_set_r = data_loader_ucihar(x_win_train, y_win_train, d_win_train, transform) 255 | train_loader_r = DataLoader(train_set_r, batch_size=args.batch_size, shuffle=False, drop_last=True, sampler=sampler) 256 | val_set_r = data_loader_ucihar(x_win_val, y_win_val, d_win_val, transform) 257 | val_loader_r = DataLoader(val_set_r, batch_size=args.batch_size, shuffle=False) 258 | test_set_r = data_loader_ucihar(x_win_test, y_win_test, d_win_test, transform) 259 | test_loader_r = DataLoader(test_set_r, batch_size=args.batch_size, shuffle=False) 260 | 261 | return [train_loader_r], val_loader_r, test_loader_r 262 | 263 | 264 | def prep_ucihar(args, SLIDING_WINDOW_LEN=0, SLIDING_WINDOW_STEP=0): 265 | if args.cases == 'random': 266 | return prep_domains_ucihar_random(args, SLIDING_WINDOW_LEN, SLIDING_WINDOW_STEP) 267 | elif args.cases == 'subject': 268 | return prep_domains_ucihar_subject(args, SLIDING_WINDOW_LEN, SLIDING_WINDOW_STEP) 269 | elif args.cases == 'subject_large': 270 | return prep_domains_ucihar_subject_large(args, SLIDING_WINDOW_LEN, SLIDING_WINDOW_STEP) 271 | elif args.cases == '': 272 | pass 273 | else: 274 | return 'Error! Unknown args.cases!\n' 275 | 276 | -------------------------------------------------------------------------------- /data_preprocess/data_preprocess_wisdm.py: -------------------------------------------------------------------------------- 1 | # encoding=utf-8 2 | """ 3 | Created on 10:38 2018/12/17 4 | @author: Hangwei Qian 5 | Adapted from: https://github.com/guillaume-chevalier/HAR-stacked-residual-bidir-LSTMs 6 | """ 7 | 8 | import os 9 | import numpy as np 10 | import torch 11 | import pickle as cp 12 | from pandas import Series 13 | import zipfile 14 | import argparse 15 | from io import BytesIO 16 | from torch.utils.data import Dataset, DataLoader 17 | from torchvision import transforms 18 | from data_preprocess.data_preprocess_utils import get_sample_weights, train_test_val_split, opp_sliding_window_w_d 19 | from sklearn.model_selection import StratifiedShuffleSplit 20 | 21 | torch.manual_seed(10) 22 | 23 | NUM_FEATURES = 3 24 | 25 | class data_loader_wisdm(Dataset): 26 | def __init__(self, samples, labels, domains): 27 | self.samples = samples 28 | self.labels = labels 29 | self.domains = domains 30 | 31 | def __getitem__(self, index): 32 | sample, target, domain = self.samples[index], self.labels[index], self.domains[index] 33 | return sample, target, domain 34 | 35 | def __len__(self): 36 | return len(self.samples) 37 | 38 | def load_domain_data(domain_idx): 39 | """ to load all the data from the specific domain 40 | :param domain_idx: 41 | :return: X and y data of the entire domain 42 | """ 43 | data_dir = './data/WISDM_ar_v1.1/' 44 | saved_filename = 'wisdm_domain_' + domain_idx + '_wd.data' 45 | if os.path.isfile(data_dir + saved_filename) == True: 46 | data = np.load(data_dir + saved_filename, allow_pickle=True) 47 | X = data[0][0] 48 | y = data[0][1] 49 | d = data[0][2] 50 | else: 51 | str_folder = './data/WISDM_ar_v1.1/' 52 | data_all = np.genfromtxt(str_folder + 'WISDM_ar_v1.1_raw_hangwei_v2.txt', delimiter=',', usecols=[0,1,3,4,5]) 53 | 54 | X_all = data_all[:, 2:] # data: (1098209, 3) 55 | y_all = data_all[:, 1] - 1 # to map the labels from [1,...,6] to [0, 5] 56 | id_all = data_all[:, 0] 57 | 58 | 59 | print('\nProcessing domain {0} files...\n'.format(domain_idx)) 60 | target_idx = np.where(id_all == int(domain_idx)) 61 | X = X_all[target_idx] 62 | y = y_all[target_idx] 63 | 64 | # change the domain index from string ['1','2','3','4',...]to [0,1,2,3,4] 65 | # todo if further change 66 | domain_idx_map = {'1':0, '3':1, '5':2, '6':3, '7':4, '8':5, 67 | '12':6, '13':7, '18':8, '19':9, '20':10, 68 | '21':11, '24':12, '27':13, '29':14, 69 | '31':15, '32':16, '33':17, '34':18, '36':19} 70 | # domain_idx_now = int(domain_idx[-1]) 71 | # if domain_idx_now < 10: 72 | # domain_idx_int = domain_idx_now - 1 73 | # else: 74 | # domain_idx_int = domain_idx_now - 1 75 | domain_idx_int = domain_idx_map[domain_idx] 76 | 77 | d = np.full(y.shape, domain_idx_int, dtype=int) 78 | print('\nProcessing domain {0} files | X: {1} y: {2} d:{3} \n'.format(domain_idx, X.shape, y.shape, d.shape)) 79 | 80 | obj = [(X, y, d)] 81 | # file is not supported in python3, use open instead, by hangwei 82 | f = open(os.path.join(data_dir, saved_filename), 'wb') 83 | cp.dump(obj, f, protocol=cp.HIGHEST_PROTOCOL) 84 | f.close() 85 | return X, y, d 86 | 87 | 88 | 89 | def prep_domains_wisdm_subject(args, SLIDING_WINDOW_LEN=0, SLIDING_WINDOW_STEP=0): 90 | 91 | # hangwei: for wisdm data, total domains is [1,..., 36] 92 | # complete data domains source_domain_list = ['1', '3', '5','6','7','8', 93 | # '12', '13','18','19','20', 94 | # '21', '24','27', '29', 95 | # '31', '32', '33', '34','36'] 96 | 97 | # source_domain_list = ['1', '2', '3', '4', '5','6','7','8','9','10', 98 | # '11', '12', '13', '14', '15','16','17','18','19','20', 99 | # '21', '22', '23', '24', '25','26','27','28','29','30', 100 | # '31', '32', '33', '34', '35','36'] 101 | source_domain_list = ['1', '3', '5', '6', '7', '8', 102 | '12', '13', '18', '19', '20', 103 | '21', '24', '27', '29', 104 | '31', '32', '33', '34', '36'] 105 | source_domain_list.remove(args.target_domain) 106 | 107 | # source domain data prep 108 | source_loaders = [] 109 | x_win_all, y_win_all, d_win_all = np.array([]), np.array([]), np.array([]) 110 | for source_domain in source_domain_list: 111 | print('source_domain:', source_domain) 112 | x, y, d = load_domain_data(source_domain) 113 | y = y.astype(int) 114 | x_win, y_win, d_win = opp_sliding_window_w_d(x, y, d, SLIDING_WINDOW_LEN, SLIDING_WINDOW_STEP) 115 | print(" ..after sliding window: inputs {0}, targets {1}".format(x_win.shape, y_win.shape)) 116 | x_win_all = np.concatenate((x_win_all, x_win), axis=0) if x_win_all.size else x_win 117 | y_win_all = np.concatenate((y_win_all, y_win), axis=0) if y_win_all.size else y_win 118 | d_win_all = np.concatenate((d_win_all, d_win), axis=0) if d_win_all.size else d_win 119 | 120 | # get the info of the dataset, by hangwei. 1.15.2019 121 | unique_y, counts_y = np.unique(y_win_all, return_counts=True) 122 | print('y_train label distribution: ', dict(zip(unique_y, counts_y))) 123 | weights = 100.0 / torch.Tensor(counts_y) 124 | print('weights of sampler: ', weights) 125 | weights = weights.double() 126 | 127 | # updated by hangwei: sample_weights = weights[y_win] 128 | sample_weights = get_sample_weights(y_win, weights) 129 | 130 | sampler = torch.utils.data.sampler.WeightedRandomSampler(weights=sample_weights, 131 | num_samples=len(sample_weights), replacement=True) 132 | data_set = data_loader_wisdm(x_win, y_win, d_win) 133 | source_loader = DataLoader(data_set, batch_size=args.batch_size, shuffle=False, drop_last=True, sampler=sampler) 134 | print('source_loader batch: ', len(source_loader)) 135 | source_loaders.append(source_loader) 136 | 137 | # target domain data prep 138 | print('target_domain:', args.target_domain) 139 | x, y, d = load_domain_data(args.target_domain) 140 | y = y.astype(int) 141 | x_win, y_win, d_win = opp_sliding_window_w_d(x, y, d, SLIDING_WINDOW_LEN, SLIDING_WINDOW_STEP) 142 | print(" ..after sliding window: inputs {0}, targets {1}".format(x_win.shape, y_win.shape)) 143 | 144 | data_set = data_loader_wisdm(x_win, y_win, d_win) 145 | # padsequence() to deal with varying length input of each data example 146 | # shuffle is forced to be False when sampler is available 147 | target_loader = DataLoader(data_set, batch_size=args.batch_size, shuffle=False) 148 | print('target_loader batch: ', len(target_loader)) 149 | 150 | return source_loaders, None, target_loader 151 | 152 | def prep_domains_wisdm_subject_small(args, SLIDING_WINDOW_LEN=0, SLIDING_WINDOW_STEP=0): 153 | 154 | # hangwei: for wisdm data, total domains is [1,..., 36] 155 | # complete data domains source_domain_list = ['1', '3', '5','6','7','8', 156 | # '12', '13','18','19','20', 157 | # '21', '24','27', '29', 158 | # '31', '32', '33', '34','36'] 159 | 160 | # source_domain_list = ['1', '2', '3', '4', '5','6','7','8','9','10', 161 | # '11', '12', '13', '14', '15','16','17','18','19','20', 162 | # '21', '22', '23', '24', '25','26','27','28','29','30', 163 | # '31', '32', '33', '34', '35','36'] 164 | source_domain_list = ['20', '31', '8', '12', '13'] 165 | source_domain_list.remove(args.target_domain) 166 | 167 | # source domain data prep 168 | source_loaders = [] 169 | x_win_all, y_win_all, d_win_all = np.array([]), np.array([]), np.array([]) 170 | for source_domain in source_domain_list: 171 | print('source_domain:', source_domain) 172 | x, y, d = load_domain_data(source_domain) 173 | y = y.astype(int) 174 | x_win, y_win, d_win = opp_sliding_window_w_d(x, y, d, SLIDING_WINDOW_LEN, SLIDING_WINDOW_STEP) 175 | print(" ..after sliding window: inputs {0}, targets {1}".format(x_win.shape, y_win.shape)) 176 | x_win_all = np.concatenate((x_win_all, x_win), axis=0) if x_win_all.size else x_win 177 | y_win_all = np.concatenate((y_win_all, y_win), axis=0) if y_win_all.size else y_win 178 | d_win_all = np.concatenate((d_win_all, d_win), axis=0) if d_win_all.size else d_win 179 | 180 | # get the info of the dataset, by hangwei. 1.15.2019 181 | unique_y, counts_y = np.unique(y_win_all, return_counts=True) 182 | print('y_train label distribution: ', dict(zip(unique_y, counts_y))) 183 | weights = 100.0 / torch.Tensor(counts_y) 184 | print('weights of sampler: ', weights) 185 | weights = weights.double() 186 | 187 | # updated by hangwei: sample_weights = weights[y_win] 188 | sample_weights = get_sample_weights(y_win, weights) 189 | 190 | sampler = torch.utils.data.sampler.WeightedRandomSampler(weights=sample_weights, 191 | num_samples=len(sample_weights), replacement=True) 192 | data_set = data_loader_wisdm(x_win, y_win, d_win) 193 | source_loader = DataLoader(data_set, batch_size=args.batch_size, shuffle=False, drop_last=True, sampler=sampler) 194 | print('source_loader batch: ', len(source_loader)) 195 | source_loaders.append(source_loader) 196 | 197 | # target domain data prep 198 | print('target_domain:', args.target_domain) 199 | x, y, d = load_domain_data(args.target_domain) 200 | y = y.astype(int) 201 | x_win, y_win, d_win = opp_sliding_window_w_d(x, y, d, SLIDING_WINDOW_LEN, SLIDING_WINDOW_STEP) 202 | print(" ..after sliding window: inputs {0}, targets {1}".format(x_win.shape, y_win.shape)) 203 | 204 | data_set = data_loader_wisdm(x_win, y_win, d_win) 205 | # padsequence() to deal with varying length input of each data example 206 | # shuffle is forced to be False when sampler is available 207 | target_loader = DataLoader(data_set, batch_size=args.batch_size, shuffle=False) 208 | print('target_loader batch: ', len(target_loader)) 209 | 210 | return source_loaders, None, target_loader 211 | 212 | 213 | 214 | def prep_domains_wisdm_random(args, SLIDING_WINDOW_LEN=0, SLIDING_WINDOW_STEP=0): 215 | 216 | # hangwei: for wisdm data, total domains is [1,..., 36] 217 | # complete data domains source_domain_list = ['1', '3', '5','6','7','8', 218 | # '12', '13','18','19','20', 219 | # '21', '24','27', '29', 220 | # '31', '32', '33', '34','36'] 221 | # source_domain_list = ['1', '2', '3', '4', '5','6','7','8','9','10', 222 | # '11', '12', '13', '14', '15','16','17','18','19','20', 223 | # '21', '22', '23', '24', '25','26','27','28','29','30', 224 | # '31', '32', '33', '34', '35','36'] 225 | source_domain_list = ['1', '3', '5', '6', '7', '8', 226 | '12', '13', '18', '19', '20', 227 | '21', '24', '27', '29', 228 | '31', '32', '33', '34', '36'] 229 | # source_domain_list.remove(args.target_domain) 230 | 231 | x_win_all, y_win_all, d_win_all = np.array([]), np.array([]), np.array([]) 232 | n_train, n_test, split_ratio = [], 0, 0.0 233 | 234 | for source_domain in source_domain_list: 235 | print('source_domain:', source_domain) 236 | x, y, d = load_domain_data(source_domain) 237 | y = y.astype(int) 238 | x_win, y_win, d_win = opp_sliding_window_w_d(x, y, d, SLIDING_WINDOW_LEN, SLIDING_WINDOW_STEP) 239 | print(" ..after sliding window: inputs {0}, targets {1}".format(x_win.shape, y_win.shape)) 240 | 241 | x_win_all = np.concatenate((x_win_all, x_win), axis=0) if x_win_all.size else x_win 242 | y_win_all = np.concatenate((y_win_all, y_win), axis=0) if y_win_all.size else y_win 243 | d_win_all = np.concatenate((d_win_all, d_win), axis=0) if d_win_all.size else d_win 244 | 245 | n_train.append(x_win.shape[0]) 246 | 247 | unique_y, counts_y = np.unique(y_win_all, return_counts=True) 248 | print('y_train label distribution: ', dict(zip(unique_y, counts_y))) 249 | 250 | x_win_train, x_win_val, x_win_test, \ 251 | y_win_train, y_win_val, y_win_test, \ 252 | d_win_train, d_win_val, d_win_test = train_test_val_split(x_win_all, y_win_all, d_win_all, 253 | split_ratio=args.split_ratio) 254 | 255 | print('x_win_train', x_win_train.shape) 256 | unique_y, counts_y = np.unique(y_win_train, return_counts=True) 257 | print('y_train label distribution: ', dict(zip(unique_y, counts_y))) 258 | weights = 100.0 / torch.Tensor(counts_y) 259 | print('weights of sampler: ', weights) 260 | weights = weights.double() 261 | sample_weights = get_sample_weights(y_win_train, weights) 262 | sampler = torch.utils.data.sampler.WeightedRandomSampler(weights=sample_weights, 263 | num_samples=len(sample_weights), replacement=True) 264 | train_set_r = data_loader_wisdm(x_win_train, y_win_train, d_win_train) 265 | train_loader_r = DataLoader(train_set_r, batch_size=args.batch_size,shuffle=False, drop_last=True, sampler=sampler) 266 | val_set_r = data_loader_wisdm(x_win_val, y_win_val, d_win_val) 267 | val_loader_r = DataLoader(val_set_r, batch_size=args.batch_size, shuffle=False) 268 | test_set_r = data_loader_wisdm(x_win_test, y_win_test, d_win_test) 269 | test_loader_r = DataLoader(test_set_r, batch_size=args.batch_size, shuffle=False) 270 | 271 | return [train_loader_r], val_loader_r, test_loader_r 272 | 273 | 274 | def prep_wisdm(args, SLIDING_WINDOW_LEN=0, SLIDING_WINDOW_STEP=0): 275 | if args.cases == 'subject': 276 | return prep_domains_wisdm_subject(args, SLIDING_WINDOW_LEN, SLIDING_WINDOW_STEP) 277 | if args.cases == 'subject_small': 278 | return prep_domains_wisdm_subject_small(args, SLIDING_WINDOW_LEN, SLIDING_WINDOW_STEP) 279 | elif args.cases == 'random': 280 | return prep_domains_wisdm_random(args, SLIDING_WINDOW_LEN, SLIDING_WINDOW_STEP) 281 | else: 282 | return 'Error!\n' 283 | -------------------------------------------------------------------------------- /augmentations.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import scipy 4 | import random 5 | 6 | np.random.seed(10) 7 | 8 | def gen_aug(args, sample, ssh_type): 9 | if ssh_type == 'na': 10 | return sample 11 | elif ssh_type == 'shuffle': 12 | return shuffle(sample) 13 | elif ssh_type == 'jit_scal': 14 | scale_sample = scaling(sample, sigma=2) 15 | return torch.from_numpy(scale_sample) 16 | elif ssh_type == 'perm_jit': 17 | return jitter(permutation(sample, max_segments=10), sigma=0.8) 18 | elif ssh_type == 'resample': 19 | return torch.from_numpy(resample(sample)) 20 | elif ssh_type == 'noise': 21 | return jitter(sample) 22 | elif ssh_type == 'scale': 23 | return torch.from_numpy(scaling(sample)) 24 | elif ssh_type == 'negate': 25 | return negated(sample) 26 | elif ssh_type == 't_flip': 27 | return time_flipped(sample) 28 | elif ssh_type == 'rotation': 29 | if isinstance(multi_rotation(sample), np.ndarray): 30 | return torch.from_numpy(multi_rotation(sample)) 31 | else: 32 | return multi_rotation(sample) 33 | elif ssh_type == 'perm': 34 | return permutation(sample, max_segments=10) 35 | elif ssh_type == 't_warp': 36 | return torch.from_numpy(time_warp(sample)) 37 | # elif ssh_type == 'hfc': 38 | # fft, fd = generate_high(sample, r=(32,2), high=True) 39 | # return fd 40 | # elif ssh_type == 'lfc': 41 | # fft, fd = generate_high(sample, r=(32,2), high=False) 42 | # return fd 43 | elif ssh_type == 'hfc': 44 | return generate_high(sample, high=True) 45 | elif ssh_type == 'lfc': 46 | return generate_high(sample, high=False) 47 | elif ssh_type == 'p_shift': 48 | return ifft_phase_shift(sample) 49 | elif ssh_type == 'ap_p': 50 | return ifft_amp_phase_pert(sample) 51 | elif ssh_type == 'ap_f': 52 | return ifft_amp_phase_pert_fully(sample) 53 | elif ssh_type == 'rand_fourier': 54 | return rand_fourier(sample, args.n_modes) 55 | elif ssh_type == 'shuffle_rand_fourier': 56 | return shuffle_rand_fourier(sample, args.n_modes) 57 | else: 58 | print('The task is not available!\n') 59 | 60 | 61 | 62 | def shuffle(x): 63 | sample_ssh = [] 64 | for data in x: 65 | p = np.random.RandomState(seed=21).permutation(data.shape[1]) 66 | data = data[:, p] 67 | sample_ssh.append(data) 68 | return torch.stack(sample_ssh) 69 | 70 | 71 | def jitter(x, sigma=0.8): 72 | # https://arxiv.org/pdf/1706.00527.pdf 73 | return x + np.random.normal(loc=0., scale=sigma, size=x.shape) 74 | 75 | 76 | def scaling(x, sigma=1.1): # apply same distortion to the signals from each sensor 77 | # https://arxiv.org/pdf/1706.00527.pdf 78 | factor = np.random.normal(loc=2., scale=sigma, size=(x.shape[0], x.shape[1])) 79 | ai = [] 80 | for i in range(x.shape[2]): 81 | xi = x[:, :, i] 82 | ai.append(np.multiply(xi, factor[:, :])[:, :, np.newaxis]) 83 | return np.concatenate((ai), axis=2) 84 | 85 | 86 | def negated(X): 87 | return X * -1 88 | 89 | 90 | def time_flipped(X): 91 | inv_idx = torch.arange(X.size(1) - 1, -1, -1).long() 92 | return X[:, inv_idx, :] 93 | 94 | 95 | def permutation(x, max_segments=5, seg_mode="random"): 96 | orig_steps = np.arange(x.shape[1]) 97 | num_segs = np.random.randint(1, max_segments, size=(x.shape[0])) 98 | ret = np.zeros_like(x) 99 | for i, pat in enumerate(x): 100 | if num_segs[i] > 1: 101 | if seg_mode == "random": 102 | split_points = np.random.choice(x.shape[1] - 2, num_segs[i] - 1, replace=False) 103 | split_points.sort() 104 | splits = np.split(orig_steps, split_points) 105 | else: 106 | splits = np.array_split(orig_steps, num_segs[i]) 107 | np.random.shuffle(splits) 108 | warp = np.concatenate(splits).ravel() 109 | ret[i] = pat[warp, :] 110 | else: 111 | ret[i] = pat 112 | return torch.from_numpy(ret) 113 | 114 | 115 | def resample(x): 116 | from scipy.interpolate import interp1d 117 | orig_steps = np.arange(x.shape[1]) 118 | interp_steps = np.arange(0, orig_steps[-1]+0.001, 1/3) 119 | Interp = interp1d(orig_steps, x, axis=1) 120 | InterpVal = Interp(interp_steps) 121 | start = random.choice(orig_steps) 122 | resample_index = np.arange(start, 3 * x.shape[1], 2)[:x.shape[1]] 123 | return InterpVal[:, resample_index, :] 124 | 125 | 126 | def multi_rotation(x): 127 | n_channel = x.shape[2] 128 | n_rot = n_channel // 3 129 | x_rot = np.array([]) 130 | for i in range(n_rot): 131 | x_rot = np.concatenate((x_rot, rotation(x[:, :, i * 3:i * 3 + 3])), axis=2) if x_rot.size else rotation( 132 | x[:, :, i * 3:i * 3 + 3]) 133 | return x_rot 134 | 135 | def rotation(X): 136 | """ 137 | Applying a random 3D rotation 138 | """ 139 | axes = np.random.uniform(low=-1, high=1, size=(X.shape[0], X.shape[2])) 140 | angles = np.random.uniform(low=-np.pi, high=np.pi, size=(X.shape[0])) 141 | matrices = axis_angle_to_rotation_matrix_3d_vectorized(axes, angles) 142 | return np.matmul(X, matrices) 143 | 144 | def axis_angle_to_rotation_matrix_3d_vectorized(axes, angles): 145 | """ 146 | Get the rotational matrix corresponding to a rotation of (angle) radian around the axes 147 | Reference: the Transforms3d package - transforms3d.axangles.axangle2mat 148 | Formula: http://en.wikipedia.org/wiki/Rotation_matrix#Axis_and_angle 149 | """ 150 | axes = axes / np.linalg.norm(axes, ord=2, axis=1, keepdims=True) 151 | x = axes[:, 0]; y = axes[:, 1]; z = axes[:, 2] 152 | c = np.cos(angles) 153 | s = np.sin(angles) 154 | C = 1 - c 155 | 156 | xs = x*s; ys = y*s; zs = z*s 157 | xC = x*C; yC = y*C; zC = z*C 158 | xyC = x*yC; yzC = y*zC; zxC = z*xC 159 | 160 | m = np.array([ 161 | [ x*xC+c, xyC-zs, zxC+ys ], 162 | [ xyC+zs, y*yC+c, yzC-xs ], 163 | [ zxC-ys, yzC+xs, z*zC+c ]]) 164 | matrix_transposed = np.transpose(m, axes=(2,0,1)) 165 | return matrix_transposed 166 | 167 | def get_cubic_spline_interpolation(x_eval, x_data, y_data): 168 | """ 169 | Get values for the cubic spline interpolation 170 | """ 171 | cubic_spline = scipy.interpolate.CubicSpline(x_data, y_data) 172 | return cubic_spline(x_eval) 173 | 174 | 175 | def time_warp(X, sigma=0.2, num_knots=4): 176 | """ 177 | Stretching and warping the time-series 178 | """ 179 | time_stamps = np.arange(X.shape[1]) 180 | knot_xs = np.arange(0, num_knots + 2, dtype=float) * (X.shape[1] - 1) / (num_knots + 1) 181 | spline_ys = np.random.normal(loc=1.0, scale=sigma, size=(X.shape[0] * X.shape[2], num_knots + 2)) 182 | 183 | spline_values = np.array([get_cubic_spline_interpolation(time_stamps, knot_xs, spline_ys_individual) for spline_ys_individual in spline_ys]) 184 | 185 | cumulative_sum = np.cumsum(spline_values, axis=1) 186 | distorted_time_stamps_all = cumulative_sum / cumulative_sum[:, -1][:, np.newaxis] * (X.shape[1] - 1) 187 | 188 | X_transformed = np.empty(shape=X.shape) 189 | for i, distorted_time_stamps in enumerate(distorted_time_stamps_all): 190 | X_transformed[i // X.shape[2], :, i % X.shape[2]] = np.interp(time_stamps, distorted_time_stamps, X[i // X.shape[2], :, i % X.shape[2]]) 191 | return X_transformed 192 | 193 | 194 | def distance(i, j, imageSize, r): 195 | dis_x = np.sqrt((i - imageSize[0] / 2) ** 2) 196 | dis_y = np.sqrt((j - imageSize[1] / 2) ** 2) 197 | if dis_x < r[0] and dis_y < r[1]: 198 | return 1.0 199 | else: 200 | return 0 201 | 202 | 203 | def mask_radial(img, r): 204 | rows, cols = img.shape 205 | mask = torch.zeros((rows, cols)) 206 | for i in range(rows): 207 | for j in range(cols): 208 | mask[i, j] = distance(i, j, imageSize=(rows, cols), r=r) 209 | return mask 210 | 211 | 212 | # def generate_high(sample, r, high=True): 213 | # # r: int, radius of the mask 214 | # images = torch.unsqueeze(sample, 1) 215 | # mask = mask_radial(torch.zeros([images.shape[2], images.shape[3]]), r) 216 | # bs, c, h, w = images.shape 217 | # x = images.reshape([bs * c, h, w]) 218 | # fd = torch.fft.fftshift(torch.fft.fftn(x, dim=(-2, -1))) # shift: low f in the center 219 | # mask = mask.unsqueeze(0).repeat([bs * c, 1, 1]) 220 | # if high: 221 | # fd = fd * (1.-mask) 222 | # else: 223 | # fd = fd * mask 224 | # fft = torch.real(fd) 225 | # fd = torch.fft.ifftn(torch.fft.ifftshift(fd), dim=(-2, -1)) 226 | # fd = torch.real(fd) 227 | # fd = torch.squeeze(fd.reshape([bs, c, h, w])) 228 | # return fft, fd 229 | 230 | def generate_high(sample, high=True): 231 | x_ft = torch.fft.rfft(sample, dim=-2) 232 | n_components = x_ft.shape[1] 233 | if high: 234 | aug = torch.fft.irfft(x_ft[:, n_components//2:, :], n=sample.shape[-2], dim=-2) 235 | else: 236 | aug = torch.fft.irfft(x_ft[:, :n_components // 2, :], n=sample.shape[-2], dim=-2) 237 | return aug 238 | 239 | def ifft_phase_shift(sample): 240 | images = torch.unsqueeze(sample, 1) 241 | bs, c, h, w = images.shape 242 | x = images.reshape([bs * c, h, w]) 243 | fd = torch.fft.fftshift(torch.fft.fftn(x, dim=(-2, -1))) 244 | 245 | amp = fd.abs() 246 | phase = fd.angle() 247 | 248 | # phase shift 249 | angles = np.repeat(np.expand_dims(np.random.uniform(low=-np.pi, high=np.pi, size=(sample.shape[0], sample.shape[1])), axis=2), sample.shape[2], axis=2) 250 | phase = phase + angles 251 | 252 | cmp = amp * torch.exp(1j * phase) 253 | ifft = torch.squeeze(torch.real(torch.fft.ifftn(torch.fft.ifftshift(cmp), dim=(-2, -1))).reshape([bs, c, h, w])) 254 | 255 | return ifft 256 | 257 | 258 | def ifft_amp_phase_pert(sample): 259 | images = torch.unsqueeze(sample, 1) 260 | bs, c, h, w = images.shape 261 | x = images.reshape([bs * c, h, w]) 262 | fd = torch.fft.fftshift(torch.fft.fftn(x, dim=(-2, -1))) 263 | 264 | amp = fd.abs() 265 | phase = fd.angle() 266 | 267 | # select a segment to conduct perturbations 268 | start = np.random.randint(0, int(0.5 * sample.shape[1])) 269 | end = start + int(0.5 * sample.shape[1]) 270 | 271 | # phase shift 272 | angles = np.repeat(np.expand_dims(np.random.uniform(low=-np.pi, high=np.pi, size=(sample.shape[0], sample.shape[1])), axis=2), sample.shape[2], axis=2) 273 | phase[:, start:end, :] = phase[:, start:end, :] + angles[:, start:end, :] 274 | 275 | # amp shift 276 | amp[:, start:end, :] = amp[:, start:end, :] + np.random.normal(loc=0., scale=0.8, size=sample.shape)[:, start:end, :] 277 | 278 | cmp = amp * torch.exp(1j * phase) 279 | ifft = torch.squeeze(torch.real(torch.fft.ifftn(torch.fft.ifftshift(cmp), dim=(-2, -1))).reshape([bs, c, h, w])) 280 | 281 | return ifft 282 | 283 | 284 | def ifft_amp_phase_pert_fully(sample): 285 | images = torch.unsqueeze(sample, 1) 286 | bs, c, h, w = images.shape 287 | x = images.reshape([bs * c, h, w]) 288 | fd = torch.fft.fftshift(torch.fft.fftn(x, dim=(-2, -1))) 289 | 290 | amp = fd.abs() 291 | phase = fd.angle() 292 | 293 | # phase shift 294 | angles = np.repeat(np.expand_dims(np.random.uniform(low=-np.pi, high=np.pi, size=(sample.shape[0], sample.shape[1])), axis=2), sample.shape[2], axis=2) 295 | phase = phase + angles 296 | 297 | # amp shift 298 | amp = amp + np.random.normal(loc=0., scale=0.8, size=sample.shape) 299 | 300 | cmp = amp * torch.exp(1j * phase) 301 | ifft = torch.squeeze(torch.real(torch.fft.ifftn(torch.fft.ifftshift(cmp), dim=(-2, -1))).reshape([bs, c, h, w])) 302 | 303 | return ifft 304 | 305 | def generate_rand_n_augviews(sample, n): 306 | aug_name_list = ['shuffle', 'jit_scal', 'perm_jit', 'resample', 'noise', 'scale', 'negate', 't_flip', 'rotation', 'perm', 't_warp'] 307 | aug_idxs = np.arange(len(aug_name_list)) 308 | np.random.shuffle(aug_idxs) 309 | aug_idxs = aug_idxs[:n] 310 | augviews = [] 311 | for aug_idx in aug_idxs: 312 | # print(aug_name_list[aug_idx]) 313 | aug_view = gen_aug(sample, aug_name_list[aug_idx]) 314 | augviews.append(aug_view) 315 | return augviews 316 | 317 | # def generate_fixed_n_augviews(sample, n): 318 | # aug_name_list = ['perm_jit', 'perm', 'noise', 'scale'] 319 | # augviews = [] 320 | # for aug_idx in range(n): 321 | # # print(aug_name_list[aug_idx]) 322 | # aug_view = gen_aug(sample, aug_name_list[aug_idx]) 323 | # augviews.append(aug_view) 324 | # return augviews 325 | 326 | def generate_fixed_n_augviews(args, sample, n): 327 | aug_name_list = ['shuffle', 'jit_scal', 'perm_jit', 'resample', 'noise', 'scale', 'negate', 't_flip', 'rotation', 'perm', 't_warp'] 328 | aug_idxs = np.arange(len(aug_name_list)) 329 | np.random.seed(args.rand_seed) 330 | np.random.shuffle(aug_idxs) 331 | aug_idxs = aug_idxs[:n] 332 | args.aug_idxs = aug_idxs 333 | augviews = [] 334 | for aug_idx in aug_idxs: 335 | # print(aug_name_list[aug_idx]) 336 | aug_view = gen_aug(sample, aug_name_list[aug_idx]) 337 | augviews.append(aug_view) 338 | return augviews 339 | 340 | def generate_predefined_n_augviews(args, sample, n): 341 | aug_name_list = ['shuffle', 'jit_scal', 'perm_jit', 'resample', 'noise', 'scale', 'negate', 't_flip', 'rotation', 'perm', 't_warp'] 342 | aug_idxs = [1,2,4,9][:n] 343 | # print(aug_idxs) 344 | args.aug_idxs = aug_idxs 345 | augviews = [] 346 | for aug_idx in aug_idxs: 347 | # print(aug_name_list[aug_idx]) 348 | aug_view = gen_aug(sample, aug_name_list[aug_idx]) 349 | augviews.append(aug_view) 350 | return augviews 351 | 352 | def generate_predefined_n_id_augviews(args, sample, n): 353 | aug_name_list = ['shuffle', 'jit_scal', 'perm_jit', 'resample', 'noise', 'scale', 'negate', 't_flip', 'rotation', 'perm', 't_warp'] 354 | aug_idxs = [1,1,1,1][:n] 355 | # print(aug_idxs) 356 | args.aug_idxs = aug_idxs 357 | augviews = [] 358 | for aug_idx in aug_idxs: 359 | # print(aug_name_list[aug_idx]) 360 | aug_view = gen_aug(sample, aug_name_list[aug_idx]) 361 | augviews.append(aug_view) 362 | return augviews 363 | 364 | def generate_predefined_n_augviews_with_idx(args, sample, n, aug_idxs): 365 | aug_name_list = ['shuffle', 'jit_scal', 'perm_jit', 'resample', 'noise', 'scale', 'negate', 't_flip', 'rotation', 'perm', 't_warp'] 366 | if n != len(aug_idxs): 367 | raise ValueError 368 | # print(aug_idxs) 369 | args.aug_idxs = aug_idxs 370 | list1 = [] 371 | list1[:0] = aug_idxs 372 | list1 = [int(i) for i in list1] 373 | augviews = [] 374 | for aug_idx in list1: 375 | # print(aug_name_list[aug_idx]) 376 | aug_view = gen_aug(sample, aug_name_list[aug_idx]) 377 | augviews.append(aug_view) 378 | return augviews 379 | 380 | def generate_rand_n_augviews_1284(sample, n): 381 | aug_name_list = ['shuffle', 'jit_scal', 'perm_jit', 'resample', 'noise', 'scale', 'negate', 't_flip', 'rotation', 'perm', 't_warp'] 382 | aug_idxs = [1,2,8,4] 383 | np.random.shuffle(aug_idxs) 384 | aug_idxs = aug_idxs[:n] 385 | augviews = [] 386 | for aug_idx in aug_idxs: 387 | # print(aug_name_list[aug_idx]) 388 | aug_view = gen_aug(sample, aug_name_list[aug_idx]) 389 | augviews.append(aug_view) 390 | return augviews 391 | 392 | # def sample_k_aug_idx(k): 393 | # aug_name_list = ['shuffle', 'jit_scal', 'perm_jit', 'resample', 'noise', 'scale', 'negate', 't_flip', 'rotation', 394 | # 'perm', 't_warp'] 395 | # aug_idxs = np.arange(len(aug_name_list)) 396 | # np.random.shuffle(aug_idxs) 397 | # return aug_idxs[:k] 398 | # 399 | # def generate_rand_n_augviews_from_k(sample, aug_idxs, n): # todo assume all augs are equally effective, only number matters 400 | # if n > len(aug_idxs): 401 | # raise ValueError 402 | # aug_name_list = ['shuffle', 'jit_scal', 'perm_jit', 'resample', 'noise', 'scale', 'negate', 't_flip', 'rotation', 'perm', 't_warp'] 403 | # np.random.shuffle(aug_idxs) 404 | # aug_idxs = aug_idxs[:n] 405 | # augviews = [] 406 | # for aug_idx in aug_idxs: 407 | # print(aug_name_list[aug_idx]) 408 | # aug_view = gen_aug(sample, aug_name_list[aug_idx]) 409 | # augviews.append(aug_view) 410 | # return augviews 411 | 412 | def generate_rand_n_augviews_from_k(sample, n, k): 413 | if n > k: 414 | raise ValueError 415 | aug_name_list = ['shuffle', 'jit_scal', 'perm_jit', 'resample', 'noise', 'scale', 'negate', 't_flip', 'rotation', 'perm', 't_warp'] 416 | aug_idxs = np.arange(len(aug_name_list))[:k] # keep only k augs 417 | np.random.shuffle(aug_idxs) 418 | # print(aug_idxs) 419 | aug_idxs = aug_idxs[:n] # randomly sample n from k 420 | augviews = [] 421 | # print(aug_idxs) 422 | for aug_idx in aug_idxs: 423 | # print(aug_name_list[aug_idx]) 424 | aug_view = gen_aug(sample, aug_name_list[aug_idx]) 425 | augviews.append(aug_view) 426 | return augviews 427 | 428 | def rand_fourier(x, n_modes): 429 | n_modes = min(n_modes, x.shape[1]//2) 430 | # print(n_modes) 431 | x_ft = torch.fft.rfft(x, dim=-2) 432 | # print(x_ft.shape) 433 | index = list(range(x.shape[1]//2)) 434 | # np.random.shuffle(index) 435 | index = index[:n_modes] 436 | # print(index) 437 | # print(index) 438 | # Return to time domain 439 | x = torch.fft.irfft(x_ft[:, index, :], n=x.size(1), dim=-2) 440 | return x 441 | 442 | def shuffle_rand_fourier(x, n_modes): 443 | n_modes = min(n_modes, x.shape[1]//2) 444 | # print(n_modes) 445 | x_ft = torch.fft.rfft(x, dim=-2) 446 | # print(x_ft.shape) 447 | index = list(range(x.shape[1]//2)) 448 | np.random.shuffle(index) 449 | index = index[:n_modes] 450 | # print(index) 451 | # print(index) 452 | # Return to time domain 453 | x = torch.fft.irfft(x_ft[:, index, :], n=x.size(1), dim=-2) 454 | return x 455 | 456 | from sklearn.feature_selection import mutual_info_classif as MIC 457 | 458 | def rand_fourier_with_target(x, n_modes, target): 459 | # print(x.shape) 460 | n_modes = min(n_modes, x.shape[1]//2) 461 | x_ft = torch.fft.rfft(x, dim=-2) 462 | # print(x_ft.shape) 463 | MIC_score = [] 464 | for i in range(x_ft.shape[1]): 465 | MI = MIC(torch.abs(x_ft[:, i, :]).cpu().numpy(), target).mean() 466 | # print(MI.shape) 467 | MIC_score.append(MI) 468 | _, index = torch.topk(torch.Tensor(MIC_score).reshape(1,-1), k=n_modes) 469 | index = index[0] 470 | # index = np.argpartition(MIC_score, -n_modes)[-n_modes:] 471 | # print(index) 472 | 473 | # index = list(range(x.shape[1]//2)) 474 | # np.random.shuffle(index) 475 | # index = index[:n_modes] 476 | # print(index) 477 | # print(index) 478 | # Return to time domain 479 | # print(index.shape) 480 | x = torch.fft.irfft(x_ft[:, index, :], n=x.size(1), dim=-2).cuda() 481 | # print(x.shape) 482 | return x 483 | 484 | def shuffle_rand_fourier_me(x, n_modes): 485 | # sample mutually exlusive sets of fourier components for two views 486 | n_modes = min(n_modes, x.shape[1]//4) 487 | # print(n_modes) 488 | x_ft = torch.fft.rfft(x, dim=-2) 489 | # print(x_ft.shape) 490 | index = list(range(x.shape[1]//2)) 491 | np.random.shuffle(index) 492 | index1 = index[:n_modes] 493 | index2 = index[n_modes:2*n_modes] 494 | # print('index1', index1) 495 | # print('index2', index2) 496 | # Return to time domain 497 | x1 = torch.fft.irfft(x_ft[:, index1, :], n=x.size(1), dim=-2) 498 | x2 = torch.fft.irfft(x_ft[:, index2, :], n=x.size(1), dim=-2) 499 | return x1, x2 -------------------------------------------------------------------------------- /main_FreRA.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import math 3 | import os 4 | import shutil 5 | import time 6 | 7 | import torch.nn as nn 8 | import torch.nn.parallel 9 | import torch.backends.cudnn as cudnn 10 | import torch.optim 11 | 12 | import models.builder 13 | from data_loaders import * 14 | from augmentations import * 15 | from models.backbones import FCN 16 | 17 | import fitlog 18 | from sklearn.metrics import f1_score 19 | from copy import deepcopy 20 | from autoaug.fourier import * 21 | 22 | model_names = sorted(name for name in models.__dict__ 23 | if name.islower() and not name.startswith("__") 24 | and callable(models.__dict__[name])) 25 | 26 | parser = argparse.ArgumentParser(description='PyTorch ImageNet Training') 27 | 28 | # dataset 29 | parser.add_argument('--dataset', type=str, default='ucihar', help='name of dataset') 30 | parser.add_argument('--n_feature', type=int, default=77, help='name of feature dimension') 31 | parser.add_argument('--len_sw', type=int, default=30, help='length of sliding window') 32 | parser.add_argument('--n_class', type=int, default=18, help='number of class') 33 | parser.add_argument('--cases', type=str, default='random', 34 | choices=['random', 'subject', 'large_subject'], help='name of scenarios') 35 | parser.add_argument('--split_ratio', type=float, default=0.2, 36 | help='split ratio of test/val: train(0.64), val(0.16), test(0.2)') 37 | parser.add_argument('--target_domain', type=str, default='0') 38 | 39 | parser.add_argument('--framework', type=str, default='simclr', 40 | choices=['simclr', 'byol', 'simsiam']) 41 | parser.add_argument('-a', '--arch', metavar='ARCH', default='FCN', 42 | choices=model_names, 43 | help='model architecture: ' + 44 | ' | '.join(model_names) + 45 | ' (default: FCN)') 46 | parser.add_argument('--start_epoch', default=0, type=int) 47 | parser.add_argument('--epochs', default=200, type=int, metavar='N', 48 | help='number of total epochs to run') 49 | parser.add_argument('-b', '--batch_size', default=256, type=int, 50 | metavar='N', 51 | help='mini-batch size') 52 | parser.add_argument('--lr', '--learning-rate', default=0.03, type=float, 53 | metavar='LR', help='initial learning rate', dest='lr') 54 | parser.add_argument('--f_lr', default=0.01, type=float, help='initial learning rate for fourier weight') 55 | parser.add_argument('--momentum', default=0.9, type=float, metavar='M', 56 | help='momentum of SGD solver') 57 | parser.add_argument('--wd', '--weight-decay', default=1e-4, type=float, 58 | metavar='W', help='weight decay (default: 1e-4)', 59 | dest='weight_decay') 60 | parser.add_argument('--seed', default=None, type=int, 61 | help='seed for initializing training. ') 62 | parser.add_argument('--gpu', default=None, type=int, 63 | help='GPU id to use.') 64 | 65 | parser.add_argument('--low_dim', default=128, type=int, 66 | help='feature dimension (default: 128)') 67 | parser.add_argument('--temperature', default=0.2, type=float, 68 | help='softmax temperature') 69 | 70 | parser.add_argument('--cos', action='store_true', default=True, 71 | help='use cosine lr schedule') 72 | 73 | parser.add_argument('--logdir', default='log', type=str, 74 | help='fitlog directory') 75 | 76 | parser.add_argument('--f_temperature', default=0.1, type=float, 77 | help='temperature for Fourier AutoAug') 78 | parser.add_argument('--l1_weight', default=0.1, type=float, 79 | help='weight of l1-norm of f_aug weight para') 80 | parser.add_argument('--f_aug_mode', default='FreRA', type=str,) 81 | 82 | 83 | def main(): 84 | args = parser.parse_args() 85 | 86 | if args.seed is not None: 87 | random.seed(args.seed) 88 | torch.manual_seed(args.seed) 89 | torch.cuda.manual_seed_all(args.seed) 90 | cudnn.deterministic = True 91 | 92 | best_model = main_worker(args.gpu, args) 93 | 94 | main_worker_cls(args.gpu, best_model, args) 95 | 96 | 97 | def main_worker(gpu, args): 98 | args.gpu = gpu 99 | 100 | if args.gpu is not None: 101 | print("Use GPU: {} for training".format(args.gpu)) 102 | 103 | DEVICE = torch.device('cuda:' + str(args.gpu) if torch.cuda.is_available() else 'cpu') 104 | 105 | train_loader, val_loader, eval_loader = setup_dataloaders(args) 106 | 107 | # create model 108 | print("=> creating model '{}'".format(args.arch)) 109 | if args.framework == 'simclr': 110 | model = models.builder.SimCLR( 111 | DEVICE, 112 | args.dataset, 113 | args.n_feature, 114 | args.batch_size, 115 | args.arch, 116 | args.low_dim, args.temperature) 117 | elif args.framework == 'byol': 118 | model = models.builder.BYOL( 119 | DEVICE, 120 | args.arch, 121 | args.dataset, 122 | args.n_feature, 123 | args.len_sw, 124 | moving_average=0.996) 125 | elif args.framework == 'simsiam': 126 | model = models.builder.BYOL( 127 | DEVICE, 128 | args.arch, 129 | args.dataset, 130 | args.n_feature, 131 | args.len_sw, 132 | moving_average=0.0) 133 | 134 | if args.gpu is not None: 135 | torch.cuda.set_device(args.gpu) 136 | model = model.to(DEVICE) 137 | 138 | # define loss function (criterion) and optimizer 139 | if args.framework in ['simclr']: 140 | criterion = nn.CrossEntropyLoss().to(DEVICE) 141 | elif args.framework in ['byol', 'simsiam']: 142 | criterion = nn.CosineSimilarity(dim=1) 143 | 144 | if args.framework in ['simclr']: 145 | optimizer = torch.optim.SGD(model.parameters(), args.lr, 146 | momentum=args.momentum, 147 | weight_decay=args.weight_decay) 148 | elif args.framework in ['byol', 'simsiam']: 149 | if args.framework == 'byol': 150 | args.weight_decay = 1.5e-6 151 | lr_mul = 10.0 152 | elif args.framework == 'simsiam': 153 | args.weight_decay = 1e-4 154 | lr_mul = 1.0 155 | optimizer1 = torch.optim.Adam(model.encoder_q.parameters(), 156 | args.lr, 157 | weight_decay=args.weight_decay) 158 | optimizer2 = torch.optim.Adam(model.online_predictor.parameters(), 159 | args.lr * lr_mul, 160 | weight_decay=args.weight_decay) 161 | optimizer = [optimizer1, optimizer2] 162 | 163 | cudnn.benchmark = True 164 | 165 | # fitlog 166 | if os.path.isdir(args.logdir) == False: 167 | os.makedirs(args.logdir) 168 | fitlog.set_log_dir(args.logdir) 169 | fitlog.add_hyper(args) 170 | fitlog.add_hyper_in_file(__file__) 171 | 172 | # autoaug 173 | if args.f_aug_mode == 'FreRA': 174 | aug_f = FreRA(len_sw=args.len_sw, device=DEVICE).to(DEVICE) 175 | 176 | f_optimizer = torch.optim.AdamW(aug_f.parameters(), lr=args.f_lr) 177 | 178 | f_weight = [] 179 | 180 | for epoch in range(args.start_epoch, args.epochs): 181 | 182 | if args.framework not in ['byol', 'simsiam']: 183 | adjust_learning_rate(optimizer, epoch, args) 184 | 185 | # train for one epoch 186 | train(epoch, aug_f, f_optimizer, DEVICE, train_loader, model, criterion, optimizer, args, fitlog) 187 | 188 | f_weight.append(aug_f.weight.cpu().detach().numpy()) 189 | 190 | # save weights 191 | fitlog.add_hyper(aug_f.weight, name='fourier weight') 192 | 193 | return deepcopy(model.state_dict()) 194 | 195 | def train(epoch, aug_f, f_optimizer, DEVICE, train_loader, model, criterion, optimizer, args, fitlog=None): 196 | batch_time = AverageMeter('Time', ':6.3f') 197 | data_time = AverageMeter('Data', ':6.3f') 198 | losses = AverageMeter('Loss', ':.4e') 199 | l1_losses = AverageMeter('L1Loss', ':.4e') 200 | acc_inst = AverageMeter('Acc@Inst', ':6.2f') 201 | 202 | progress = ProgressMeter( 203 | len(train_loader), 204 | [batch_time, data_time, losses, acc_inst], 205 | prefix="Epoch: [{}]".format(epoch)) 206 | 207 | # switch to train mode 208 | model.train() 209 | 210 | end = time.time() 211 | for i, (sample, target, domain) in enumerate(train_loader): 212 | # measure data loading time 213 | data_time.update(time.time() - end) 214 | 215 | sample = sample.to(DEVICE) 216 | aug_sample1, aug_sample2 = aug_f(sample, temperature=args.f_temperature).float(), gen_aug(None, sample, 'na').to(DEVICE).float() 217 | 218 | # compute output 219 | if args.framework in ['simclr']: 220 | output, target, z1, z2 = model(im_q=aug_sample1, im_k=aug_sample2) 221 | loss = criterion(output, target) 222 | elif args.framework in ['byol', 'simsiam']: 223 | online_pred_one, online_pred_two, target_proj_one, target_proj_two = model(im_q=aug_sample1, im_k=aug_sample2) 224 | loss = -(criterion(online_pred_one, target_proj_two).mean() + criterion(online_pred_two, target_proj_one).mean()) * 0.5 225 | 226 | # l1-norm loss of weight para 227 | l1_weight_loss = torch.norm(aug_f.para[:, 0], p=1) 228 | loss = loss + l1_weight_loss * args.l1_weight / args.len_sw 229 | 230 | losses.update(loss.item(), aug_sample1.size(0)) 231 | l1_losses.update(l1_weight_loss.item(), aug_sample1.size(0)) 232 | 233 | # update cl framework and fourier weight 234 | if args.framework in ['simclr']: 235 | optimizer.zero_grad() 236 | f_optimizer.zero_grad() 237 | loss.backward() 238 | optimizer.step() 239 | f_optimizer.step() 240 | elif args.framework in ['byol', 'simsiam']: 241 | optimizer[0].zero_grad() 242 | optimizer[1].zero_grad() 243 | f_optimizer.zero_grad() 244 | loss.backward() 245 | optimizer[0].step() 246 | optimizer[1].step() 247 | f_optimizer.step() 248 | 249 | # measure elapsed time 250 | batch_time.update(time.time() - end) 251 | end = time.time() 252 | 253 | fitlog.add_loss(losses.avg, name="InfoNCE loss", step=epoch) 254 | fitlog.add_loss(l1_losses.avg, name="L1 loss", step=epoch) 255 | fitlog.add_metric({"dev": {"Inst Acc": acc_inst.avg}}, step=epoch) 256 | 257 | print( 258 | f'epoch {epoch} InfoNCE loss : {losses.avg:.4f}, L1 loss : {l1_losses.avg:.4f}') 259 | 260 | progress.display(i) 261 | 262 | def main_worker_cls(gpu, best_model, args): 263 | args.gpu = gpu 264 | 265 | if args.gpu is not None: 266 | print("Use GPU: {} for training".format(args.gpu)) 267 | 268 | DEVICE = torch.device('cuda:' + str(args.gpu) if torch.cuda.is_available() else 'cpu') 269 | 270 | # create model 271 | print("=> creating model '{}'".format(args.arch)) 272 | model = FCN(args.dataset, n_channels=args.n_feature, n_classes=args.n_class, backbone=False) 273 | 274 | # freeze all layers but the last fc 275 | for name, param in model.named_parameters(): 276 | if name not in ['logits.weight', 'logits.bias']: 277 | param.requires_grad = False 278 | # init the fc layer 279 | model.logits.weight.data.normal_(mean=0.0, std=0.01) 280 | model.logits.bias.data.zero_() 281 | 282 | # load best model 283 | # rename pre-trained keys 284 | state_dict = deepcopy(best_model) 285 | for k in list(state_dict.keys()): 286 | if 'net' in k: 287 | # retain only encoder_q up to before the embedding layer 288 | if k.startswith('encoder_q.net') and not k.startswith('encoder_q.net.logits'): 289 | # remove prefix 290 | state_dict[k[len("encoder_q.net."):]] = state_dict[k] 291 | # delete renamed or unused k 292 | del state_dict[k] 293 | else: 294 | # retain only encoder_q up to before the embedding layer 295 | if k.startswith('encoder_q') and not k.startswith('encoder_q.logits'): 296 | # remove prefix 297 | state_dict[k[len("encoder_q."):]] = state_dict[k] 298 | # delete renamed or unused k 299 | del state_dict[k] 300 | 301 | args.start_epoch = 0 302 | msg = model.load_state_dict(state_dict, strict=False) 303 | assert set(msg.missing_keys) == {"logits.weight", "logits.bias"} 304 | 305 | print("=> loaded pre-trained model ") 306 | 307 | if args.gpu is not None: 308 | torch.cuda.set_device(args.gpu) 309 | model = model.to(DEVICE) 310 | 311 | # define loss function (criterion) and optimizer 312 | criterion = nn.CrossEntropyLoss().to(DEVICE) 313 | 314 | # optimize only the linear classifier 315 | parameters = list(filter(lambda p: p.requires_grad, model.parameters())) 316 | assert len(parameters) == 2 # fc.weight, fc.bias 317 | optimizer = torch.optim.SGD(parameters, args.lr, 318 | momentum=args.momentum, 319 | weight_decay=args.weight_decay) 320 | 321 | cudnn.benchmark = True 322 | 323 | # Data loading code 324 | train_loader, val_loader, test_loader = setup_dataloaders(args) 325 | 326 | for epoch in range(args.start_epoch, args.epochs): 327 | # train for one epoch 328 | train_cls(DEVICE, train_loader, val_loader, model, criterion, optimizer, epoch, args) 329 | 330 | if epoch == args.start_epoch: 331 | sanity_check(model.state_dict(), best_model) 332 | acc1 = validate_cls(DEVICE, test_loader, model, criterion, args, epoch, val=False) 333 | 334 | 335 | def train_cls(DEVICE, train_loader, val_loader, model, criterion, optimizer, epoch, args): 336 | batch_time = AverageMeter('Time', ':6.3f') 337 | data_time = AverageMeter('Data', ':6.3f') 338 | losses = AverageMeter('Loss', ':.4e') 339 | top1 = AverageMeter('Acc@1', ':6.2f') 340 | progress = ProgressMeter( 341 | len(train_loader), 342 | [batch_time, data_time, losses, top1], 343 | prefix="Epoch: [{}]".format(epoch)) 344 | 345 | """ 346 | Switch to eval mode: 347 | Under the protocol of linear classification on frozen features/models, 348 | it is not legitimate to change any part of the pre-trained model. 349 | BatchNorm in train mode may revise running mean/std (even if it receives 350 | no gradient), which are part of the model parameters too. 351 | """ 352 | model.eval() 353 | 354 | end = time.time() 355 | for i, (sample, target, domain) in enumerate(train_loader): 356 | # measure data loading time 357 | data_time.update(time.time() - end) 358 | 359 | sample = sample.to(DEVICE).float() 360 | target = target.to(DEVICE).long() 361 | 362 | # compute output 363 | output = model(sample) 364 | loss = criterion(output, target) 365 | 366 | # measure accuracy and record loss 367 | acc1 = accuracy(output, target, topk=(1,)) 368 | # print(acc1) 369 | losses.update(loss.item(), sample.size(0)) 370 | top1.update(acc1[0].item(), sample.size(0)) 371 | 372 | # compute gradient and do SGD step 373 | optimizer.zero_grad() 374 | loss.backward() 375 | optimizer.step() 376 | 377 | # measure elapsed time 378 | batch_time.update(time.time() - end) 379 | end = time.time() 380 | 381 | fitlog.add_loss(losses.avg, name="CLS Train loss", step=epoch) 382 | fitlog.add_loss(optimizer.param_groups[0]['lr'], name="CLS lr", step=epoch) 383 | fitlog.add_metric({"dev": {"CLS Train Acc": top1.avg}}, step=epoch) 384 | 385 | progress.display(i) 386 | 387 | if val_loader is not None: 388 | acc1_val = validate_cls(DEVICE, val_loader, model, criterion, args, epoch) 389 | 390 | 391 | def validate_cls(DEVICE, val_loader, model, criterion, args, epoch, val=True): 392 | batch_time = AverageMeter('Time', ':6.3f') 393 | losses = AverageMeter('Loss', ':.4e') 394 | top1 = AverageMeter('Acc@1', ':6.2f') 395 | progress = ProgressMeter( 396 | len(val_loader), 397 | [batch_time, losses, top1], 398 | prefix='Test: ') 399 | 400 | # switch to evaluate mode 401 | model.eval() 402 | 403 | total = 0 404 | correct = 0 405 | trgs = np.array([]) 406 | preds = np.array([]) 407 | feats = None 408 | confusion_matrix = torch.zeros(args.n_class, args.n_class) 409 | 410 | with torch.no_grad(): 411 | end = time.time() 412 | for i, (sample, target, domain) in enumerate(val_loader): 413 | sample = sample.to(DEVICE).float() 414 | target = target.to(DEVICE).long() 415 | 416 | # compute output 417 | output, feat = model(sample, return_feature=True) 418 | loss = criterion(output, target) 419 | 420 | if not val: 421 | _, predicted = torch.max(output.data, 1) 422 | trgs = np.append(trgs, target.data.cpu().numpy()) 423 | preds = np.append(preds, predicted.data.cpu().numpy()) 424 | if feats is None: 425 | feats = feat 426 | else: 427 | feats = torch.cat((feats, feat), 0) 428 | for t, p in zip(target.view(-1), predicted.view(-1)): 429 | confusion_matrix[t.long(), p.long()] += 1 430 | total += target.size(0) 431 | correct += (predicted == target).sum() 432 | 433 | # measure accuracy and record loss 434 | acc1 = accuracy(output, target, topk=(1,)) 435 | losses.update(loss.item(), sample.size(0)) 436 | top1.update(acc1[0].item(), sample.size(0)) 437 | 438 | # measure elapsed time 439 | batch_time.update(time.time() - end) 440 | end = time.time() 441 | 442 | if val: 443 | fitlog.add_loss(losses.avg, name="CLS Val loss", step=epoch) 444 | fitlog.add_metric({"dev": {"CLS Val Acc": top1.avg}}, step=epoch) 445 | 446 | if not val: 447 | acc_test = float(correct) * 100.0 / total 448 | miF = f1_score(trgs, preds, average='micro') * 100 449 | maF = f1_score(trgs, preds, average='macro') * 100 450 | 451 | fitlog.add_best_metric({"dev": {"Test Acc": acc_test}}) 452 | fitlog.add_best_metric({"dev": {"miF": miF}}) 453 | fitlog.add_best_metric({"dev": {"maF": maF}}) 454 | fitlog.add_hyper(confusion_matrix, name='conf_mat') 455 | 456 | progress.display(i) 457 | 458 | # TODO: this should also be done with the ProgressMeter 459 | print(' * Acc@1 {top1.avg:.3f} ' 460 | .format(top1=top1)) 461 | return top1.avg 462 | 463 | 464 | def sanity_check(state_dict, best_model): 465 | """ 466 | Linear classifier should not change any weights other than the linear layer. 467 | This sanity check asserts nothing wrong happens (e.g., BN stats updated). 468 | """ 469 | print("=> loading model for sanity check") 470 | state_dict_pre = best_model 471 | 472 | for k in list(state_dict.keys()): 473 | # only ignore fc layer 474 | if 'logits.weight' in k or 'logits.bias' in k: 475 | continue 476 | # name in pretrained model 477 | k_pre = 'encoder_q.' + k 478 | 479 | if 'net' in list(state_dict_pre.keys())[0]: 480 | k_pre = 'encoder_q.net.' + k 481 | 482 | assert ((state_dict[k].cpu() == state_dict_pre[k_pre].cpu()).all()), \ 483 | '{} is changed in linear classifier training.'.format(k) 484 | 485 | print("=> sanity check passed.") 486 | 487 | 488 | def save_checkpoint(state, is_best=False, filename='checkpoint.pth.tar'): 489 | torch.save(state, filename) 490 | if is_best: 491 | shutil.copyfile(filename, 'model_best.pth.tar') 492 | print(filename) 493 | 494 | 495 | class AverageMeter(object): 496 | """Computes and stores the average and current value""" 497 | 498 | def __init__(self, name, fmt=':f'): 499 | self.name = name 500 | self.fmt = fmt 501 | self.reset() 502 | 503 | def reset(self): 504 | self.val = 0 505 | self.avg = 0 506 | self.sum = 0 507 | self.count = 0 508 | 509 | def update(self, val, n=1): 510 | self.val = val 511 | self.sum += val * n 512 | self.count += n 513 | self.avg = self.sum / self.count 514 | 515 | def __str__(self): 516 | fmtstr = '{name} {val' + self.fmt + '} ({avg' + self.fmt + '})' 517 | return fmtstr.format(**self.__dict__) 518 | 519 | 520 | class ProgressMeter(object): 521 | def __init__(self, num_batches, meters, prefix=""): 522 | self.batch_fmtstr = self._get_batch_fmtstr(num_batches) 523 | self.meters = meters 524 | self.prefix = prefix 525 | 526 | def display(self, batch): 527 | entries = [self.prefix + self.batch_fmtstr.format(batch)] 528 | for meter in self.meters: 529 | print(meter) 530 | entries += [str(meter) for meter in self.meters] 531 | print('\t'.join(entries)) 532 | 533 | def _get_batch_fmtstr(self, num_batches): 534 | num_digits = len(str(num_batches // 1)) 535 | fmt = '{:' + str(num_digits) + 'd}' 536 | return '[' + fmt + '/' + fmt.format(num_batches) + ']' 537 | 538 | 539 | def adjust_learning_rate(optimizer, epoch, args): 540 | """Decay the learning rate based on schedule""" 541 | lr = args.lr 542 | if args.cos: # cosine lr schedule 543 | lr *= 0.5 * (1. + math.cos(math.pi * epoch / args.epochs)) 544 | else: # stepwise lr schedule 545 | for milestone in args.schedule: 546 | lr *= 0.1 if epoch >= milestone else 1. 547 | for param_group in optimizer.param_groups: 548 | param_group['lr'] = lr 549 | 550 | 551 | def adjust_learning_rate_cls(optimizer, epoch, args): 552 | """Decay the learning rate based on schedule""" 553 | lr = args.lr 554 | for milestone in args.schedule: 555 | lr *= 0.1 if epoch >= milestone else 1. 556 | for param_group in optimizer.param_groups: 557 | param_group['lr'] = lr 558 | 559 | 560 | def accuracy(output, target, topk=(1,)): 561 | """Computes the accuracy over the k top predictions for the specified values of k""" 562 | with torch.no_grad(): 563 | maxk = max(topk) 564 | batch_size = target.size(0) 565 | 566 | _, pred = output.topk(maxk, 1, True, True) 567 | pred = pred.t() 568 | correct = pred.eq(target.view(1, -1).expand_as(pred)) 569 | 570 | res = [] 571 | for k in topk: 572 | correct_k = correct[:k].view(-1).float().sum(0, keepdim=True) 573 | res.append(correct_k.mul_(100.0 / batch_size)) 574 | return res 575 | 576 | 577 | if __name__ == '__main__': 578 | main() --------------------------------------------------------------------------------