├── models
│   ├── __init__.py
│   ├── NTXent.py
│   ├── backbones.py
│   ├── builder.py
│   └── builder_utils.py
├── .gitignore
├── figures
│   └── FreRA.png
├── requirements.txt
├── data_preprocess
│   ├── base_loader.py
│   ├── data_preprocess_uea.py
│   ├── motionsense_raw_preprocess.py
│   ├── preprocess_uea.py
│   ├── data_preprocess_ms.py
│   ├── data_preprocess_utils.py
│   ├── data_preprocess_ucr.py
│   ├── data_preprocess_fd.py
│   ├── data_preprocess_shar.py
│   ├── data_preprocess_ucihar.py
│   └── data_preprocess_wisdm.py
├── autoaug
│   └── fourier.py
├── README.md
├── data_loaders.py
├── augmentations.py
└── main_FreRA.py
/models/__init__.py:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .DS_Store
2 | .idea/*
--------------------------------------------------------------------------------
/figures/FreRA.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Tian0426/FreRA/HEAD/figures/FreRA.png
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | scikit-learn==0.22.1
2 | torch==1.11.0
3 | torchvision==0.12.0
4 | einops==0.3.2
5 | pickle5==0.0.11
6 | numpy==1.21.2
7 | fitlog
8 | requests
9 | matplotlib
10 | seaborn
--------------------------------------------------------------------------------
/data_preprocess/base_loader.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from torch.utils.data import Dataset
3 |
4 | class base_loader(Dataset):
5 | def __init__(self, samples, labels, domains):
6 | self.samples = samples
7 | self.labels = labels
8 | self.domains = domains
9 |
10 | def __getitem__(self, index):
11 | sample, target, domain = self.samples[index], self.labels[index], self.domains[index]
12 | return sample, target, domain
13 |
14 | def __len__(self):
15 | return len(self.samples)
16 |
17 |
--------------------------------------------------------------------------------
/autoaug/fourier.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torch.nn.parameter import Parameter
3 | from torch.nn.modules.module import Module
4 |
5 | class FreRA(Module):
6 | def __init__(self, len_sw, device=None, dtype=None) -> None:
7 | super(FreRA, self).__init__()
8 | print('Initializing FreRA')
9 | factory_kwargs = {'device': device, 'dtype': dtype}
10 |
11 | n_fourier_comp = len_sw // 2 + 1
12 | self.weight = Parameter(torch.empty((n_fourier_comp, 2), **factory_kwargs))
13 | self.reset_parameters()
14 |
15 | def get_sampling(self, weight, temperature=0.1, bias=0.0):
16 |
17 | if self.training:
18 | bias = bias + 0.0001  # avoid log(0) below when bias is exactly 0
19 | eps = (bias - (1 - bias)) * torch.rand(weight.size()) + (1 - bias)
20 | gate_inputs = torch.log(eps) - torch.log(1 - eps)
21 | gate_inputs = gate_inputs.to(weight.device)  # keep the noise on the same device as the weights
22 | gate_inputs = (gate_inputs + weight) / temperature # todo adaptive temperature
23 | para = torch.sigmoid(gate_inputs)
24 | return para
25 | else:
26 | return torch.sigmoid(weight)
27 |
28 |
29 | def reset_parameters(self) -> None:
30 | # init.kaiming_uniform_(self.weight, a=math.sqrt(5))
31 | torch.nn.init.normal_(self.weight, mean=0.0, std=0.10)
32 | def forward(self, x, temperature):
33 | para = self.get_sampling(self.weight, temperature=temperature)
34 | self.para = para
35 |
36 | noise_para = self.weight.detach().clone() * (-1)
37 | noise_para[noise_para < max(0, noise_para[:, 0].mean())] = 0.0
38 | scaling_factor = 1.0 / noise_para[:, 0][noise_para[:, 0] != 0].mean()
39 |
40 | x_ft = torch.fft.rfft(x, dim=-2)
41 | x_ft = x_ft * torch.unsqueeze(para[:, 0] + noise_para[:, 0]*scaling_factor, -1)
42 | aug = torch.fft.irfft(x_ft, n=x.shape[-2], dim=-2)
43 |
44 | return aug
--------------------------------------------------------------------------------
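Note: `get_sampling` above is a binary-concrete (Gumbel-sigmoid style) relaxation. During training, the learnable per-frequency weights are perturbed with logistic noise and squashed through a sigmoid, so the near-binary gates stay differentiable; at evaluation time it reduces to a plain `sigmoid(weight)`. A minimal usage sketch (batch shape and temperature are illustrative, not taken from the repository):

```python
import torch
from autoaug.fourier import FreRA

# A batch of 8 windows, each 128 time steps x 9 channels; FreRA applies
# rfft/irfft over dim=-2, i.e. the time axis of (batch, length, channels).
x = torch.randn(8, 128, 9)

aug = FreRA(len_sw=128)
aug.train()                      # training mode samples stochastic gates
view = aug(x, temperature=0.1)   # augmented view, same shape as the input
print(view.shape)                # torch.Size([8, 128, 9])
```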
/models/NTXent.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import numpy as np
3 |
4 | class NTXentLoss(torch.nn.Module):
5 |
6 | def __init__(self, device, batch_size, temperature=0.1, use_cosine_similarity=True):
7 | super(NTXentLoss, self).__init__()
8 | self.batch_size = batch_size
9 | self.temperature = temperature
10 | print('NT-Xent temperature:', self.temperature)
11 | self.device = device
12 | self.softmax = torch.nn.Softmax(dim=-1)
13 | self.mask_samples_from_same_repr = self._get_correlated_mask().type(torch.bool)
14 | self.similarity_function = self._get_similarity_function(use_cosine_similarity)
15 |
16 | def _get_similarity_function(self, use_cosine_similarity):
17 | if use_cosine_similarity:
18 | self._cosine_similarity = torch.nn.CosineSimilarity(dim=-1)
19 | return self._cosine_similarity_fn
20 | else:
21 | return self._dot_similarity
22 |
23 | def _get_correlated_mask(self):
24 | diag = np.eye(2 * self.batch_size)
25 | l1 = np.eye((2 * self.batch_size), 2 * self.batch_size, k=-self.batch_size)
26 | l2 = np.eye((2 * self.batch_size), 2 * self.batch_size, k=self.batch_size)
27 | mask = torch.from_numpy((diag + l1 + l2))
28 | mask = (1 - mask).type(torch.bool)
29 | return mask.to(self.device)
30 |
31 | @staticmethod
32 | def _dot_similarity(x, y):
33 | v = torch.tensordot(x.unsqueeze(1), y.T.unsqueeze(0), dims=2)
34 | # x shape: (N, 1, C)
35 | # y shape: (1, C, 2N)
36 | # v shape: (N, 2N)
37 | return v
38 |
39 | def _cosine_similarity_fn(self, x, y):
40 | # x shape: (N, 1, C)
41 | # y shape: (1, 2N, C)
42 | # v shape: (N, 2N)
43 | v = self._cosine_similarity(x.unsqueeze(1), y.unsqueeze(0))
44 | return v
45 |
46 | def forward(self, zis, zjs):
47 | representations = torch.cat([zjs, zis], dim=0)
48 |
49 | similarity_matrix = self.similarity_function(representations, representations)
50 |
51 | # filter out the scores from the positive samples
52 | l_pos = torch.diag(similarity_matrix, self.batch_size)
53 | r_pos = torch.diag(similarity_matrix, -self.batch_size)
54 | self.positives = torch.cat([l_pos, r_pos]).view(2 * self.batch_size, 1)
55 |
56 | self.negatives = similarity_matrix[self.mask_samples_from_same_repr].view(2 * self.batch_size, -1)
57 |
58 | logits = torch.cat((self.positives, self.negatives), dim=1)
59 | logits /= self.temperature
60 |
61 | labels = torch.zeros(2 * self.batch_size).to(self.device).long()
62 |
63 | return logits, labels
64 |
--------------------------------------------------------------------------------
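Note: the module returns `(logits, labels)` rather than a scalar. Row `i` holds the positive-pair similarity in column 0 followed by the `2N - 2` negatives, so the NT-Xent objective is just cross-entropy against all-zero labels. A small self-contained check (sizes are arbitrary):

```python
import torch
from models.NTXent import NTXentLoss

batch_size, dim = 4, 128
criterion = NTXentLoss(device=torch.device('cpu'), batch_size=batch_size, temperature=0.1)

# Embeddings of the two augmented views of the same batch.
zis, zjs = torch.randn(batch_size, dim), torch.randn(batch_size, dim)

logits, labels = criterion(zis, zjs)   # logits: (2N, 2N - 1), labels: all zeros
loss = torch.nn.functional.cross_entropy(logits, labels)
print(logits.shape, loss.item())
```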
/models/backbones.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torch import nn
3 |
4 | class FCN(nn.Module):
5 | def __init__(self, dataset, n_channels, n_classes, out_channels=128, backbone=True):
6 | super(FCN, self).__init__()
7 |
8 | self.backbone = backbone
9 |
10 | kernel_size, stride = 8, 1
11 |
12 | self.conv_block1 = nn.Sequential(
13 | nn.Conv1d(n_channels, 32, kernel_size=kernel_size, stride=stride, bias=False, padding=int(kernel_size / 2)),
14 | nn.BatchNorm1d(32),
15 | nn.ReLU(),
16 | nn.MaxPool1d(kernel_size=2, stride=2, padding=1),
17 | nn.Dropout(0.35))
18 | self.conv_block2 = nn.Sequential(
19 | nn.Conv1d(32, 64, kernel_size=kernel_size, stride=stride, bias=False, padding=int(kernel_size / 2)),
20 | nn.BatchNorm1d(64),
21 | nn.ReLU(),
22 | nn.MaxPool1d(kernel_size=2, stride=2, padding=1))
23 | self.conv_block3 = nn.Sequential(nn.Conv1d(64, out_channels, kernel_size=kernel_size, stride=stride, bias=False,
24 | padding=int(kernel_size / 2)),
25 | nn.BatchNorm1d(out_channels),
26 | nn.ReLU(),
27 | nn.MaxPool1d(kernel_size=2, stride=2, padding=1))
28 |
29 | # flattened output length after the three conv/pool blocks, per dataset
30 | if dataset == 'ucihar':
31 | self.out_len = 18
32 | elif dataset == 'wisdm':
33 | self.out_len = 27
34 | elif dataset == 'ms':
35 | self.out_len = 27
36 | elif dataset == 'fm':
37 | self.out_len = 8
38 | elif dataset == 'FaceDetection':
39 | self.out_len = 10
40 | elif dataset == 'HandMovementDirection':
41 | self.out_len = 52
42 | elif dataset == 'Heartbeat':
43 | self.out_len = 53
44 | elif dataset == 'Libras':
45 | self.out_len = 8
46 | else:
47 | raise ValueError(f'FCN: out_len is not defined for dataset {dataset}')
47 | self.out_channels = out_channels
48 | self.out_dim = self.out_len * self.out_channels
49 |
50 | if not backbone:
51 | self.logits = nn.Linear(self.out_dim, n_classes)
52 |
53 | def forward(self, x_in, return_feature=False):
54 | if len(x_in.shape) == 2:
55 | x_in = x_in.unsqueeze(-1)
56 | x_in = x_in.permute(0, 2, 1)
57 | x = self.conv_block1(x_in)
58 | x = self.conv_block2(x)
59 | x = self.conv_block3(x)
60 |
61 | if self.backbone:
62 | return x
63 | else:
64 | x_flat = x.reshape(x.shape[0], -1)
65 | logits = self.logits(x_flat)
66 | if return_feature:
67 | return logits, x_flat
68 | else:
69 | return logits
70 |
--------------------------------------------------------------------------------
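Note: the hard-coded `out_len` values are just the length arithmetic of the three blocks: each `Conv1d(kernel_size=8, stride=1, padding=4)` grows the sequence by one step, and each `MaxPool1d(kernel_size=2, stride=2, padding=1)` maps a length L to floor(L/2) + 1. A short sketch that reproduces the table for the window lengths set in `data_loaders.py` (128 for `ucihar`, 200 for `wisdm` and `ms`):

```python
def fcn_out_len(len_sw, n_blocks=3, kernel_size=8):
    """Replay the length arithmetic of the three FCN conv/pool blocks."""
    L = len_sw
    for _ in range(n_blocks):
        L = L + 2 * (kernel_size // 2) - kernel_size + 1  # Conv1d, stride 1 -> L + 1
        L = (L + 2 - 2) // 2 + 1                          # MaxPool1d(2, stride 2, padding 1)
    return L

print(fcn_out_len(128))  # 18, matching 'ucihar'
print(fcn_out_len(200))  # 27, matching 'wisdm' and 'ms'
```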
/data_preprocess/data_preprocess_uea.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torch.utils.data import DataLoader
3 | from torch.utils.data import Dataset
4 | import os
5 | import numpy as np
6 | from sklearn.model_selection import train_test_split
7 | from data_preprocess.data_preprocess_utils import get_sample_weights, train_test_val_split
8 | from data_preprocess.base_loader import base_loader
9 |
10 | class data_loader_uea(base_loader):
11 | def __init__(self, samples, labels, domains):
12 | super(data_loader_uea, self).__init__(samples, labels, domains)
13 |
14 | def data_generator(args):
15 | data_path = './data/'+args.dataset+'/'
16 |
17 | train_dataset = torch.load(os.path.join(data_path, args.dataset+"_train.pt"))
18 | x_train, y_train = train_dataset["samples"], train_dataset["labels"]
19 | test_dataset = torch.load(os.path.join(data_path, args.dataset+"_test.pt"))
20 | x_test, y_test = test_dataset["samples"], test_dataset["labels"]
21 |
22 | if isinstance(x_train, np.ndarray):
23 | x_train, x_test = torch.from_numpy(x_train), torch.from_numpy(x_test)
24 | y_train, y_test = torch.from_numpy(y_train).long(), torch.from_numpy(y_test).long()
25 |
26 | d_train = np.full(x_train.shape[0], 0)
27 | d_test = np.full(x_test.shape[0], 0)
28 |
29 | x_all = np.concatenate((x_train, x_test), axis=0)
30 | y_all = np.concatenate((y_train, y_test))
31 | d_all = np.concatenate((d_train, d_test))
32 |
33 | x_win_train, x_win_val, x_win_test, \
34 | y_win_train, y_win_val, y_win_test, \
35 | d_win_train, d_win_val, d_win_test = train_test_val_split(x_all, y_all, d_all, split_ratio=args.split_ratio)
36 |
37 | print(x_win_train.shape, x_win_val.shape, x_win_test.shape)
38 |
39 | unique_y, counts_y = np.unique(y_win_train, return_counts=True)
40 | print('y_train label distribution: ', dict(zip(unique_y, counts_y)))
41 | weights = 100.0 / torch.Tensor(counts_y)
42 | print('weights of sampler: ', weights)
43 | weights = weights.double()
44 | sample_weights = get_sample_weights(y_win_train, weights)
45 | sampler = torch.utils.data.sampler.WeightedRandomSampler(weights=sample_weights, num_samples=len(sample_weights),
46 | replacement=True)
47 |
48 | train_set_r = data_loader_uea(x_win_train, y_win_train, d_win_train)
49 | train_loader_r = DataLoader(train_set_r, batch_size=args.batch_size, shuffle=False, drop_last=True, sampler=sampler)
50 | val_set_r = data_loader_uea(x_win_val, y_win_val, d_win_val)
51 | val_loader_r = DataLoader(val_set_r, batch_size=args.batch_size, shuffle=False)
52 | test_set_r = data_loader_uea(x_win_test, y_win_test, d_win_test)  # use the held-out split, not the raw test set
53 | test_loader_r = DataLoader(test_set_r, batch_size=args.batch_size, shuffle=False)
54 |
55 | return [train_loader_r], val_loader_r, test_loader_r
56 |
57 | def prep_uea(args):
58 | if args.cases == 'random':
59 | return data_generator(args)
--------------------------------------------------------------------------------
/models/builder.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | from random import sample
4 | from models.backbones import FCN
5 | from models.NTXent import *
6 | from models.builder_utils import *
7 | import copy
8 |
9 | class SimCLR(nn.Module):
10 | def __init__(self, device, dataset, n_feature, batch_size, base_encoder, dim=128, T=0.1):
11 | super(SimCLR, self).__init__()
12 |
13 | if base_encoder == 'FCN':
14 | self.encoder_q = FCN(dataset, n_channels=n_feature, n_classes=dim, backbone=False)
15 | dim_mlp = self.encoder_q.logits.weight.shape[1]
16 | self.encoder_q.logits = nn.Sequential(nn.Linear(dim_mlp, dim_mlp), nn.ReLU(), self.encoder_q.logits)
17 | else:
18 | raise NotImplementedError(f'unsupported base encoder: {base_encoder}')
19 |
20 | self.NTXentLoss = NTXentLoss(device=device, batch_size=batch_size, temperature=T)
21 |
22 | def forward(self, im_q, im_k):
23 | z1 = self.encoder_q(im_q)
24 | z2 = self.encoder_q(im_k)
25 |
26 | z1 = nn.functional.normalize(z1, dim=1)
27 | z2 = nn.functional.normalize(z2, dim=1)
28 |
29 | logits, labels = self.NTXentLoss(z1, z2)
30 |
31 | return logits, labels, z1, z2
32 |
33 | class BYOL(nn.Module):
34 | def __init__(
35 | self,
36 | DEVICE,
37 | base_encoder,
38 | dataset,
39 | n_feature,
40 | window_size,
41 | hidden_layer = -1,
42 | projection_size = 128,
43 | moving_average = 0.99,
44 | use_momentum = True,
45 | ):
46 | super().__init__()
47 |
48 | if base_encoder == 'FCN':
49 | self.encoder_q = FCN(dataset, n_channels=n_feature, n_classes=projection_size, backbone=False)
50 |
51 | dim_mlp = self.encoder_q.logits.weight.shape[1]
52 | self.encoder_q = NetWrapper(self.encoder_q, projection_size, dim_mlp, DEVICE=DEVICE, layer=hidden_layer)
53 |
54 | self.use_momentum = use_momentum
55 | self.target_encoder = None
56 | self.target_ema_updater = EMA(moving_average)
57 |
58 | self.online_predictor = Predictor(model='byol', dim=projection_size, pred_dim=projection_size)
59 |
60 | self.to(DEVICE)
61 |
62 | # send a mock image tensor to instantiate singleton parameters
63 | self.forward(torch.randn(2, window_size, n_feature, device=DEVICE),
64 | torch.randn(2, window_size, n_feature, device=DEVICE))
65 |
66 | @singleton('target_encoder')
67 | def _get_target_encoder(self):
68 | target_encoder = copy.deepcopy(self.encoder_q)
69 | for p in target_encoder.parameters():
70 | p.requires_grad = False
71 | return target_encoder
72 |
73 | def reset_moving_average(self):
74 | del self.target_encoder
75 | self.target_encoder = None
76 |
77 | def update_moving_average(self):
78 | assert self.target_encoder is not None, 'target encoder has not been created yet'
79 | update_moving_average(self.target_ema_updater, self.target_encoder, self.encoder_q)
80 |
81 | def forward(
82 | self,
83 | im_q,
84 | im_k,
85 | ):
86 | assert not (self.training and im_q.shape[0] == 1), 'you must have greater than 1 sample when training, due to the batchnorm in the projection layer'
87 |
88 | online_proj_one, lat1 = self.encoder_q(im_q)
89 | online_proj_two, lat2 = self.encoder_q(im_k)
90 |
91 | online_pred_one = self.online_predictor(online_proj_one)
92 | online_pred_two = self.online_predictor(online_proj_two)
93 |
94 | with torch.no_grad():
95 | target_encoder = self._get_target_encoder() if self.use_momentum else self.encoder_q
96 | target_proj_one, _ = target_encoder(im_q)
97 | target_proj_two, _ = target_encoder(im_k)
98 | target_proj_one.detach_()
99 | target_proj_two.detach_()
100 |
101 | return online_pred_one, online_pred_two, target_proj_one.detach(), target_proj_two.detach()
102 |
--------------------------------------------------------------------------------
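Note: `main_FreRA.py` (not reproduced in this dump) consumes the four BYOL outputs above. A common objective for this interface, and a reasonable assumption for how the outputs are combined here, is the symmetrized normalized MSE, which equals 2 - 2·cos(p, z):

```python
import torch.nn.functional as F

def byol_loss_fn(p, z):
    # ||p/|p| - z/|z|||^2 = 2 - 2 * cos(p, z); z comes from the frozen target branch.
    p, z = F.normalize(p, dim=-1), F.normalize(z, dim=-1)
    return 2 - 2 * (p * z).sum(dim=-1)

# pred_one, pred_two, targ_one, targ_two = byol_model(im_q, im_k)
# loss = (byol_loss_fn(pred_one, targ_two) + byol_loss_fn(pred_two, targ_one)).mean()
# byol_model.update_moving_average()   # EMA step for the target encoder
```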
/README.md:
--------------------------------------------------------------------------------
1 | ## FreRA: A Frequency-Refined Augmentation for Contrastive Learning on Time Series Classification
2 |
3 | Code for KDD 2025 paper "FreRA: A Frequency-Refined Augmentation for Contrastive Learning on Time Series Classification".
4 |
5 | ## Abstract
6 |
7 | >Contrastive learning has emerged as a competent approach for unsupervised representation learning. However, the design of an optimal augmentation strategy, although crucial for contrastive learning, is less explored for time series classification tasks. Existing predefined time-domain augmentation methods are primarily adopted from vision and are not specific to time series data. Consequently, this cross-modality incompatibility may distort the semantically relevant information of time series by introducing mismatched patterns into the data. To address this limitation, we present a novel perspective from the frequency domain and identify three advantages for downstream classification: 1) the frequency component naturally encodes global features, 2) the orthogonal nature of the Fourier basis allows easier isolation and independent modifications of critical and unimportant information, and 3) a compact set of frequency components can preserve semantic integrity. To fully utilize the three properties, we propose the lightweight yet effective Frequency Refined Augmentation (FreRA) tailored for time series contrastive learning on classification tasks, which can be seamlessly integrated with contrastive learning frameworks in a plug-and-play manner. Specifically, FreRA automatically separates critical and unimportant frequency components. Accordingly, we propose semantic-aware Identity Modification and semantic-agnostic Self-adaptive Modification to protect semantically relevant information in the critical frequency components and infuse variance into the unimportant ones respectively. Theoretically, we prove that FreRA generates semantic-preserving views. Empirically, we conduct extensive experiments on two benchmark datasets including UCR and UEA archives, as well as five large-scale datasets on diverse applications. FreRA consistently outperforms ten leading baselines on time series classification, anomaly detection, and transfer learning tasks, demonstrating superior capabilities in contrastive representation learning and generalization in transfer learning scenarios across diverse datasets.
8 |
9 | ![FreRA](figures/FreRA.png)
10 |
11 | ## Environment Setup
12 | Create an Anaconda environment and install the required packages:
13 | ```
14 | conda create -n FreRA python=3.8.3
15 | conda activate FreRA
16 | pip install -r requirements.txt
17 | ```
18 |
19 | ## Models
20 | The following models are provided under `./models/`.
21 | - contrastive models: SimCLR and BYOL in `./models/builder.py`
22 | - backbone encoder: FCN in `./models/backbones.py`
23 |
24 | ## Main Functions
25 | - `main_FreRA.py`
26 |
27 | ## Datasets
28 | Download the datasets from the following websites into `./data/`; they are then pre-processed automatically by the scripts under `./data_preprocess/`.
29 | - [UCIHAR](https://archive.ics.uci.edu/ml/datasets/human+activity+recognition+using+smartphones)
30 | - [WISDM](https://www.cis.fordham.edu/wisdm/dataset.php)
31 | - [MotionSense](https://github.com/mmalekzadeh/motion-sense)
32 | - [SHAR](http://www.sal.disco.unimib.it/technologies/unimib-shar/)
33 | - [Fault Diagnosis](https://mb.uni-paderborn.de/kat/datacenter)
34 | - [UEA Archive](https://timeseriesclassification.com/dataset.php)
35 | - [UCR Archive](https://www.cs.ucr.edu/~eamonn/time_series_data_2018/)
36 |
37 | ## Usage
38 | Example commands for training with the proposed FreRA augmentation:
39 | ```bash
40 | python main_FreRA.py --f_aug_mode 'FreRA' --l1_weight 0.003 --framework 'simclr' --dataset 'ucihar' --lr 0.01 --f_lr 0.001 --batch_size 128 --epochs 200 --temperature 0.2 --f_temperature 0.1 --gpu 0
41 | python main_FreRA.py --f_aug_mode 'FreRA' --l1_weight 0.003 --framework 'simclr' --dataset 'wisdm' --lr 0.01 --f_lr 0.001 --batch_size 128 --epochs 200 --temperature 0.2 --f_temperature 0.1 --gpu 0
42 | python main_FreRA.py --f_aug_mode 'FreRA' --l1_weight 0.003 --framework 'simclr' --dataset 'ms' --lr 0.01 --f_lr 0.001 --batch_size 128 --epochs 200 --temperature 0.2 --f_temperature 0.1 --gpu 0
43 | ```
44 |
--------------------------------------------------------------------------------
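For orientation, the sketch below shows how the pieces above fit together in one SimCLR-style training step. The real loop lives in `main_FreRA.py` (not shown in this dump), so the pairing of views, the exact target of the l1 regularization, and the hyperparameters are illustrative assumptions, not the repository's definitive procedure:

```python
import torch
from autoaug.fourier import FreRA
from models.builder import SimCLR

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# ucihar-like setup: 128-step windows with 9 channels, batch size 128.
f_aug = FreRA(len_sw=128).to(device)
model = SimCLR(device, dataset='ucihar', n_feature=9, batch_size=128,
               base_encoder='FCN').to(device)

opt = torch.optim.Adam(model.parameters(), lr=0.01)      # --lr
f_opt = torch.optim.Adam(f_aug.parameters(), lr=0.001)   # --f_lr
criterion = torch.nn.CrossEntropyLoss()

x = torch.randn(128, 128, 9, device=device)              # one dummy batch

# One FreRA view contrasted against the original window (assumed pairing).
logits, labels, _, _ = model(f_aug(x, temperature=0.1), x)
# Assumed sparsity term on the frequency gates, weighted by --l1_weight.
loss = criterion(logits, labels) + 0.003 * f_aug.para[:, 0].abs().sum()

opt.zero_grad()
f_opt.zero_grad()
loss.backward()
opt.step()
f_opt.step()
```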
/data_preprocess/motionsense_raw_preprocess.py:
--------------------------------------------------------------------------------
1 | import glob
2 | import re
3 | import os
4 | import pandas as pd
5 | import numpy as np
6 | import pickle as cp
7 |
8 | __author__ = "C. I. Tang"
9 | __copyright__ = "Copyright (C) 2020 C. I. Tang"
10 |
11 | """
12 | Based on work of Tang et al.: https://arxiv.org/abs/2011.11542
13 | Contact: cit27@cl.cam.ac.uk
14 | License: GNU General Public License v3.0
15 | This program is free software: you can redistribute it and/or modify
16 | it under the terms of the GNU General Public License as published by
17 | the Free Software Foundation, either version 3 of the License, or
18 | (at your option) any later version.
19 | This program is distributed in the hope that it will be useful,
20 | but WITHOUT ANY WARRANTY; without even the implied warranty of
21 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22 | GNU General Public License for more details.
23 | You should have received a copy of the GNU General Public License
24 | along with this program. If not, see <https://www.gnu.org/licenses/>.
25 | """
26 |
27 |
28 | def process_motion_sense_accelerometer_files(accelerometer_data_folder_path):
29 | """
30 | Preprocess the accelerometer files of the MotionSense dataset into the 'user-list' format
31 | Data files can be found at https://github.com/mmalekzadeh/motion-sense/tree/master/data
32 | Parameters:
33 | accelerometer_data_folder_path (str):
34 | the path to the folder containing the data files (unzipped)
35 | e.g. motionSense/B_Accelerometer_data/
36 | the trial folders should be directly inside it (e.g. motionSense/B_Accelerometer_data/dws_1/)
37 | Return:
38 |
39 | user_datasets (dict of {user_id: [(sensor_values, activity_labels)]})
40 | the processed dataset in a dictionary, of type {user_id: [(sensor_values, activity_labels)]}
41 | the keys of the dictionary is the user_id (participant id)
42 | the values of the dictionary are lists of (sensor_values, activity_labels) pairs
43 | sensor_values are 2D numpy arrays of shape (length, channels=12)
44 | activity_labels are 1D numpy array of shape (length)
45 | each pair corresponds to a separate trial
46 | (i.e. time is not contiguous between pairs, which is useful for making sliding windows, where it is easy to separate trials)
47 | """
48 |
49 | # label_set = {}
50 | user_datasets = {}
51 | all_trials_folders = sorted(glob.glob(accelerometer_data_folder_path + "/*"))
52 |
53 | # Loop through every trial folder
54 | for trial_folder in all_trials_folders:
55 | trial_name = os.path.split(trial_folder)[-1]
56 |
57 | # label of the trial is given in the folder name, separated by underscore
58 | label = trial_name.split("_")[0]
59 | # label_set[label] = True
60 | print(trial_folder)
61 |
62 | # Loop through files for every user of the trial
63 | for trial_user_file in sorted(glob.glob(trial_folder + "/*.csv")):
64 |
65 | # use regex to match the user id
66 | user_id_match = re.search(r'(?P<user_id>[0-9]+)\.csv', os.path.split(trial_user_file)[-1])
67 | if user_id_match is not None:
68 | user_id = int(user_id_match.group('user_id'))
69 |
70 | # Read file
71 | user_trial_dataset = pd.read_csv(trial_user_file)
72 | user_trial_dataset.dropna(how="any", inplace=True)
73 |
74 | # Extract the x, y, z channels
75 | values = user_trial_dataset[['attitude.roll', 'attitude.pitch', 'attitude.yaw', 'gravity.x', 'gravity.y', 'gravity.z', 'rotationRate.x', 'rotationRate.y', 'rotationRate.z', 'userAcceleration.x', 'userAcceleration.y', 'userAcceleration.z']].values
76 |
77 | # the label is the same during the entire trial, so it is repeated here to pad to the same length as the values
78 | labels = np.repeat(label, values.shape[0])
79 |
80 | if user_id not in user_datasets:
81 | user_datasets[user_id] = []
82 | user_datasets[user_id].append((values, labels))
83 | else:
84 | print("[ERR] User id not found", trial_user_file)
85 |
86 | return user_datasets
87 |
88 | # accelerometer_data_folder_path = 'data/MotionSense/'
89 | accelerometer_data_folder_path = 'data/MotionSense/A_DeviceMotion_data/'
90 | user_datasets = process_motion_sense_accelerometer_files(accelerometer_data_folder_path)
91 | with open(accelerometer_data_folder_path + 'motion_sense_user_split.pkl', 'wb') as f:
92 | cp.dump({
93 | 'user_split': user_datasets,
94 | }, f)
--------------------------------------------------------------------------------
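Note: the resulting pickle stores the 'user-list' format described in the docstring: a dict keyed by participant id, whose values are lists of `(sensor_values, activity_labels)` trials with 12 channels per frame. A quick inspection sketch (participant id 1 and the trial ordering are assumptions):

```python
import pickle

with open('data/MotionSense/A_DeviceMotion_data/motion_sense_user_split.pkl', 'rb') as f:
    user_datasets = pickle.load(f)['user_split']

values, labels = user_datasets[1][0]   # first trial of participant 1
print(values.shape)                    # (trial_length, 12)
print(labels[:3])                      # e.g. ['dws' 'dws' 'dws']
```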
/data_preprocess/preprocess_uea.py:
--------------------------------------------------------------------------------
1 | import os
2 | import pickle
3 | import numpy as np
4 | import torch
5 | from tqdm import tqdm
6 | # from sktime.utils.data_io import load_from_arff_to_dataframe
7 | from sktime.datasets import load_from_arff_to_dataframe
8 | from sklearn.preprocessing import LabelEncoder
9 |
10 | # original UEA(0,1,2) [instances, length, features/channels]
11 | # UEA(0,1,2) --> later will be permuted in dataloader-->get UEA(0,2,1) [instances, features/channels, length]
12 |
13 | DATA_DIR = '../data'
14 | def mkdir_if_not_exists(loc, file=False):
15 | loc_ = os.path.dirname(loc) if file else loc
16 | if not os.path.exists(loc):
17 | os.makedirs(loc_, exist_ok=True)
18 |
19 | def create_torch_data(train_file, test_file):
20 | # Get arff format
21 | train_data, train_labels = load_from_arff_to_dataframe(train_file)
22 | test_data, test_labels = load_from_arff_to_dataframe(test_file)
23 |
24 | def convert_data(data):
25 | # Expand the series to numpy
26 | data_expand = data.applymap(lambda x: x.values).values
27 | # Single array, then to tensor
28 | data_numpy = np.stack([np.vstack(x).T for x in data_expand])
29 | tensor_data = torch.Tensor(data_numpy)
30 | return tensor_data
31 |
32 | train_data, test_data = convert_data(train_data), convert_data(test_data)
33 |
34 | # Encode labels as often given as strings
35 | encoder = LabelEncoder().fit(train_labels)
36 | train_labels, test_labels = encoder.transform(train_labels), encoder.transform(test_labels)
37 | train_labels, test_labels = torch.Tensor(train_labels), torch.Tensor(test_labels)
38 |
39 | return train_data, test_data, train_labels, test_labels
40 | def save_pickle(obj, filename, protocol=4, create_folder=True):
41 | if create_folder:
42 | mkdir_if_not_exists(filename, file=True)
43 |
44 | # Save
45 | with open(filename, 'wb') as file:
46 | pickle.dump(obj, file, protocol=protocol)
47 |
48 | def convert_all_files(dataset='uea'):
49 | """ Convert all files from a given /raw/{subfolder} into torch data to be stored in /interim. """
50 | assert dataset in ['uea', 'ucr']
51 | arff_folder = DATA_DIR + '/Multivariate_arff'
52 |
53 | # Time for a big for loop
54 | for ds_name in ['DuckDuckGeese', 'FaceDetection', 'HandMovementDirection', 'Handwriting', 'Heartbeat', 'Libras', 'LSST', 'RacketSports', 'SpokenArabicDigits']: # ['Epilepsy', 'FingerMovements', 'ECG5000', ]:
55 | # File locations
56 | print(f'ds_name:{ds_name}')
57 | train_file = arff_folder + '/{}/{}_TRAIN.arff'.format(ds_name, ds_name)
58 | test_file = arff_folder + '/{}/{}_TEST.arff'.format(ds_name, ds_name)
59 |
60 | # Ready save dir
61 | save_dir = DATA_DIR + '/{}/'.format(ds_name)
62 | print(f'save_dir:{save_dir}')
63 | # If files don't exist, skip.
64 | if not os.path.isdir(save_dir):
65 | os.makedirs(save_dir)
66 | if any([x.split('/')[-1] not in os.listdir(arff_folder + '/{}'.format(ds_name)) for x in (train_file, test_file)]):
67 | if ds_name not in ['Images', 'Descriptions']:
68 | print('No files found for folder: {}'.format(ds_name))
69 | continue
70 | # elif os.path.isdir(save_dir):
71 | # print('Files already exist for: {}'.format(ds_name))
72 | # continue
73 | else:
74 | train_data, test_data, train_labels, test_labels = create_torch_data(train_file, test_file)
75 |
76 | dat_dict = dict()
77 | dat_dict["samples"] = train_data
78 | dat_dict["labels"] = train_labels
79 | torch.save(dat_dict, save_dir+ds_name+"_train.pt")
80 |
81 | dat_dict = dict()
82 | dat_dict["samples"] = test_data
83 | dat_dict["labels"] = test_labels
84 | torch.save(dat_dict, save_dir+ds_name+"_test.pt")
85 | print(train_data.shape, test_data.shape)
86 |
87 | # # Compile train and test data together
88 | # data = torch.cat([train_data, test_data])
89 | # labels = torch.cat([train_labels, test_labels])
90 | #
91 | # # Save original train test indexes in case we wish to use original splits
92 | # original_idxs = (np.arange(0, train_data.size(0)), np.arange(train_data.size(0), data.size(0)))
93 |
94 | # # Save data
95 | # save_pickle(data, save_dir + '/data.pkl')
96 | # save_pickle(labels, save_dir + '/labels.pkl')
97 | # save_pickle(original_idxs, save_dir + '/original_idxs.pkl')
98 |
99 |
100 | if __name__ == '__main__':
101 | dataset = 'uea'
102 | convert_all_files(dataset)
--------------------------------------------------------------------------------
/data_preprocess/data_preprocess_ms.py:
--------------------------------------------------------------------------------
1 | '''
2 | Data pre-processing for the MotionSense dataset.
3 | '''
4 |
5 |
6 | import os
7 | import numpy as np
8 | from torch.utils.data import Dataset, DataLoader
9 | from torchvision import transforms
10 | import torch
11 | import pickle as cp
12 | from data_preprocess.data_preprocess_utils import get_sample_weights, train_test_val_split, opp_sliding_window, normalize
13 | from data_preprocess.base_loader import base_loader
14 |
15 |
16 | class data_loader_ms(base_loader):
17 | def __init__(self, samples, labels, domains):
18 | super(data_loader_ms, self).__init__(samples, labels, domains)
19 |
20 | def apply_label_map(y, label_map):
21 | y_mapped = []
22 | for l in y:
23 | y_mapped.append(label_map.get(l))
24 | return np.array(y_mapped)
25 |
26 | def get_windows_dataset_from_user_list_format(user_datasets, window_size=200, shift=100):
27 | user_dataset_windowed = {}
28 | label_list = ['sit', 'std', 'wlk', 'ups', 'dws', 'jog'] # no null class
29 | label_map = dict([(l, i) for i, l in enumerate(label_list)])
30 |
31 | for user_id in user_datasets:
32 | x = []
33 | y = []
34 |
35 | # Loop through each trial of each user
36 | for v, l in user_datasets[user_id]:
37 | # print(l)
38 | l = apply_label_map(l, label_map)
39 | # print(l)
40 | v_sw, l_sw = opp_sliding_window(v, l, window_size, shift)
41 |
42 | if len(v_sw) > 0:
43 | x.append(v_sw)
44 | y.append(l_sw)
45 | # print(f"Data: {v_sw.shape}, Labels: {l_sw.shape}")
46 |
47 | # combine all trials
48 | user_dataset_windowed[user_id] = (np.concatenate(x), np.concatenate(y).squeeze())
49 |
50 | x = []
51 | y = []
52 | d = []
53 | for user_id in user_dataset_windowed:
54 |
55 | v, l = user_dataset_windowed[user_id]
56 | x.append(v)
57 | y.append(l)
58 | d.append(np.full(len(l), user_id))
59 |
60 | x = np.concatenate(x)
61 | y = np.concatenate(y).squeeze()
62 | d = np.concatenate(d).squeeze()
63 |
64 | return x, y, d
65 |
66 | def prep_ms_random(args, sw, ss):
67 | # with open('data/MotionSense/motion_sense_user_split.pkl', 'rb') as f:
68 | with open('data/MotionSense/A_DeviceMotion_data/motion_sense_user_split.pkl', 'rb') as f:
69 | dataset_dict = cp.load(f)
70 | user_datasets = dataset_dict['user_split']
71 |
72 | x, y, d = get_windows_dataset_from_user_list_format(user_datasets, window_size=sw, shift=ss)
73 | x = normalize(x)
74 | print(x.shape, y.shape, d.shape)
75 |
76 | x_train, x_val, x_test, \
77 | y_train, y_val, y_test, \
78 | d_train, d_val, d_test = train_test_val_split(x, y, d, split_ratio=args.split_ratio)
79 |
80 | unique_y, counts_y = np.unique(y_train, return_counts=True)
81 | _, dataset_len_sw, dataset_n_feature = x_train.shape
82 | print('y_train label distribution: ', dict(zip(unique_y, counts_y)))
83 | weights = 100.0 / torch.Tensor(counts_y)
84 | print('weights of sampler: ', weights)
85 | weights = weights.double()
86 | sample_weights = get_sample_weights(y_train, weights)
87 | sampler = torch.utils.data.sampler.WeightedRandomSampler(weights=sample_weights, num_samples=len(sample_weights),
88 | replacement=True)
89 |
90 | # transform = transforms.Compose([
91 | # transforms.ToTensor(),
92 | # transforms.Normalize(mean=(0, 0, 0, 0, 0, 0, 0, 0, 0), std=(1, 1, 1, 1, 1, 1, 1, 1, 1))
93 | # ])
94 |
95 | print(y_train.shape, y_val.shape, y_test.shape)
96 |
97 | print(x_train.shape, x_val.shape, x_test.shape)
98 | train_set_r = data_loader_ms(x_train, y_train, d_train)
99 | train_loader_r = DataLoader(train_set_r, batch_size=args.batch_size, shuffle=False, drop_last=True, sampler=sampler)
100 | val_set_r = data_loader_ms(x_val, y_val, d_val)
101 | val_loader_r = DataLoader(val_set_r, batch_size=args.batch_size, shuffle=False)
102 | test_set_r = data_loader_ms(x_test, y_test, d_test)
103 | test_loader_r = DataLoader(test_set_r, batch_size=args.batch_size, shuffle=False)
104 |
105 | return [train_loader_r], val_loader_r, test_loader_r
106 |
107 |
108 |
109 |
110 | def prep_ms(args, SLIDING_WINDOW_LEN=200, SLIDING_WINDOW_STEP=100):
111 | # todo: check whether the ms dataset should use a subject or a random split
112 | if args.cases == 'random':
113 | return prep_ms_random(args, SLIDING_WINDOW_LEN, SLIDING_WINDOW_STEP)
114 | # elif args.cases == 'subject':
115 | # return prep_domains_ucihar_subject(args, SLIDING_WINDOW_LEN, SLIDING_WINDOW_STEP)
116 | # elif args.cases == 'subject_large':
117 | # return prep_domains_ucihar_subject_large(args, SLIDING_WINDOW_LEN, SLIDING_WINDOW_STEP)
118 | elif args.cases == '':
119 | pass
120 | else:
121 | raise ValueError('Unknown args.cases!')
--------------------------------------------------------------------------------
/data_loaders.py:
--------------------------------------------------------------------------------
1 | from data_preprocess import data_preprocess_ucihar
2 | from data_preprocess import data_preprocess_shar
3 | from data_preprocess import data_preprocess_wisdm
4 | from data_preprocess import data_preprocess_ms
5 | from data_preprocess import data_preprocess_uea
6 | from data_preprocess import data_preprocess_ucr
7 |
8 | uea_list = ['ArticularyWordRecognition','AtrialFibrillation','BasicMotions','CharacterTrajectories','Cricket','DuckDuckGeese','EigenWorms','Epilepsy','ERing','EthanolConcentration','FaceDetection','FingerMovements','HandMovementDirection','Handwriting','Heartbeat','JapaneseVowels','Libras','LSST','MotorImagery','NATOPS','PEMS-SF','PenDigits','PhonemeSpectra','RacketSports','SelfRegulationSCP1','SelfRegulationSCP2','SpokenArabicDigits','StandWalkJump','UWaveGestureLibrary','InsectWingbeat']
9 | ucr_list = ['MoteStrain', 'ScreenType', 'MelbournePedestrian', 'RefrigerationDevices', 'PigArtPressure', 'SemgHandSubjectCh2', 'Car', 'HandOutlines', 'NonInvasiveFetalECGThorax2', 'FreezerRegularTrain', 'ArrowHead', 'FreezerSmallTrain', 'ECG200', 'ChlorineConcentration', 'CricketZ', 'CricketX', 'EOGHorizontalSignal', 'DiatomSizeReduction', 'Herring', 'Missing_value_and_variable_length_datasets_adjusted', 'SonyAIBORobotSurface2', 'PickupGestureWiimoteZ', 'ACSF1', 'EOGVerticalSignal', 'Rock', 'FiftyWords', 'ShakeGestureWiimoteZ', 'Symbols', 'ECGFiveDays', 'ProximalPhalanxTW', 'ProximalPhalanxOutlineAgeGroup', 'SyntheticControl', 'Wafer', 'Worms', 'BME', 'MiddlePhalanxTW', 'InsectWingbeatSound', 'UWaveGestureLibraryX', 'Coffee', 'TwoPatterns', 'ShapeletSim', 'Crop', 'AllGestureWiimoteY', 'PigAirwayPressure', 'Meat', 'StarLightCurves', 'UWaveGestureLibraryY', 'PhalangesOutlinesCorrect', 'DistalPhalanxOutlineCorrect', 'Earthquakes', 'CBF', 'Chinatown', 'AllGestureWiimoteZ', 'LargeKitchenAppliances', 'SmoothSubspace', 'GestureMidAirD2', 'MiddlePhalanxOutlineAgeGroup', 'ShapesAll', 'Computers', 'TwoLeadECG', 'DistalPhalanxTW', 'GestureMidAirD3', 'Lightning2', 'ProximalPhalanxOutlineCorrect', 'Plane', 'FacesUCR', 'DodgerLoopGame', 'ItalyPowerDemand', 'CinCECGTorso', 'GunPoint', 'MixedShapesSmallTrain', 'Fungi', 'MiddlePhalanxOutlineCorrect', 'Adiac', 'Phoneme', 'ElectricDevices', 'CricketY', 'NonInvasiveFetalECGThorax1', 'UWaveGestureLibraryZ', 'Yoga', 'BeetleFly', 'Fish', 'ToeSegmentation2', 'MedicalImages', 'Trace', 'GunPointAgeSpan', 'Beef', 'MixedShapesRegularTrain', 'SonyAIBORobotSurface1', 'FaceFour', 'PLAID', 'GesturePebbleZ2', 'OliveOil', 'ToeSegmentation1', 'SemgHandGenderCh2', 'FordB', 'Strawberry', 'Lightning7', 'UWaveGestureLibraryAll', 'InsectEPGSmallTrain', 'SwedishLeaf', 'BirdChicken', 'HouseTwenty', 'FordA', 'DistalPhalanxOutlineAgeGroup', 'InlineSkate', 'SmallKitchenAppliances', 'PigCVP', 'Mallat', 'GestureMidAirD1', 'WormsTwoClass', 'ECG5000', 'GunPointOldVersusYoung', 'Haptics', 'DodgerLoopDay', 'PowerCons', 'EthanolLevel', 'GunPointMaleVersusFemale', 'UMD', 'DodgerLoopWeekend', 'Ham', 'Wine', 'SemgHandMovementCh2', 'FaceAll', 'GesturePebbleZ1', 'AllGestureWiimoteX', 'OSULeaf', 'InsectEPGRegularTrain', 'WordSynonyms', 'MelbournePedestrian', 'PickupGestureWiimoteZ', 'ShakeGestureWiimoteZ', 'AllGestureWiimoteY', 'AllGestureWiimoteZ', 'GestureMidAirD2', 'GestureMidAirD3', 'DodgerLoopGame', 'PLAID', 'GesturePebbleZ2', 'GestureMidAirD1', 'DodgerLoopDay', 'DodgerLoopWeekend', 'GesturePebbleZ1', 'AllGestureWiimoteX']
10 |
11 | def setup_dataloaders(args):
12 | if args.dataset == 'ucihar':
13 | args.n_feature = 9
14 | args.len_sw = 128
15 | args.n_class = 6
16 | if args.cases not in ['subject', 'subject_large']:
17 | args.target_domain = '0'
18 | train_loaders, val_loader, test_loader = data_preprocess_ucihar.prep_ucihar(args, SLIDING_WINDOW_LEN=args.len_sw, SLIDING_WINDOW_STEP=int( args.len_sw * 0.5))
19 |
20 | elif args.dataset == 'shar':
21 | args.n_feature = 3
22 | args.len_sw = 151
23 | args.n_class = 17
24 | if args.cases not in ['subject', 'subject_large']:
25 | args.target_domain = '1'
26 | train_loaders, val_loader, test_loader = data_preprocess_shar.prep_shar(args, SLIDING_WINDOW_LEN=args.len_sw, SLIDING_WINDOW_STEP=int(args.len_sw * 0.5))
27 |
28 | elif args.dataset == 'ms':
29 | # args.dataset = 'MotionSenseHAR'
30 | args.n_feature = 12
31 | args.len_sw = 200
32 | args.n_class = 6
33 | if args.cases not in ['subject', 'subject_large']:
34 | args.target_domain = '1'
35 | train_loaders, val_loader, test_loader = data_preprocess_ms.prep_ms(args, SLIDING_WINDOW_LEN=args.len_sw, SLIDING_WINDOW_STEP=int(args.len_sw * 0.5))
36 |
37 | elif args.dataset == 'wisdm':
38 | args.n_feature = 3
39 | args.len_sw = 200
40 | args.n_class = 6
41 | if args.cases not in ['subject', 'subject_large']:
42 | args.target_domain = '1'
43 | train_loaders, val_loader, test_loader = data_preprocess_wisdm.prep_wisdm(args, SLIDING_WINDOW_LEN=args.len_sw, SLIDING_WINDOW_STEP=int(args.len_sw * 0.5))
44 |
45 | if args.dataset in uea_list:
46 | train_loaders, val_loader, test_loader = data_preprocess_uea.prep_uea(args)
47 |
48 | if args.dataset in ucr_list:
49 | train_loaders, val_loader, test_loader = data_preprocess_ucr.prep_ucr(args)
50 |
51 |
52 | return train_loaders[0], val_loader, test_loader
--------------------------------------------------------------------------------
/models/builder_utils.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torch import nn
3 | # from .MMB import *
4 |
5 | class Classifier(nn.Module):
6 | def __init__(self, bb_dim, n_classes):
7 | super(Classifier, self).__init__()
8 |
9 | self.classifier = nn.Linear(bb_dim, n_classes)
10 |
11 | def forward(self, x):
12 | out = self.classifier(x)
13 |
14 | return out
15 |
16 |
17 | class Projector(nn.Module):
18 | def __init__(self, model, bb_dim, prev_dim, dim):
19 | super(Projector, self).__init__()
20 | if model == 'SimCLR':
21 | self.projector = nn.Sequential(nn.Linear(bb_dim, prev_dim),
22 | nn.ReLU(inplace=True),
23 | nn.Linear(prev_dim, dim))
24 | elif model == 'byol':
25 | self.projector = nn.Sequential(nn.Linear(bb_dim, prev_dim, bias=False),
26 | nn.BatchNorm1d(prev_dim),
27 | nn.ReLU(inplace=True),
28 | nn.Linear(prev_dim, dim, bias=False),
29 | nn.BatchNorm1d(dim, affine=False))
30 | else:
31 | raise NotImplementedError
32 |
33 | def forward(self, x):
34 | x = self.projector(x)
35 | return x
36 |
37 |
38 | class Predictor(nn.Module):
39 | def __init__(self, model, dim, pred_dim):
40 | super(Predictor, self).__init__()
41 | if model == 'SimCLR':
42 | pass
43 | elif model == 'byol':
44 | self.predictor = nn.Sequential(nn.Linear(dim, pred_dim),
45 | nn.BatchNorm1d(pred_dim),
46 | nn.ReLU(inplace=True),
47 | nn.Linear(pred_dim, dim))
48 | else:
49 | raise NotImplementedError
50 |
51 | def forward(self, x):
52 | x = self.predictor(x)
53 | return x
54 |
55 | class EMA():
56 | def __init__(self, beta):
57 | super().__init__()
58 | self.beta = beta
59 |
60 | def update_average(self, old, new):
61 | if old is None:
62 | return new
63 | return old * self.beta + (1 - self.beta) * new
64 |
65 |
66 | def update_moving_average(ema_updater, ma_model, current_model):
67 | for current_params, ma_params in zip(current_model.parameters(), ma_model.parameters()):
68 | old_weight, up_weight = ma_params.data, current_params.data
69 | ma_params.data = ema_updater.update_average(old_weight, up_weight)
70 |
71 |
72 | from functools import wraps
73 |
74 |
75 | def singleton(cache_key):
76 | def inner_fn(fn):
77 | @wraps(fn)
78 | def wrapper(self, *args, **kwargs):
79 | instance = getattr(self, cache_key)
80 | if instance is not None:
81 | return instance
82 |
83 | instance = fn(self, *args, **kwargs)
84 | setattr(self, cache_key, instance)
85 | return instance
86 |
87 | return wrapper
88 |
89 | return inner_fn
90 |
91 |
92 | # a wrapper class for the base neural network
93 | # will manage the interception of the hidden layer output
94 | # and pipe it into the projector and predictor nets
95 |
96 | class NetWrapper(nn.Module):
97 | def __init__(self, net, projection_size, projection_hidden_size, DEVICE, layer=-2):
98 | super().__init__()
99 | self.net = net
100 | self.layer = layer
101 | self.DEVICE = DEVICE
102 |
103 | self.projector = None
104 | self.projection_size = projection_size
105 | self.projection_hidden_size = projection_hidden_size
106 |
107 | self.hidden = {}
108 | self.hook_registered = False
109 |
110 | def _find_layer(self):
111 | children = [*self.net.children()]
112 | print('children[self.layer]:', children[self.layer])
113 | return children[self.layer]
114 |
115 |
116 | def _hook(self, _, input, output):
117 | device = input[0].device
118 | self.hidden[device] = output.reshape(output.shape[0], -1)
119 |
120 | def _register_hook(self):
121 | layer = self._find_layer()
122 | assert layer is not None, f'hidden layer ({self.layer}) not found'
123 | handle = layer.register_forward_hook(self._hook)
124 | self.hook_registered = True
125 |
126 | @singleton('projector')
127 | def _get_projector(self, hidden):
128 | _, dim = hidden.shape
129 | projector = Projector(model='byol', bb_dim=dim, prev_dim=self.projection_hidden_size, dim=self.projection_size)
130 | return projector.to(hidden)
131 |
132 | def get_representation(self, x):
133 |
134 | if self.layer == -1:
135 | return self.net(x)
136 |
137 | if not self.hook_registered:
138 | self._register_hook()
139 |
140 | self.hidden.clear()
141 | _ = self.net(x)
142 | hidden = self.hidden[x.device]
143 | self.hidden.clear()
144 |
145 | assert hidden is not None, f'hidden layer {self.layer} never emitted an output'
146 | return hidden
147 |
148 | def forward(self, x):
149 | representation = self.get_representation(x)
150 |
151 | if len(representation.shape) == 3:
152 | representation = representation.reshape(representation.shape[0], -1)
153 |
154 | projector = self._get_projector(representation)
155 | projection = projector(representation)
156 | return projection, representation
157 |
158 |
--------------------------------------------------------------------------------
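Note: `EMA` and `update_moving_average` implement the exponential moving average that BYOL's `update_moving_average` method applies to the target encoder. A toy illustration of the update rule (the linear layers stand in for the online and target encoders):

```python
import copy
import torch.nn as nn
from models.builder_utils import EMA, update_moving_average

online = nn.Linear(4, 4)
target = copy.deepcopy(online)

updater = EMA(beta=0.99)
# ... after an optimizer step on `online` ...
update_moving_average(updater, target, online)  # target <- 0.99 * target + 0.01 * online
```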
/data_preprocess/data_preprocess_utils.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from numpy.lib.stride_tricks import as_strided as ast
3 | # from dataclasses import dataclass
4 | from sklearn.model_selection import train_test_split
5 |
6 | # @dataclass
7 | # class Params:
8 | # x: float
9 | # y: float
10 | # z: float
11 |
12 |
13 | def train_test_val_split(x_win_all, y_win_all, d_win_all, split_ratio=0.8):
14 | # split all data into train and test
15 | x_win_train, x_win_test, y_win_train, y_win_test, d_win_train, d_win_test = \
16 | train_test_split(x_win_all, y_win_all, d_win_all, test_size=split_ratio, random_state=0)
17 |
18 | # split train into train and validation with the same ratio
19 | x_win_train, x_win_val, y_win_train, y_win_val, d_win_train, d_win_val = \
20 | train_test_split(x_win_train, y_win_train, d_win_train, test_size=split_ratio, random_state=0)
21 |
22 | return x_win_train, x_win_val, x_win_test, \
23 | y_win_train, y_win_val, y_win_test, \
24 | d_win_train, d_win_val, d_win_test
25 |
26 |
27 | def onehot_to_label(y_onehot):
28 | a = np.argwhere(y_onehot == 1)
29 | return a[:, -1]
30 |
31 | def get_sample_weights(y, weights):
32 | '''
33 | to assign weights to each sample
34 | '''
35 | label_unique = np.unique(y)
36 | sample_weights = []
37 | for val in y:
38 | idx = np.where(label_unique == val)
39 | sample_weights.append(weights[idx])
40 | return sample_weights
41 |
42 |
43 | def normalize(x):
44 | """Normalizes all sensor channels by mean substraction,
45 | dividing by the standard deviation and by 2.
46 |
47 | :param x: numpy integer matrix
48 | Sensor data
49 | :return:
50 | Normalized sensor data
51 | """
52 | x = np.array(x, dtype=np.float32)
53 | m = np.mean(x, axis=0)
54 | x -= m
55 | std = np.std(x, axis=0)
56 | std += 0.000001
57 |
58 | x /= std
59 | return x
60 |
61 | def find_label(labels):
62 | # find the label of a sw given the labels of frames of a sliding window
63 | if np.bincount(labels)[np.argmax(np.bincount(labels))] < len(labels) * 0.5:
64 | return 0 # class 0 will be removed
65 | else:
66 | return np.argmax(np.bincount(labels))
67 |
68 | def opp_sliding_window_w_d(data_x, data_y, d, ws, ss): # window size, step size
69 | data_x = sliding_window(data_x, (ws, data_x.shape[1]), (ss, 1))
70 | data_y = np.asarray([[find_label(i)] for i in sliding_window(data_y, ws, ss)])
71 | data_d = np.asarray([[find_label(i)] for i in sliding_window(d, ws, ss)])
72 | return data_x.astype(np.float32), data_y.reshape(len(data_y)).astype(np.uint8), data_d.reshape(len(data_d)).astype(np.uint8)
73 |
74 |
75 | def sliding_window(a, ws, ss=None, flatten=True):
76 | '''
77 | Return a sliding window over a in any number of dimensions
78 |
79 | Parameters:
80 | a - an n-dimensional numpy array
81 | ws - an int (a is 1D) or tuple (a is 2D or greater) representing the size
82 | of each dimension of the window
83 | ss - an int (a is 1D) or tuple (a is 2D or greater) representing the
84 | amount to slide the window in each dimension. If not specified, it
85 | defaults to ws.
86 | flatten - if True, all slices are flattened, otherwise, there is an
87 | extra dimension for each dimension of the input.
88 |
89 | Returns
90 | an array containing each n-dimensional window from a
91 | '''
92 |
93 | if None is ss:
94 | # ss was not provided. the windows will not overlap in any direction.
95 | ss = ws
96 | ws = norm_shape(ws)
97 | ss = norm_shape(ss)
98 |
99 | # convert ws, ss, and a.shape to numpy arrays so that we can do math in every
100 | # dimension at once.
101 | ws = np.array(ws)
102 | ss = np.array(ss)
103 | shape = np.array(a.shape)
104 |
105 | # ensure that ws, ss, and a.shape all have the same number of dimensions
106 | ls = [len(shape), len(ws), len(ss)]
107 | if 1 != len(set(ls)):
108 | raise ValueError( \
109 | 'a.shape, ws and ss must all have the same length. They were %s' % str(ls))
110 |
111 | # ensure that ws is smaller than a in every dimension
112 | if np.any(ws > shape):
113 | raise ValueError( \
114 | 'ws cannot be larger than a in any dimension.\
115 | a.shape was %s and ws was %s' % (str(a.shape), str(ws)))
116 |
117 | newshape = norm_shape(((shape - ws) // ss) + 1)
118 | # the shape of the strided array will be the number of slices in each dimension
119 | # plus the shape of the window (tuple addition)
120 | newshape += norm_shape(ws)
121 | # the strides tuple will be the array's strides multiplied by step size, plus
122 | # the array's strides (tuple addition)
123 | newstrides = norm_shape(np.array(a.strides) * ss) + a.strides
124 | strided = ast(a, shape=newshape, strides=newstrides)
125 | if not flatten:
126 | return strided
127 |
128 | # Collapse strided so that it has one more dimension than the window. I.e.,
129 | # the new array is a flat list of slices.
130 | meat = len(ws) if ws.shape else 0
131 | firstdim = (np.prod(newshape[:-meat]),) if ws.shape else ()
132 | dim = firstdim + (newshape[-meat:])
133 | # remove any dimensions with size 1
134 | # commented by hangwei
135 | # dim = filter(lambda i: i != 1, dim)
136 | return strided.reshape(dim)
137 |
138 | def norm_shape(shape):
139 | '''
140 | Normalize numpy array shapes so they're always expressed as a tuple,
141 | even for one-dimensional shapes.
142 |
143 | Parameters
144 | shape - an int, or a tuple of ints
145 |
146 | Returns
147 | a shape tuple
148 | '''
149 | try:
150 | i = int(shape)
151 | return (i,)
152 | except TypeError:
153 | # shape was not a number
154 | pass
155 |
156 | try:
157 | t = tuple(shape)
158 | return t
159 | except TypeError:
160 | # shape was not iterable
161 | pass
162 |
163 | raise TypeError('shape must be an int, or a tuple of ints')
164 |
165 | def opp_sliding_window(data_x, data_y, ws, ss): # window size, step size
166 | data_x = sliding_window(data_x, (ws, data_x.shape[1]), (ss, 1))
167 | data_y = np.asarray([[find_label(i)] for i in sliding_window(data_y, ws, ss)])
168 | return data_x.astype(np.float32), data_y.reshape(len(data_y)).astype(np.uint8)
169 |
--------------------------------------------------------------------------------
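Note: `sliding_window` strides over the array without copying, and `opp_sliding_window` pairs each window with the majority label of its frames (via `find_label`). A shape check on toy data:

```python
import numpy as np
from data_preprocess.data_preprocess_utils import sliding_window, opp_sliding_window

x = np.arange(30, dtype=np.float32).reshape(10, 3)  # 10 frames, 3 channels
y = np.repeat([0, 1], 5).astype(np.uint8)           # frame-level labels

wins = sliding_window(x, (4, x.shape[1]), (2, 1))
print(wins.shape)                                   # (4, 4, 3): four 4-frame windows

x_w, y_w = opp_sliding_window(x, y, ws=4, ss=2)
print(x_w.shape, y_w)                               # (4, 4, 3) [0 0 1 1]
```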
/data_preprocess/data_preprocess_ucr.py:
--------------------------------------------------------------------------------
1 | import os
2 | import numpy as np
3 | import torch
4 | import pandas as pd
5 | import math
6 | import random
7 | from datetime import datetime
8 | import pickle
9 | from scipy.io.arff import loadarff
10 | from sklearn.preprocessing import StandardScaler, MinMaxScaler
11 |
12 | from torch.utils.data import DataLoader
13 | from torch.utils.data import Dataset
14 | from sklearn.model_selection import train_test_split
15 | from data_preprocess.data_preprocess_utils import get_sample_weights, train_test_val_split
16 | from data_preprocess.base_loader import base_loader
17 |
18 |
19 |
20 |
21 | ucr_list = ['MoteStrain', 'ScreenType', 'MelbournePedestrian', 'RefrigerationDevices', 'PigArtPressure', 'SemgHandSubjectCh2', 'Car', 'HandOutlines', 'NonInvasiveFetalECGThorax2', 'FreezerRegularTrain', 'ArrowHead', 'FreezerSmallTrain', 'ECG200', 'ChlorineConcentration', 'CricketZ', 'CricketX', 'EOGHorizontalSignal', 'DiatomSizeReduction', 'Herring', 'Missing_value_and_variable_length_datasets_adjusted', 'SonyAIBORobotSurface2', 'PickupGestureWiimoteZ', 'ACSF1', 'EOGVerticalSignal', 'Rock', 'FiftyWords', 'ShakeGestureWiimoteZ', 'Symbols', 'ECGFiveDays', 'ProximalPhalanxTW', 'ProximalPhalanxOutlineAgeGroup', 'SyntheticControl', 'Wafer', 'Worms', 'BME', 'MiddlePhalanxTW', 'InsectWingbeatSound', 'UWaveGestureLibraryX', 'Coffee', 'TwoPatterns', 'ShapeletSim', 'Crop', 'AllGestureWiimoteY', 'PigAirwayPressure', 'Meat', 'StarLightCurves', 'UWaveGestureLibraryY', 'PhalangesOutlinesCorrect', 'DistalPhalanxOutlineCorrect', 'Earthquakes', 'CBF', 'Chinatown', 'AllGestureWiimoteZ', 'LargeKitchenAppliances', 'SmoothSubspace', 'GestureMidAirD2', 'MiddlePhalanxOutlineAgeGroup', 'ShapesAll', 'Computers', 'TwoLeadECG', 'DistalPhalanxTW', 'GestureMidAirD3', 'Lightning2', 'ProximalPhalanxOutlineCorrect', 'Plane', 'FacesUCR', 'DodgerLoopGame', 'ItalyPowerDemand', 'CinCECGTorso', 'GunPoint', 'MixedShapesSmallTrain', 'Fungi', 'MiddlePhalanxOutlineCorrect', 'Adiac', 'Phoneme', 'ElectricDevices', 'CricketY', 'NonInvasiveFetalECGThorax1', 'UWaveGestureLibraryZ', 'Yoga', 'BeetleFly', 'Fish', 'ToeSegmentation2', 'MedicalImages', 'Trace', 'GunPointAgeSpan', 'Beef', 'MixedShapesRegularTrain', 'SonyAIBORobotSurface1', 'FaceFour', 'PLAID', 'GesturePebbleZ2', 'OliveOil', 'ToeSegmentation1', 'SemgHandGenderCh2', 'FordB', 'Strawberry', 'Lightning7', 'UWaveGestureLibraryAll', 'InsectEPGSmallTrain', 'SwedishLeaf', 'BirdChicken', 'HouseTwenty', 'FordA', 'DistalPhalanxOutlineAgeGroup', 'InlineSkate', 'SmallKitchenAppliances', 'PigCVP', 'Mallat', 'GestureMidAirD1', 'WormsTwoClass', 'ECG5000', 'GunPointOldVersusYoung', 'Haptics', 'DodgerLoopDay', 'PowerCons', 'EthanolLevel', 'GunPointMaleVersusFemale', 'UMD', 'DodgerLoopWeekend', 'Ham', 'Wine', 'SemgHandMovementCh2', 'FaceAll', 'GesturePebbleZ1', 'AllGestureWiimoteX', 'OSULeaf', 'InsectEPGRegularTrain', 'WordSynonyms', 'MelbournePedestrian', 'PickupGestureWiimoteZ', 'ShakeGestureWiimoteZ', 'AllGestureWiimoteY', 'AllGestureWiimoteZ', 'GestureMidAirD2', 'GestureMidAirD3', 'DodgerLoopGame', 'PLAID', 'GesturePebbleZ2', 'GestureMidAirD1', 'DodgerLoopDay', 'DodgerLoopWeekend', 'GesturePebbleZ1', 'AllGestureWiimoteX']
22 |
23 | class data_loader_ucr(base_loader):
24 | def __init__(self, samples, labels, domains):
25 | super(data_loader_ucr, self).__init__(samples, labels, domains)
26 |
27 | def load_UCR(dataset, use_fft=False):
28 | train_file = os.path.join('./data/UCR', dataset, dataset + "_TRAIN.tsv")
29 | test_file = os.path.join('./data/UCR', dataset, dataset + "_TEST.tsv")
30 | train_df = pd.read_csv(train_file, sep='\t', header=None)
31 | test_df = pd.read_csv(test_file, sep='\t', header=None)
32 | train_array = np.array(train_df)
33 | test_array = np.array(test_df)
34 |
35 | # Move the labels to {0, ..., L-1}
36 | labels = np.unique(train_array[:, 0])
37 | transform = {}
38 | for i, l in enumerate(labels):
39 | transform[l] = i
40 |
41 | train = train_array[:, 1:].astype(np.float64)
42 | train_labels = np.vectorize(transform.get)(train_array[:, 0])
43 | test = test_array[:, 1:].astype(np.float64)
44 | test_labels = np.vectorize(transform.get)(test_array[:, 0])
45 |
46 | # Normalization for non-normalized data
47 | # To keep the amplitude information, we do not normalize values over
48 | # individual time series, but on the whole dataset
49 | if dataset in [
50 | 'AllGestureWiimoteX',
51 | 'AllGestureWiimoteY',
52 | 'AllGestureWiimoteZ',
53 | 'BME',
54 | 'Chinatown',
55 | 'Crop',
56 | 'EOGHorizontalSignal',
57 | 'EOGVerticalSignal',
58 | 'Fungi',
59 | 'GestureMidAirD1',
60 | 'GestureMidAirD2',
61 | 'GestureMidAirD3',
62 | 'GesturePebbleZ1',
63 | 'GesturePebbleZ2',
64 | 'GunPointAgeSpan',
65 | 'GunPointMaleVersusFemale',
66 | 'GunPointOldVersusYoung',
67 | 'HouseTwenty',
68 | 'InsectEPGRegularTrain',
69 | 'InsectEPGSmallTrain',
70 | 'MelbournePedestrian',
71 | 'PickupGestureWiimoteZ',
72 | 'PigAirwayPressure',
73 | 'PigArtPressure',
74 | 'PigCVP',
75 | 'PLAID',
76 | 'PowerCons',
77 | 'Rock',
78 | 'SemgHandGenderCh2',
79 | 'SemgHandMovementCh2',
80 | 'SemgHandSubjectCh2',
81 | 'ShakeGestureWiimoteZ',
82 | 'SmoothSubspace',
83 | 'UMD'
84 | ]:
85 | mean = np.nanmean(train)
86 | std = np.nanstd(train)
87 | train = (train - mean) / std
88 | test = (test - mean) / std
89 |
90 | return train[..., np.newaxis], train_labels, test[..., np.newaxis], test_labels
91 |
92 | def data_generator(args):
93 | x_train, y_train, x_test, y_test = load_UCR(args.dataset)
94 |
95 | x_train, x_test = torch.Tensor(x_train), torch.Tensor(x_test)
96 | if torch.isnan(x_train).sum() > 0:
97 | x_train = torch.nan_to_num(x_train, nan=0.0)
98 | if torch.isnan(x_test).sum() > 0:
99 | x_test = torch.nan_to_num(x_test, nan=0.0)
100 |
101 | unique_y, counts_y = np.unique(y_train, return_counts=True)
102 |
103 | args.n_feature = x_train.shape[-1]
104 | args.len_sw = x_train.shape[-2]
105 | args.n_class = len(unique_y)
106 |
107 | train_set_r = data_loader_ucr(x_train, y_train, y_train)
108 | train_loader_r = DataLoader(train_set_r, batch_size=args.batch_size, shuffle=True, drop_last=True) # , sampler=sampler)
109 | val_set_r = data_loader_ucr(x_test, y_test, y_test)
110 | val_loader_r = DataLoader(val_set_r, batch_size=args.batch_size, shuffle=False)
111 | test_set_r = data_loader_ucr(x_test, y_test, y_test)
112 | test_loader_r = DataLoader(test_set_r, batch_size=args.batch_size, shuffle=False)
113 |
114 | return [train_loader_r], None, test_loader_r
115 |
116 |
117 | def prep_ucr(args):
118 | if args.cases == 'random':
119 | return data_generator(args)
120 |
121 |
122 |
--------------------------------------------------------------------------------
/data_preprocess/data_preprocess_fd.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import numpy as np
3 | import os
4 | from torch.utils.data import Dataset, DataLoader
5 | from data_preprocess.base_loader import base_loader
6 | from data_preprocess.data_preprocess_utils import get_sample_weights, train_test_val_split
7 | from sklearn.model_selection import StratifiedShuffleSplit
8 |
9 | def load_domain_data(domain_idx):
10 | data_dir = './data/FD/'
11 | filename = domain_idx +'.pt'
12 | print(filename)
13 |     if os.path.isfile(data_dir + filename):
14 | data = torch.load(data_dir + filename)
15 | x = data['x']
16 | y = data['y']
17 | else:
18 | for domain in ['a', 'b', 'c', 'd']:
19 | all_x, all_y = None, None
20 | for pre in ['train', 'val', 'test']:
21 | filename = pre + '_' + domain + '.pt'
22 | data = torch.load('./data/FD/' + filename)
23 | x = data['samples']
24 | y = data['labels']
25 | print(filename, x.shape, y.shape)
26 | all_x = torch.cat([all_x, x], axis=0) if all_x is not None else x
27 | all_y = torch.cat([all_y, y], axis=0) if all_y is not None else y
28 | unique_y, counts_y = np.unique(all_y, return_counts=True)
29 | # print(x[0, :10])
30 | print(all_x.shape, all_y.shape)
31 | print('y_train label distribution: ', dict(zip(unique_y, counts_y)))
32 | torch.save({'x': all_x, 'y': all_y}, './data/FD/' + domain + '.pt')
33 |         data = torch.load(data_dir + domain_idx + '.pt')  # load the requested domain, not whichever was built last
34 | x = data['x']
35 | y = data['y']
36 | # print({'a': 0, 'b': 1, 'c': 2, 'd': 3}[domain_idx])
37 | d = torch.Tensor(np.full(y.shape, {'a': 0, 'b': 1, 'c': 2, 'd': 3}[domain_idx], dtype=int))
38 | print(x.shape, y.shape, d.shape)
39 | unique_y, counts_y = np.unique(y, return_counts=True)
40 | print('y label distribution: ', dict(zip(unique_y, counts_y)))
41 | return x, y, d
42 |
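# Minimal sketch (illustrative only) of the caching pattern above: the first
# call materializes '<domain>.pt' as a {'x': ..., 'y': ...} dict, later calls
# just load it. The path below is a throwaway demo file, not a repo path.
def _demo_pt_cache(path='./_demo_cache.pt'):
    import os
    import torch
    if not os.path.isfile(path):
        torch.save({'x': torch.zeros(2, 5120), 'y': torch.zeros(2)}, path)
    data = torch.load(path)
    return data['x'].shape, data['y'].shape
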
43 | def load_domain_data_bd(domain_idx='bd'):
44 |     if domain_idx != 'bd':
45 |         raise ValueError('domain_idx should be bd')
46 | data_dir = './data/FD/'
47 | filename = domain_idx + '.pt'
48 |     if os.path.isfile(data_dir + filename):
49 | data = torch.load(data_dir + filename)
50 | x = data['x']
51 | y = data['y']
52 | else:
53 | all_x, all_y = None, None
54 | for domain in ['b', 'd']:
55 | filename = domain +'.pt'
56 | print(filename)
57 |             if os.path.isfile(data_dir + filename):
58 | data = torch.load(data_dir + filename)
59 | x = data['x']
60 | y = data['y']
61 | sp = StratifiedShuffleSplit(n_splits=1, test_size=0.5, random_state=0)
62 | for selected_index, _ in sp.split(x, y):
63 | x_selected, y_selected = x[selected_index], y[selected_index]
64 | all_x = torch.cat([all_x, x_selected], axis=0) if all_x is not None else x_selected
65 | all_y = torch.cat([all_y, y_selected], axis=0) if all_y is not None else y_selected
66 | unique_y, counts_y = np.unique(all_y, return_counts=True)
67 | print(all_x.shape, all_y.shape)
68 | print('y_train label distribution: ', dict(zip(unique_y, counts_y)))
69 | torch.save({'x': all_x, 'y': all_y}, './data/FD/' + domain_idx + '.pt')
70 | data = torch.load(data_dir + domain_idx + '.pt')
71 | x = data['x']
72 | y = data['y']
73 | d = torch.Tensor(np.full(y.shape, {'a': 0, 'bd': 1, 'c': 2}[domain_idx], dtype=int))
74 | print(x.shape, y.shape, d.shape)
75 | unique_y, counts_y = np.unique(y, return_counts=True)
76 | print('y label distribution: ', dict(zip(unique_y, counts_y)))
77 | return x, y, d
78 |
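# Sketch of the 'bd' construction above: StratifiedShuffleSplit with
# test_size=0.5 selects half of each domain while preserving the class
# proportions of y (the selected half is the 'train' side of the split).
def _demo_stratified_half():
    import numpy as np
    from sklearn.model_selection import StratifiedShuffleSplit
    y = np.array([0] * 6 + [1] * 2 + [2] * 2)
    sp = StratifiedShuffleSplit(n_splits=1, test_size=0.5, random_state=0)
    selected_index, _ = next(sp.split(np.zeros((len(y), 1)), y))
    return np.unique(y[selected_index], return_counts=True)  # counts (3, 1, 1)
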
79 | class data_loader_fd(base_loader):
80 | def __init__(self, samples, labels, domains):
81 | super(data_loader_fd, self).__init__(samples, labels, domains)
82 |
83 | def prep_domains_fd_comb(args, SLIDING_WINDOW_LEN=0, SLIDING_WINDOW_STEP=0):
84 |     # note: the FD dataset has 4 domains (a, b, c, d); here b and d are merged into 'bd'
85 | source_domain_list = ['a', 'bd', 'c']
86 |
87 | source_domain_list.remove(args.target_domain)
88 |
89 | # source domain data prep
90 | x_win_all, y_win_all, d_win_all = np.array([]), np.array([]), np.array([])
91 | for source_domain in source_domain_list:
92 | print('source_domain:', source_domain)
93 |
94 | if source_domain == 'bd':
95 | x, y, d = load_domain_data_bd(source_domain)
96 | else:
97 | x, y, d = load_domain_data(source_domain)
98 |
99 | x = x.reshape(-1, 5120, 1)
100 | print(" ..after sliding window: inputs {0}, targets {1}".format(x.shape, y.shape))
101 |
102 | x_win_all = np.concatenate((x_win_all, x), axis=0) if x_win_all.size else x
103 | y_win_all = np.concatenate((y_win_all, y), axis=0) if y_win_all.size else y
104 | d_win_all = np.concatenate((d_win_all, d), axis=0) if d_win_all.size else d
105 |
106 | unique_y, counts_y = np.unique(y_win_all, return_counts=True)
107 | print('y_train label distribution: ', dict(zip(unique_y, counts_y)))
108 | weights = 100.0 / torch.Tensor(counts_y)
109 | print('weights of sampler: ', weights)
110 | weights = weights.double()
111 |
112 | sample_weights = get_sample_weights(y_win_all, weights)
113 |
114 | sampler = torch.utils.data.sampler.WeightedRandomSampler(weights=sample_weights, num_samples=len(sample_weights), replacement=True)
115 |
116 | data_set = data_loader_fd(x_win_all, y_win_all, d_win_all)
117 | source_loader = DataLoader(data_set, batch_size=args.batch_size, shuffle=False, drop_last=True, sampler=sampler)
118 | print('source_loader batch: ', len(source_loader))
119 | source_loaders = [source_loader]
120 |
121 | # target domain data prep
122 | print('target_domain:', args.target_domain)
123 | if args.target_domain == 'bd':
124 | x, y, d = load_domain_data_bd(args.target_domain)
125 | else:
126 | x, y, d = load_domain_data(args.target_domain)
127 |
128 | x = x.reshape(-1, 5120, 1)
129 |
130 | print(" ..after sliding window: inputs {0}, targets {1}".format(x.shape, y.shape))
131 |
132 | data_set = data_loader_fd(x, y, d)
133 | target_loader = DataLoader(data_set, batch_size=args.batch_size, shuffle=False)
134 | print('target_loader batch: ', len(target_loader))
135 | return source_loaders, None, target_loader
136 |
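# Sketch of the class-balancing recipe used above (mirroring, not replacing,
# get_sample_weights from data_preprocess_utils): inverse-frequency class
# weights are expanded to one weight per sample, so WeightedRandomSampler
# draws classes roughly uniformly despite the skewed label distribution.
def _demo_balanced_sampler():
    import numpy as np
    import torch
    y = np.array([0, 0, 0, 0, 1])                             # skewed labels
    _, counts = np.unique(y, return_counts=True)
    class_weights = (100.0 / torch.Tensor(counts)).double()   # [25., 100.]
    sample_weights = class_weights[torch.as_tensor(y)]        # per-sample weight
    return torch.utils.data.WeightedRandomSampler(
        weights=sample_weights, num_samples=len(sample_weights), replacement=True)
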
137 | def prep_domains_fd_random(args, SLIDING_WINDOW_LEN=0, SLIDING_WINDOW_STEP=0):
138 |     # note: use all 4 FD domains (a, b, c, d) and split the pooled data randomly
139 | source_domain_list = ['a', 'b', 'd', 'c']
140 |
141 | # source domain data prep
142 | x_win_all, y_win_all, d_win_all = np.array([]), np.array([]), np.array([])
143 | for source_domain in source_domain_list:
144 | print('source_domain:', source_domain)
145 |
146 | if source_domain == 'bd':
147 | x, y, d = load_domain_data_bd(source_domain)
148 | else:
149 | x, y, d = load_domain_data(source_domain)
150 |
151 | x = x.reshape(-1, 5120, 1)
152 | print(" ..after sliding window: inputs {0}, targets {1}".format(x.shape, y.shape))
153 |
154 | x_win_all = np.concatenate((x_win_all, x), axis=0) if x_win_all.size else x
155 | y_win_all = np.concatenate((y_win_all, y), axis=0) if y_win_all.size else y
156 | d_win_all = np.concatenate((d_win_all, d), axis=0) if d_win_all.size else d
157 |
158 | x_win_train, x_win_val, x_win_test, \
159 | y_win_train, y_win_val, y_win_test, \
160 | d_win_train, d_win_val, d_win_test = train_test_val_split(x_win_all, y_win_all, d_win_all,
161 | split_ratio=args.split_ratio)
162 |
163 | unique_y, counts_y = np.unique(y_win_train, return_counts=True)
164 | print('y_train label distribution: ', dict(zip(unique_y, counts_y)))
165 | weights = 100.0 / torch.Tensor(counts_y)
166 | print('weights of sampler: ', weights)
167 | weights = weights.double()
168 | sample_weights = get_sample_weights(y_win_train, weights)
169 | sampler = torch.utils.data.sampler.WeightedRandomSampler(weights=sample_weights, num_samples=len(sample_weights), replacement=True)
170 |
171 | train_set_r = data_loader_fd(x_win_train, y_win_train, d_win_train)
172 | train_loader_r = DataLoader(train_set_r, batch_size=args.batch_size, shuffle=False, drop_last=True, sampler=sampler)
173 | val_set_r = data_loader_fd(x_win_val, y_win_val, d_win_val)
174 | val_loader_r = DataLoader(val_set_r, batch_size=args.batch_size, shuffle=False)
175 | test_set_r = data_loader_fd(x_win_test, y_win_test, d_win_test)
176 | test_loader_r = DataLoader(test_set_r, batch_size=args.batch_size, shuffle=False)
177 |
178 | return [train_loader_r], val_loader_r, test_loader_r
179 |
180 | def prep_eeg(args, SLIDING_WINDOW_LEN=0, SLIDING_WINDOW_STEP=0):  # historical name; prepares the FD dataset
181 |     if args.cases == 'rich_comb':
182 |         return prep_domains_fd_comb(args, SLIDING_WINDOW_LEN, SLIDING_WINDOW_STEP)
183 |     elif args.cases == 'random':
184 |         return prep_domains_fd_random(args, SLIDING_WINDOW_LEN, SLIDING_WINDOW_STEP)
185 |     elif args.cases == '':
186 |         pass
187 |     else:
188 |         raise ValueError('Error! Unknown args.cases!')
189 |
--------------------------------------------------------------------------------
/data_preprocess/data_preprocess_shar.py:
--------------------------------------------------------------------------------
1 | '''
2 | Data Pre-processing on SHAR dataset.
3 |
4 | '''
5 |
6 | import os
7 | import numpy as np
8 | from torch.utils.data import Dataset, DataLoader
9 | import torch
10 | import pickle as cp
11 | from data_preprocess.data_preprocess_utils import get_sample_weights, train_test_val_split, normalize
12 | import scipy.io
13 | from data_preprocess.base_loader import base_loader
14 |
15 | torch.manual_seed(10)
16 |
17 | def load_domain_data(domain_idx):
18 | """ to load all the data from the specific domain with index domain_idx
19 | :param domain_idx: index of a single domain
20 | :return: X and y data of the entire domain
21 | """
22 | data_dir = './data/UniMiB-SHAR/'
23 | saved_filename = 'shar_domain_' + domain_idx + '_wd.data' # "wd": with domain label
24 |
25 |     if os.path.isfile(data_dir + saved_filename):
26 | data = np.load(data_dir + saved_filename, allow_pickle=True)
27 | X = data[0][0]
28 | y = data[0][1]
29 | d = data[0][2]
30 | else:
31 | str_folder = './data/UniMiB-SHAR/data/'
32 | data_all = scipy.io.loadmat(str_folder + 'acc_data.mat')
33 | y_id_all = scipy.io.loadmat(str_folder + 'acc_labels.mat')
34 | y_id_all = y_id_all['acc_labels'] # (11771, 3)
35 |
36 | X_all = data_all['acc_data'] # data: (11771, 453)
37 | y_all = y_id_all[:, 0] - 1 # to map the labels to [0, 16]
38 | id_all = y_id_all[:, 1]
39 |
40 | print('\nProcessing domain {0} files...\n'.format(domain_idx))
41 |
42 | target_idx = np.where(id_all == int(domain_idx))
43 | X = X_all[target_idx]
44 | y = y_all[target_idx]
45 |
46 | domain_idx_map = {'1':0, '2':1, '3':2, '5':3}
47 | domain_idx_int = domain_idx_map[domain_idx]
48 |
49 | d = np.full(y.shape, domain_idx_int, dtype=int)
50 |
51 | print('\nProcessing domain {0} files | X: {1} y: {2} d:{3} \n'.format(domain_idx, X.shape, y.shape, d.shape))
52 | obj = [(X, y, d)]
53 | f = open(os.path.join(data_dir, saved_filename), 'wb')
54 | cp.dump(obj, f, protocol=cp.HIGHEST_PROTOCOL)
55 | f.close()
56 | return X, y, d
57 |
58 | def load_domain_data_large(domain_idx):
59 | """ to load all the data from the specific domain
60 | :param domain_idx:
61 | :return: X and y data of the entire domain
62 | """
63 | data_dir = './data/UniMiB-SHAR/'
64 | saved_filename = 'shar_domain_' + domain_idx + '_wd.data' # with domain label
65 |
66 |     if os.path.isfile(data_dir + saved_filename):
67 | data = np.load(data_dir + saved_filename, allow_pickle=True)
68 | X = data[0][0]
69 | y = data[0][1]
70 | d = data[0][2]
71 | else:
72 | str_folder = './data/UniMiB-SHAR/data/'
73 | data_all = scipy.io.loadmat(str_folder + 'acc_data.mat')
74 | y_id_all = scipy.io.loadmat(str_folder + 'acc_labels.mat')
75 | y_id_all = y_id_all['acc_labels'] # (11771, 3)
76 |
77 | X_all = data_all['acc_data'] # data: (11771, 453)
78 | y_all = y_id_all[:, 0] - 1 # to map the labels to [0, 16]
79 | id_all = y_id_all[:, 1]
80 |
81 | print('\nProcessing domain {0} files...\n'.format(domain_idx))
82 |
83 | target_idx = np.where(id_all == int(domain_idx))
84 | X = X_all[target_idx]
85 | y = y_all[target_idx]
86 | # note: to change domain ID
87 | # source_domain_list = ['1', '2', '3', '5', '6', '9',
88 | # '11', '13', '14', '15', '16', '17', '19', '20',
89 | # '21', '22', '23', '24', '25', '29']
90 | domain_idx_map = {'1':0, '2':1, '3':2, '5':3, '6':4, '9':5,
91 | '11':6, '13':7, '14':8, '15':9, '16':10, '17':11, '19':12, '20':13,
92 | '21':14, '22':15, '23':16, '24':17, '25':18, '29':19}
93 | domain_idx_int = domain_idx_map[domain_idx]
94 |
95 | d = np.full(y.shape, domain_idx_int, dtype=int)
96 |
97 | print('\nProcessing domain {0} files | X: {1} y: {2} d:{3} \n'.format(domain_idx, X.shape, y.shape, d.shape))
98 |
99 | obj = [(X, y, d)]
100 | f = open(os.path.join(data_dir, saved_filename), 'wb')
101 | cp.dump(obj, f, protocol=cp.HIGHEST_PROTOCOL)
102 | f.close()
103 | return X, y, d
104 |
105 |
106 | class data_loader_shar(base_loader):
107 | def __init__(self, samples, labels, domains):
108 | super(data_loader_shar, self).__init__(samples, labels, domains)
109 |
110 | # def __getitem__(self, index):
111 | # sample, target, domain = self.samples[index], self.labels[index], self.domains[index]
112 | # sample = normalize(sample)
113 | # return sample, target, domain
114 |
115 |
116 | def prep_domains_shar_subject(args, SLIDING_WINDOW_LEN=0, SLIDING_WINDOW_STEP=0):
117 |
118 | # info: for SHAR dataset, the following domains have incomplete classes: 4,7,8,10
119 | source_domain_list = ['1', '2', '3', '5']
120 | source_domain_list.remove(args.target_domain)
121 |
122 | # source domain data prep
123 | x_win_all, y_win_all, d_win_all = np.array([]), np.array([]), np.array([])
124 | for source_domain in source_domain_list:
125 | print('source_domain:', source_domain)
126 | x, y, d = load_domain_data(source_domain)
127 |
128 | x = x.reshape(-1, 151, 3)
129 | print(" ..after sliding window: inputs {0}, targets {1}".format(x.shape, y.shape))
130 |
131 | x_win_all = np.concatenate((x_win_all, x), axis=0) if x_win_all.size else x
132 | y_win_all = np.concatenate((y_win_all, y), axis=0) if y_win_all.size else y
133 | d_win_all = np.concatenate((d_win_all, d), axis=0) if d_win_all.size else d
134 |
135 | unique_y, counts_y = np.unique(y_win_all, return_counts=True)
136 | print('y_train label distribution: ', dict(zip(unique_y, counts_y)))
137 | weights = 100.0 / torch.Tensor(counts_y)
138 | print('weights of sampler: ', weights)
139 | weights = weights.double()
140 |
141 | sample_weights = get_sample_weights(y_win_all, weights)
142 |
143 | sampler = torch.utils.data.sampler.WeightedRandomSampler(weights=sample_weights,
144 | num_samples=len(sample_weights), replacement=True)
145 |
146 | data_set = data_loader_shar(x_win_all, y_win_all, d_win_all)
147 | source_loader = DataLoader(data_set, batch_size=args.batch_size, shuffle=True, drop_last=True) # , sampler=sampler)
148 | print('source_loader batch: ', len(source_loader))
149 | source_loaders = [source_loader]
150 |
151 | # target domain data prep
152 | print('target_domain:', args.target_domain)
153 | x, y, d = load_domain_data(args.target_domain)
154 |
155 | x = x.reshape(-1, 151, 3)
156 |
157 | print(" ..after sliding window: inputs {0}, targets {1}".format(x.shape, y.shape))
158 |
159 | unique_y, counts_y = np.unique(y, return_counts=True)
160 | print('y_train label distribution: ', dict(zip(unique_y, counts_y)))
161 | weights = 100.0 / torch.Tensor(counts_y)
162 | print('weights of sampler: ', weights)
163 |
164 | data_set = data_loader_shar(x, y, d)
165 |     # evaluation loader: keep the sample order fixed
166 | target_loader = DataLoader(data_set, batch_size=args.batch_size, shuffle=False)
167 | print('target_loader batch: ', len(target_loader))
168 | return source_loaders, None, target_loader
169 |
170 |
171 | def prep_domains_shar_subject_large(args, SLIDING_WINDOW_LEN=0, SLIDING_WINDOW_STEP=0):
172 |     # note: SHAR has 30 domains in total; the following have incomplete
173 |     # classes and are excluded: 4, 7, 8, 10, 12, 18, 26, 27, 28, 30
174 | source_domain_list = ['1', '2', '3', '5', '6', '9',
175 | '11', '13', '14', '15', '16', '17', '19', '20',
176 | '21', '22', '23', '24', '25', '29']
177 |
178 | source_domain_list.remove(args.target_domain)
179 |
180 | # source domain data prep
181 | x_win_all, y_win_all, d_win_all = np.array([]), np.array([]), np.array([])
182 | for source_domain in source_domain_list:
183 | print('source_domain:', source_domain)
184 |         # todo: the domain-ID mapping differs from the small-subset loader; the two could be merged into one function
185 | x, y, d = load_domain_data_large(source_domain)
186 |
187 | x = x.reshape(-1, 151, 3)
188 | print(" ..after sliding window: inputs {0}, targets {1}".format(x.shape, y.shape))
189 |
190 | x_win_all = np.concatenate((x_win_all, x), axis=0) if x_win_all.size else x
191 | y_win_all = np.concatenate((y_win_all, y), axis=0) if y_win_all.size else y
192 | d_win_all = np.concatenate((d_win_all, d), axis=0) if d_win_all.size else d
193 |
194 | unique_y, counts_y = np.unique(y_win_all, return_counts=True)
195 | print('y_train label distribution: ', dict(zip(unique_y, counts_y)))
196 | weights = 100.0 / torch.Tensor(counts_y)
197 | print('weights of sampler: ', weights)
198 | weights = weights.double()
199 |
200 | sample_weights = get_sample_weights(y_win_all, weights)
201 |
202 | sampler = torch.utils.data.sampler.WeightedRandomSampler(weights=sample_weights, num_samples=len(sample_weights), replacement=True)
203 |
204 | data_set = data_loader_shar(x_win_all, y_win_all, d_win_all)
205 | source_loader = DataLoader(data_set, batch_size=args.batch_size, shuffle=False, drop_last=True, sampler=sampler)
206 | print('source_loader batch: ', len(source_loader))
207 | source_loaders = [source_loader]
208 |
209 | # target domain data prep
210 | print('target_domain:', args.target_domain)
211 | x, y, d = load_domain_data_large(args.target_domain)
212 |
213 | x = x.reshape(-1, 151, 3)
214 |
215 | print(" ..after sliding window: inputs {0}, targets {1}".format(x.shape, y.shape))
216 |
217 | data_set = data_loader_shar(x, y, d)
218 | target_loader = DataLoader(data_set, batch_size=args.batch_size, shuffle=False)
219 | print('target_loader batch: ', len(target_loader))
220 | return source_loaders, None, target_loader
221 |
222 | def prep_shar(args, SLIDING_WINDOW_LEN=0, SLIDING_WINDOW_STEP=0):
223 | if args.cases == 'subject':
224 | return prep_domains_shar_subject(args, SLIDING_WINDOW_LEN, SLIDING_WINDOW_STEP)
225 | elif args.cases == 'subject_large':
226 | return prep_domains_shar_subject_large(args, SLIDING_WINDOW_LEN, SLIDING_WINDOW_STEP)
227 |     else:
228 |         raise ValueError('Error! Unknown args.cases!')
229 |
230 |
--------------------------------------------------------------------------------
/data_preprocess/data_preprocess_ucihar.py:
--------------------------------------------------------------------------------
1 | '''
2 | Data Pre-processing on UCIHAR dataset.
3 |
4 | '''
5 |
6 | import os
7 | import numpy as np
8 | from torch.utils.data import Dataset, DataLoader
9 | from torchvision import transforms
10 | import torch
11 | import pickle as cp
12 | from data_preprocess.data_preprocess_utils import get_sample_weights, train_test_val_split
13 | from data_preprocess.base_loader import base_loader
14 |
15 | def format_data_x(datafile):
16 | x_data = None
17 | for item in datafile:
18 |         item_data = np.loadtxt(item, dtype=np.float64)  # np.float is deprecated
19 | if x_data is None:
20 | x_data = np.zeros((len(item_data), 1))
21 | x_data = np.hstack((x_data, item_data))
22 | x_data = x_data[:, 1:]
23 | print('x_data.shape:', x_data.shape)
24 | X = None
25 | for i in range(len(x_data)):
26 | row = np.asarray(x_data[i, :])
27 | row = row.reshape(9, 128).T
28 | if X is None:
29 | X = np.zeros((len(x_data), 128, 9))
30 | X[i] = row
31 | print('X.shape:', X.shape)
32 | return X
33 |
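# Shape sketch for the reshape above: each raw row stores 9 signals of length
# 128 back-to-back, so reshape(9, 128).T yields a (time=128, channel=9) window.
def _demo_row_layout():
    import numpy as np
    row = np.arange(9 * 128)
    win = row.reshape(9, 128).T        # (128, 9)
    assert win[0, 1] == 128            # t=0 of channel 1 starts at offset 128
    return win.shape
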
34 | def format_data_y(datafile):
35 |     data = np.loadtxt(datafile, dtype=np.int64) - 1  # np.int is deprecated
36 | return data
37 |
38 | def load_domain_data(domain_idx):
39 | """ to load all the data from the specific domain with index domain_idx
40 | :param domain_idx: index of a single domain
41 | :return: X and y data of the entire domain
42 | """
43 |     data_dir = './data/ucihar/'  # keep paths relative to the repo root
44 |     # data_dir = '/home/tiantian/PCL/data/ucihar/'  # machine-specific absolute path, kept for reference
45 | saved_filename = 'ucihar_domain_' + domain_idx + '_wd.data' # "wd": with domain label
46 |
47 |     if os.path.isfile(data_dir + saved_filename):
48 | data = np.load(data_dir + saved_filename, allow_pickle=True)
49 | X = data[0][0]
50 | y = data[0][1]
51 | d = data[0][2]
52 | else:
53 |         if not os.path.isdir(data_dir):
54 | os.makedirs(data_dir)
55 | str_folder = './data/UCI HAR Dataset/'
56 | INPUT_SIGNAL_TYPES = [
57 | "body_acc_x_",
58 | "body_acc_y_",
59 | "body_acc_z_",
60 | "body_gyro_x_",
61 | "body_gyro_y_",
62 | "body_gyro_z_",
63 | "total_acc_x_",
64 | "total_acc_y_",
65 | "total_acc_z_"
66 | ]
67 | str_train_files = [str_folder + 'train/' + 'Inertial Signals/' + item + 'train.txt' for item in INPUT_SIGNAL_TYPES]
68 | str_test_files = [str_folder + 'test/' + 'Inertial Signals/' + item + 'test.txt' for item in INPUT_SIGNAL_TYPES]
69 | str_train_y = str_folder + 'train/y_train.txt'
70 | str_test_y = str_folder + 'test/y_test.txt'
71 |
72 | str_train_id = str_folder + 'train/subject_train.txt'
73 | str_test_id = str_folder + 'test/subject_test.txt'
74 |
75 | X_train = format_data_x(str_train_files)
76 | X_test = format_data_x(str_test_files)
77 | Y_train = format_data_y(str_train_y)
78 | Y_test = format_data_y(str_test_y)
79 | id_train = format_data_y(str_train_id) # origin: array([ 0, 2, 4, 5, 6, 7, 10, 13, 14, 15, 16, 18, 20, 21, 22, 24, 25, 26, 27, 28, 29])
80 | id_test = format_data_y(str_test_id) # origin: array([ 1, 3, 8, 9, 11, 12, 17, 19, 23])
81 |
82 | X_all = np.concatenate((X_train, X_test), axis=0)
83 | y_all = np.concatenate((Y_train, Y_test), axis=0)
84 | id_all = np.concatenate((id_train, id_test), axis=0)
85 |
86 | print('\nProcessing domain {0} files...\n'.format(domain_idx))
87 | target_idx = np.where(id_all == int(domain_idx))
88 | X = X_all[target_idx]
89 | y = y_all[target_idx]
90 | d = np.full(y.shape, int(domain_idx), dtype=int)
91 | print('\nProcessing domain {0} files | X: {1} y: {2} d:{3} \n'.format(domain_idx, X.shape, y.shape, d.shape))
92 |
93 | obj = [(X, y, d)]
94 | f = open(os.path.join(data_dir, saved_filename), 'wb')
95 | cp.dump(obj, f, protocol=cp.HIGHEST_PROTOCOL)
96 | f.close()
97 | return X, y, d
98 |
99 | class data_loader_ucihar(base_loader):
100 | def __init__(self, samples, labels, domains, t):
101 | super(data_loader_ucihar, self).__init__(samples, labels, domains)
102 | self.T = t
103 |
104 | def __getitem__(self, index):
105 | sample, target, domain = self.samples[index], self.labels[index], self.domains[index]
106 | sample = self.T(sample)
107 | return np.squeeze(np.transpose(sample, (1, 0, 2))), target, domain
108 |
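# Shape flow of __getitem__ above (sketch): a (128, 1, 9) window goes through
# torchvision's ToTensor (HWC -> CHW, giving (9, 128, 1)), is transposed back
# to (128, 9, 1) and squeezed to (128, 9), i.e. (len_sw, n_channels).
def _demo_ucihar_item_shape():
    import numpy as np
    from torchvision import transforms
    sample = np.zeros((128, 1, 9), dtype=np.float32)
    t = transforms.ToTensor()(sample)                 # torch.Size([9, 128, 1])
    out = np.squeeze(np.transpose(t, (1, 0, 2)))      # (128, 9)
    return t.shape, out.shape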
109 |
110 | def prep_domains_ucihar_subject(args, SLIDING_WINDOW_LEN=0, SLIDING_WINDOW_STEP=0):
111 | # todo: make the domain IDs as arguments or a function with args to select the IDs (default, customized, small, etc)
112 | source_domain_list = ['0', '1', '2', '3', '4']
113 |
114 | source_domain_list.remove(args.target_domain)
115 |
116 | # source domain data prep
117 | x_win_all, y_win_all, d_win_all = np.array([]), np.array([]), np.array([])
118 | for source_domain in source_domain_list:
119 | print('source_domain:', source_domain)
120 | x, y, d = load_domain_data(source_domain)
121 |
122 | # n_channel should be 9, H: 1, W:128
123 | x = np.transpose(x.reshape((-1, 1, 128, 9)), (0, 2, 1, 3))
124 | # the UCIHAR dataset is segmented by sliding window as default
125 | print(" ..after sliding window: inputs {0}, targets {1}".format(x.shape, y.shape))
126 |
127 | x_win_all = np.concatenate((x_win_all, x), axis=0) if x_win_all.size else x
128 | y_win_all = np.concatenate((y_win_all, y), axis=0) if y_win_all.size else y
129 | d_win_all = np.concatenate((d_win_all, d), axis=0) if d_win_all.size else d
130 |
131 | unique_y, counts_y = np.unique(y_win_all, return_counts=True)
132 | print('y_train label distribution: ', dict(zip(unique_y, counts_y)))
133 | weights = 100.0 / torch.Tensor(counts_y)
134 | print('weights of sampler: ', weights)
135 | weights = weights.double()
136 |
137 | sample_weights = get_sample_weights(y_win_all, weights)
138 |
139 | sampler = torch.utils.data.sampler.WeightedRandomSampler(weights=sample_weights,
140 | num_samples=len(sample_weights), replacement=True)
141 | transform = transforms.Compose([
142 | transforms.ToTensor(),
143 | transforms.Normalize(mean=(0, 0, 0, 0, 0, 0, 0, 0, 0), std=(1, 1, 1, 1, 1, 1, 1, 1, 1))
144 | ])
145 |
146 | data_set = data_loader_ucihar(x_win_all, y_win_all, d_win_all, transform)
147 | source_loader = DataLoader(data_set, batch_size=args.batch_size, shuffle=False, drop_last=True, sampler=sampler)
148 | print('source_loader batch: ', len(source_loader))
149 | source_loaders = [source_loader]
150 |
151 | # target domain data prep
152 | print('target_domain:', args.target_domain)
153 | x, y, d = load_domain_data(args.target_domain)
154 |
155 | x = np.transpose(x.reshape((-1, 1, 128, 9)), (0, 2, 1, 3))
156 |
157 | print(" ..after sliding window: inputs {0}, targets {1}".format(x.shape, y.shape))
158 |
159 | data_set = data_loader_ucihar(x, y, d, transform)
160 | target_loader = DataLoader(data_set, batch_size=args.batch_size, shuffle=False)
161 |
162 | print('target_loader batch: ', len(target_loader))
163 | return source_loaders, None, target_loader
164 |
165 | def prep_domains_ucihar_subject_large(args, SLIDING_WINDOW_LEN=0, SLIDING_WINDOW_STEP=0):
166 | source_domain_list = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13', '14', '15', '16', '17', '18', '19', '20', '21', '22', '23', '24', '25', '26', '27', '28', '29']
167 | source_domain_list.remove(args.target_domain)
168 |
169 | # source domain data prep
170 | x_win_all, y_win_all, d_win_all = np.array([]), np.array([]), np.array([])
171 | for source_domain in source_domain_list:
172 | print('source_domain:', source_domain)
173 | x, y, d = load_domain_data(source_domain)
174 |
175 | # n_channel should be 9, H: 1, W:128
176 | x = np.transpose(x.reshape((-1, 1, 128, 9)), (0, 2, 1, 3))
177 | # the UCIHAR dataset is segmented by sliding window as default
178 | print(" ..after sliding window: inputs {0}, targets {1}".format(x.shape, y.shape))
179 |
180 | x_win_all = np.concatenate((x_win_all, x), axis=0) if x_win_all.size else x
181 | y_win_all = np.concatenate((y_win_all, y), axis=0) if y_win_all.size else y
182 | d_win_all = np.concatenate((d_win_all, d), axis=0) if d_win_all.size else d
183 |
184 | unique_y, counts_y = np.unique(y_win_all, return_counts=True)
185 | print('y_train label distribution: ', dict(zip(unique_y, counts_y)))
186 | weights = 100.0 / torch.Tensor(counts_y)
187 | print('weights of sampler: ', weights)
188 | weights = weights.double()
189 |
190 | sample_weights = get_sample_weights(y_win_all, weights)
191 |
192 | sampler = torch.utils.data.sampler.WeightedRandomSampler(weights=sample_weights, num_samples=len(sample_weights), replacement=True)
193 | transform = transforms.Compose([
194 | transforms.ToTensor(),
195 | transforms.Normalize(mean=(0, 0, 0, 0, 0, 0, 0, 0, 0), std=(1, 1, 1, 1, 1, 1, 1, 1, 1))
196 | ])
197 |
198 | data_set = data_loader_ucihar(x_win_all, y_win_all, d_win_all, transform)
199 | source_loader = DataLoader(data_set, batch_size=args.batch_size, shuffle=False, drop_last=True, sampler=sampler)
200 | print('source_loader batch: ', len(source_loader))
201 | source_loaders = [source_loader]
202 |
203 | # target domain data prep
204 | print('target_domain:', args.target_domain)
205 | x, y, d = load_domain_data(args.target_domain)
206 |
207 | x = np.transpose(x.reshape((-1, 1, 128, 9)), (0, 2, 1, 3))
208 | print(" ..after sliding window: inputs {0}, targets {1}".format(x.shape, y.shape))
209 |
210 | data_set = data_loader_ucihar(x, y, d, transform)
211 | # todo: the batch size can be different for some ttt models, tbc
212 | target_loader = DataLoader(data_set, batch_size=args.batch_size, shuffle=False)
213 | print('target_loader batch: ', len(target_loader))
214 |
215 | return source_loaders, None, target_loader
216 |
217 |
218 | def prep_domains_ucihar_random(args, SLIDING_WINDOW_LEN=0, SLIDING_WINDOW_STEP=0):
219 | source_domain_list = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13', '14', '15', '16', '17', '18', '19', '20', '21', '22', '23', '24', '25', '26', '27', '28', '29']
220 |
221 | x_win_all, y_win_all, d_win_all = np.array([]), np.array([]), np.array([])
222 | n_train, n_test, split_ratio = [], 0, 0.0
223 |
224 | for source_domain in source_domain_list:
225 | # print('source_domain:', source_domain)
226 | x_win, y_win, d_win = load_domain_data(source_domain)
227 |
228 | # n_channel should be 9, H: 1, W:128
229 | x_win = np.transpose(x_win.reshape((-1, 1, 128, 9)), (0, 2, 1, 3))
230 | # print(" ..after sliding window: inputs {0}, targets {1}".format(x_win.shape, y_win.shape))
231 |
232 | x_win_all = np.concatenate((x_win_all, x_win), axis=0) if x_win_all.size else x_win
233 | y_win_all = np.concatenate((y_win_all, y_win), axis=0) if y_win_all.size else y_win
234 | d_win_all = np.concatenate((d_win_all, d_win), axis=0) if d_win_all.size else d_win
235 | n_train.append(x_win.shape[0])
236 |
237 | x_win_train, x_win_val, x_win_test, \
238 | y_win_train, y_win_val, y_win_test, \
239 | d_win_train, d_win_val, d_win_test = train_test_val_split(x_win_all, y_win_all, d_win_all, split_ratio=args.split_ratio)
240 |
241 | print(x_win_train.shape)
242 | unique_y, counts_y = np.unique(y_win_train, return_counts=True)
243 | print('y_train label distribution: ', dict(zip(unique_y, counts_y)))
244 | weights = 100.0 / torch.Tensor(counts_y)
245 | print('weights of sampler: ', weights)
246 | weights = weights.double()
247 | sample_weights = get_sample_weights(y_win_train, weights)
248 | sampler = torch.utils.data.sampler.WeightedRandomSampler(weights=sample_weights, num_samples=len(sample_weights), replacement=True)
249 |
250 | transform = transforms.Compose([
251 | transforms.ToTensor(),
252 | transforms.Normalize(mean=(0, 0, 0, 0, 0, 0, 0, 0, 0), std=(1, 1, 1, 1, 1, 1, 1, 1, 1))
253 | ])
254 | train_set_r = data_loader_ucihar(x_win_train, y_win_train, d_win_train, transform)
255 | train_loader_r = DataLoader(train_set_r, batch_size=args.batch_size, shuffle=False, drop_last=True, sampler=sampler)
256 | val_set_r = data_loader_ucihar(x_win_val, y_win_val, d_win_val, transform)
257 | val_loader_r = DataLoader(val_set_r, batch_size=args.batch_size, shuffle=False)
258 | test_set_r = data_loader_ucihar(x_win_test, y_win_test, d_win_test, transform)
259 | test_loader_r = DataLoader(test_set_r, batch_size=args.batch_size, shuffle=False)
260 |
261 | return [train_loader_r], val_loader_r, test_loader_r
262 |
263 |
264 | def prep_ucihar(args, SLIDING_WINDOW_LEN=0, SLIDING_WINDOW_STEP=0):
265 | if args.cases == 'random':
266 | return prep_domains_ucihar_random(args, SLIDING_WINDOW_LEN, SLIDING_WINDOW_STEP)
267 | elif args.cases == 'subject':
268 | return prep_domains_ucihar_subject(args, SLIDING_WINDOW_LEN, SLIDING_WINDOW_STEP)
269 | elif args.cases == 'subject_large':
270 | return prep_domains_ucihar_subject_large(args, SLIDING_WINDOW_LEN, SLIDING_WINDOW_STEP)
271 | elif args.cases == '':
272 | pass
273 |     else:
274 |         raise ValueError('Error! Unknown args.cases!')
275 |
276 |
--------------------------------------------------------------------------------
/data_preprocess/data_preprocess_wisdm.py:
--------------------------------------------------------------------------------
1 | # encoding=utf-8
2 | """
3 | Created on 10:38 2018/12/17
4 | @author: Hangwei Qian
5 | Adapted from: https://github.com/guillaume-chevalier/HAR-stacked-residual-bidir-LSTMs
6 | """
7 |
8 | import os
9 | import numpy as np
10 | import torch
11 | import pickle as cp
12 | from pandas import Series
13 | import zipfile
14 | import argparse
15 | from io import BytesIO
16 | from torch.utils.data import Dataset, DataLoader
17 | from torchvision import transforms
18 | from data_preprocess.data_preprocess_utils import get_sample_weights, train_test_val_split, opp_sliding_window_w_d
19 | from sklearn.model_selection import StratifiedShuffleSplit
20 |
21 | torch.manual_seed(10)
22 |
23 | NUM_FEATURES = 3
24 |
25 | class data_loader_wisdm(Dataset):
26 | def __init__(self, samples, labels, domains):
27 | self.samples = samples
28 | self.labels = labels
29 | self.domains = domains
30 |
31 | def __getitem__(self, index):
32 | sample, target, domain = self.samples[index], self.labels[index], self.domains[index]
33 | return sample, target, domain
34 |
35 | def __len__(self):
36 | return len(self.samples)
37 |
38 | def load_domain_data(domain_idx):
39 | """ to load all the data from the specific domain
40 | :param domain_idx:
41 | :return: X and y data of the entire domain
42 | """
43 | data_dir = './data/WISDM_ar_v1.1/'
44 | saved_filename = 'wisdm_domain_' + domain_idx + '_wd.data'
45 |     if os.path.isfile(data_dir + saved_filename):
46 | data = np.load(data_dir + saved_filename, allow_pickle=True)
47 | X = data[0][0]
48 | y = data[0][1]
49 | d = data[0][2]
50 | else:
51 | str_folder = './data/WISDM_ar_v1.1/'
52 | data_all = np.genfromtxt(str_folder + 'WISDM_ar_v1.1_raw_hangwei_v2.txt', delimiter=',', usecols=[0,1,3,4,5])
53 |
54 | X_all = data_all[:, 2:] # data: (1098209, 3)
55 | y_all = data_all[:, 1] - 1 # to map the labels from [1,...,6] to [0, 5]
56 | id_all = data_all[:, 0]
57 |
58 |
59 | print('\nProcessing domain {0} files...\n'.format(domain_idx))
60 | target_idx = np.where(id_all == int(domain_idx))
61 | X = X_all[target_idx]
62 | y = y_all[target_idx]
63 |
64 |         # map the string domain indices ('1', '3', ...) to consecutive integers [0, 19]
65 |         # todo: revisit if the domain list changes
66 | domain_idx_map = {'1':0, '3':1, '5':2, '6':3, '7':4, '8':5,
67 | '12':6, '13':7, '18':8, '19':9, '20':10,
68 | '21':11, '24':12, '27':13, '29':14,
69 | '31':15, '32':16, '33':17, '34':18, '36':19}
75 | domain_idx_int = domain_idx_map[domain_idx]
76 |
77 | d = np.full(y.shape, domain_idx_int, dtype=int)
78 | print('\nProcessing domain {0} files | X: {1} y: {2} d:{3} \n'.format(domain_idx, X.shape, y.shape, d.shape))
79 |
80 | obj = [(X, y, d)]
81 |         # `file` is not supported in Python 3; use open() instead (hangwei)
82 | f = open(os.path.join(data_dir, saved_filename), 'wb')
83 | cp.dump(obj, f, protocol=cp.HIGHEST_PROTOCOL)
84 | f.close()
85 | return X, y, d
86 |
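# Sketch of the sliding-window segmentation applied downstream. The real
# helper, opp_sliding_window_w_d, lives in data_preprocess_utils and also
# windows y and d; this demo only assumes the usual length/stride semantics.
def _demo_sliding_window(win=4, step=2):
    import numpy as np
    x = np.arange(10)[:, None]                           # (T, C) with C = 1
    starts = range(0, len(x) - win + 1, step)
    x_win = np.stack([x[s:s + win] for s in starts])     # (n_windows, win, C)
    return x_win.shape                                   # (4, 4, 1)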
87 |
88 |
89 | def prep_domains_wisdm_subject(args, SLIDING_WINDOW_LEN=0, SLIDING_WINDOW_STEP=0):
90 |
91 |     # hangwei: the WISDM dataset has 36 subjects (domains), [1, ..., 36];
92 |     # only the 20 subjects listed below have complete data for every class
101 | source_domain_list = ['1', '3', '5', '6', '7', '8',
102 | '12', '13', '18', '19', '20',
103 | '21', '24', '27', '29',
104 | '31', '32', '33', '34', '36']
105 | source_domain_list.remove(args.target_domain)
106 |
107 | # source domain data prep
108 | source_loaders = []
109 | x_win_all, y_win_all, d_win_all = np.array([]), np.array([]), np.array([])
110 | for source_domain in source_domain_list:
111 | print('source_domain:', source_domain)
112 | x, y, d = load_domain_data(source_domain)
113 | y = y.astype(int)
114 | x_win, y_win, d_win = opp_sliding_window_w_d(x, y, d, SLIDING_WINDOW_LEN, SLIDING_WINDOW_STEP)
115 | print(" ..after sliding window: inputs {0}, targets {1}".format(x_win.shape, y_win.shape))
116 | x_win_all = np.concatenate((x_win_all, x_win), axis=0) if x_win_all.size else x_win
117 | y_win_all = np.concatenate((y_win_all, y_win), axis=0) if y_win_all.size else y_win
118 | d_win_all = np.concatenate((d_win_all, d_win), axis=0) if d_win_all.size else d_win
119 |
120 | # get the info of the dataset, by hangwei. 1.15.2019
121 | unique_y, counts_y = np.unique(y_win_all, return_counts=True)
122 | print('y_train label distribution: ', dict(zip(unique_y, counts_y)))
123 | weights = 100.0 / torch.Tensor(counts_y)
124 | print('weights of sampler: ', weights)
125 | weights = weights.double()
126 |
127 |     # one weight per sample, over ALL accumulated source windows
128 |     sample_weights = get_sample_weights(y_win_all, weights)
129 |
130 | sampler = torch.utils.data.sampler.WeightedRandomSampler(weights=sample_weights,
131 | num_samples=len(sample_weights), replacement=True)
132 |     data_set = data_loader_wisdm(x_win_all, y_win_all, d_win_all)  # all source domains, not just the last one
133 | source_loader = DataLoader(data_set, batch_size=args.batch_size, shuffle=False, drop_last=True, sampler=sampler)
134 | print('source_loader batch: ', len(source_loader))
135 | source_loaders.append(source_loader)
136 |
137 | # target domain data prep
138 | print('target_domain:', args.target_domain)
139 | x, y, d = load_domain_data(args.target_domain)
140 | y = y.astype(int)
141 | x_win, y_win, d_win = opp_sliding_window_w_d(x, y, d, SLIDING_WINDOW_LEN, SLIDING_WINDOW_STEP)
142 | print(" ..after sliding window: inputs {0}, targets {1}".format(x_win.shape, y_win.shape))
143 |
144 | data_set = data_loader_wisdm(x_win, y_win, d_win)
145 |     # evaluation loader: no sampler is used here,
146 |     # so shuffle stays False and the sample order is fixed
147 | target_loader = DataLoader(data_set, batch_size=args.batch_size, shuffle=False)
148 | print('target_loader batch: ', len(target_loader))
149 |
150 | return source_loaders, None, target_loader
151 |
152 | def prep_domains_wisdm_subject_small(args, SLIDING_WINDOW_LEN=0, SLIDING_WINDOW_STEP=0):
153 |
154 |     # hangwei: the WISDM dataset has 36 subjects (domains); this small
155 |     # variant uses only 5 of the 20 subjects with complete data
164 | source_domain_list = ['20', '31', '8', '12', '13']
165 | source_domain_list.remove(args.target_domain)
166 |
167 | # source domain data prep
168 | source_loaders = []
169 | x_win_all, y_win_all, d_win_all = np.array([]), np.array([]), np.array([])
170 | for source_domain in source_domain_list:
171 | print('source_domain:', source_domain)
172 | x, y, d = load_domain_data(source_domain)
173 | y = y.astype(int)
174 | x_win, y_win, d_win = opp_sliding_window_w_d(x, y, d, SLIDING_WINDOW_LEN, SLIDING_WINDOW_STEP)
175 | print(" ..after sliding window: inputs {0}, targets {1}".format(x_win.shape, y_win.shape))
176 | x_win_all = np.concatenate((x_win_all, x_win), axis=0) if x_win_all.size else x_win
177 | y_win_all = np.concatenate((y_win_all, y_win), axis=0) if y_win_all.size else y_win
178 | d_win_all = np.concatenate((d_win_all, d_win), axis=0) if d_win_all.size else d_win
179 |
180 | # get the info of the dataset, by hangwei. 1.15.2019
181 | unique_y, counts_y = np.unique(y_win_all, return_counts=True)
182 | print('y_train label distribution: ', dict(zip(unique_y, counts_y)))
183 | weights = 100.0 / torch.Tensor(counts_y)
184 | print('weights of sampler: ', weights)
185 | weights = weights.double()
186 |
187 |     # one weight per sample, over ALL accumulated source windows
188 |     sample_weights = get_sample_weights(y_win_all, weights)
189 |
190 | sampler = torch.utils.data.sampler.WeightedRandomSampler(weights=sample_weights,
191 | num_samples=len(sample_weights), replacement=True)
192 |     data_set = data_loader_wisdm(x_win_all, y_win_all, d_win_all)  # all source domains, not just the last one
193 | source_loader = DataLoader(data_set, batch_size=args.batch_size, shuffle=False, drop_last=True, sampler=sampler)
194 | print('source_loader batch: ', len(source_loader))
195 | source_loaders.append(source_loader)
196 |
197 | # target domain data prep
198 | print('target_domain:', args.target_domain)
199 | x, y, d = load_domain_data(args.target_domain)
200 | y = y.astype(int)
201 | x_win, y_win, d_win = opp_sliding_window_w_d(x, y, d, SLIDING_WINDOW_LEN, SLIDING_WINDOW_STEP)
202 | print(" ..after sliding window: inputs {0}, targets {1}".format(x_win.shape, y_win.shape))
203 |
204 | data_set = data_loader_wisdm(x_win, y_win, d_win)
205 |     # evaluation loader: no sampler is used here,
206 |     # so shuffle stays False and the sample order is fixed
207 | target_loader = DataLoader(data_set, batch_size=args.batch_size, shuffle=False)
208 | print('target_loader batch: ', len(target_loader))
209 |
210 | return source_loaders, None, target_loader
211 |
212 |
213 |
214 | def prep_domains_wisdm_random(args, SLIDING_WINDOW_LEN=0, SLIDING_WINDOW_STEP=0):
215 |
216 |     # hangwei: the WISDM dataset has 36 subjects (domains); pool the 20
217 |     # subjects with complete data and split randomly across all of them
225 | source_domain_list = ['1', '3', '5', '6', '7', '8',
226 | '12', '13', '18', '19', '20',
227 | '21', '24', '27', '29',
228 | '31', '32', '33', '34', '36']
229 | # source_domain_list.remove(args.target_domain)
230 |
231 | x_win_all, y_win_all, d_win_all = np.array([]), np.array([]), np.array([])
232 | n_train, n_test, split_ratio = [], 0, 0.0
233 |
234 | for source_domain in source_domain_list:
235 | print('source_domain:', source_domain)
236 | x, y, d = load_domain_data(source_domain)
237 | y = y.astype(int)
238 | x_win, y_win, d_win = opp_sliding_window_w_d(x, y, d, SLIDING_WINDOW_LEN, SLIDING_WINDOW_STEP)
239 | print(" ..after sliding window: inputs {0}, targets {1}".format(x_win.shape, y_win.shape))
240 |
241 | x_win_all = np.concatenate((x_win_all, x_win), axis=0) if x_win_all.size else x_win
242 | y_win_all = np.concatenate((y_win_all, y_win), axis=0) if y_win_all.size else y_win
243 | d_win_all = np.concatenate((d_win_all, d_win), axis=0) if d_win_all.size else d_win
244 |
245 | n_train.append(x_win.shape[0])
246 |
247 | unique_y, counts_y = np.unique(y_win_all, return_counts=True)
248 | print('y_train label distribution: ', dict(zip(unique_y, counts_y)))
249 |
250 | x_win_train, x_win_val, x_win_test, \
251 | y_win_train, y_win_val, y_win_test, \
252 | d_win_train, d_win_val, d_win_test = train_test_val_split(x_win_all, y_win_all, d_win_all,
253 | split_ratio=args.split_ratio)
254 |
255 | print('x_win_train', x_win_train.shape)
256 | unique_y, counts_y = np.unique(y_win_train, return_counts=True)
257 | print('y_train label distribution: ', dict(zip(unique_y, counts_y)))
258 | weights = 100.0 / torch.Tensor(counts_y)
259 | print('weights of sampler: ', weights)
260 | weights = weights.double()
261 | sample_weights = get_sample_weights(y_win_train, weights)
262 | sampler = torch.utils.data.sampler.WeightedRandomSampler(weights=sample_weights,
263 | num_samples=len(sample_weights), replacement=True)
264 | train_set_r = data_loader_wisdm(x_win_train, y_win_train, d_win_train)
265 | train_loader_r = DataLoader(train_set_r, batch_size=args.batch_size,shuffle=False, drop_last=True, sampler=sampler)
266 | val_set_r = data_loader_wisdm(x_win_val, y_win_val, d_win_val)
267 | val_loader_r = DataLoader(val_set_r, batch_size=args.batch_size, shuffle=False)
268 | test_set_r = data_loader_wisdm(x_win_test, y_win_test, d_win_test)
269 | test_loader_r = DataLoader(test_set_r, batch_size=args.batch_size, shuffle=False)
270 |
271 | return [train_loader_r], val_loader_r, test_loader_r
272 |
273 |
274 | def prep_wisdm(args, SLIDING_WINDOW_LEN=0, SLIDING_WINDOW_STEP=0):
275 | if args.cases == 'subject':
276 | return prep_domains_wisdm_subject(args, SLIDING_WINDOW_LEN, SLIDING_WINDOW_STEP)
277 | if args.cases == 'subject_small':
278 | return prep_domains_wisdm_subject_small(args, SLIDING_WINDOW_LEN, SLIDING_WINDOW_STEP)
279 | elif args.cases == 'random':
280 | return prep_domains_wisdm_random(args, SLIDING_WINDOW_LEN, SLIDING_WINDOW_STEP)
281 | else:
282 | return 'Error!\n'
283 |
--------------------------------------------------------------------------------
/augmentations.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import torch
3 | import scipy
4 | import random
5 |
6 | np.random.seed(10)
7 |
8 | def gen_aug(args, sample, ssh_type):
9 | if ssh_type == 'na':
10 | return sample
11 | elif ssh_type == 'shuffle':
12 | return shuffle(sample)
13 | elif ssh_type == 'jit_scal':
14 | scale_sample = scaling(sample, sigma=2)
15 | return torch.from_numpy(scale_sample)
16 | elif ssh_type == 'perm_jit':
17 | return jitter(permutation(sample, max_segments=10), sigma=0.8)
18 | elif ssh_type == 'resample':
19 | return torch.from_numpy(resample(sample))
20 | elif ssh_type == 'noise':
21 | return jitter(sample)
22 | elif ssh_type == 'scale':
23 | return torch.from_numpy(scaling(sample))
24 | elif ssh_type == 'negate':
25 | return negated(sample)
26 | elif ssh_type == 't_flip':
27 | return time_flipped(sample)
28 |     elif ssh_type == 'rotation':
29 |         rot = multi_rotation(sample)  # compute once instead of up to three times
30 |         if isinstance(rot, np.ndarray):
31 |             return torch.from_numpy(rot)
32 |         return rot
33 | elif ssh_type == 'perm':
34 | return permutation(sample, max_segments=10)
35 | elif ssh_type == 't_warp':
36 | return torch.from_numpy(time_warp(sample))
43 | elif ssh_type == 'hfc':
44 | return generate_high(sample, high=True)
45 | elif ssh_type == 'lfc':
46 | return generate_high(sample, high=False)
47 | elif ssh_type == 'p_shift':
48 | return ifft_phase_shift(sample)
49 | elif ssh_type == 'ap_p':
50 | return ifft_amp_phase_pert(sample)
51 | elif ssh_type == 'ap_f':
52 | return ifft_amp_phase_pert_fully(sample)
53 | elif ssh_type == 'rand_fourier':
54 | return rand_fourier(sample, args.n_modes)
55 | elif ssh_type == 'shuffle_rand_fourier':
56 | return shuffle_rand_fourier(sample, args.n_modes)
57 |     else:
58 |         raise ValueError('The augmentation %s is not available!' % ssh_type)
59 |
60 |
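# Usage sketch: gen_aug maps a batch (B, len_sw, n_channels) to one augmented
# view of the same shape; args is only consulted by the fourier-based options
# ('rand_fourier', 'shuffle_rand_fourier'), so None is fine elsewhere.
def _demo_gen_aug():
    import torch
    batch = torch.randn(8, 128, 3)
    noisy = gen_aug(None, batch, 'noise')   # additive jitter
    return noisy.shape                      # (8, 128, 3)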
61 |
62 | def shuffle(x):
63 | sample_ssh = []
64 | for data in x:
65 | p = np.random.RandomState(seed=21).permutation(data.shape[1])
66 | data = data[:, p]
67 | sample_ssh.append(data)
68 | return torch.stack(sample_ssh)
69 |
70 |
71 | def jitter(x, sigma=0.8):
72 | # https://arxiv.org/pdf/1706.00527.pdf
73 | return x + np.random.normal(loc=0., scale=sigma, size=x.shape)
74 |
75 |
76 | def scaling(x, sigma=1.1): # apply same distortion to the signals from each sensor
77 | # https://arxiv.org/pdf/1706.00527.pdf
78 | factor = np.random.normal(loc=2., scale=sigma, size=(x.shape[0], x.shape[1]))
79 | ai = []
80 | for i in range(x.shape[2]):
81 | xi = x[:, :, i]
82 | ai.append(np.multiply(xi, factor[:, :])[:, :, np.newaxis])
83 | return np.concatenate((ai), axis=2)
84 |
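# Sanity sketch for scaling() above: one random factor per (sample, time step),
# drawn around loc=2., is shared across all channels, so with sigma=0 every
# value is scaled by exactly 2.
def _demo_scaling_shared_factor():
    import numpy as np
    x = np.ones((1, 4, 3))                   # (B, T, C)
    out = scaling(x, sigma=0.0)              # factor == 2 everywhere
    return float(out.mean())                 # 2.0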
85 |
86 | def negated(X):
87 | return X * -1
88 |
89 |
90 | def time_flipped(X):
91 | inv_idx = torch.arange(X.size(1) - 1, -1, -1).long()
92 | return X[:, inv_idx, :]
93 |
94 |
95 | def permutation(x, max_segments=5, seg_mode="random"):
96 | orig_steps = np.arange(x.shape[1])
97 | num_segs = np.random.randint(1, max_segments, size=(x.shape[0]))
98 | ret = np.zeros_like(x)
99 | for i, pat in enumerate(x):
100 | if num_segs[i] > 1:
101 | if seg_mode == "random":
102 | split_points = np.random.choice(x.shape[1] - 2, num_segs[i] - 1, replace=False)
103 | split_points.sort()
104 | splits = np.split(orig_steps, split_points)
105 | else:
106 | splits = np.array_split(orig_steps, num_segs[i])
107 | np.random.shuffle(splits)
108 | warp = np.concatenate(splits).ravel()
109 | ret[i] = pat[warp, :]
110 | else:
111 | ret[i] = pat
112 | return torch.from_numpy(ret)
113 |
114 |
115 | def resample(x):
116 | from scipy.interpolate import interp1d
117 | orig_steps = np.arange(x.shape[1])
118 | interp_steps = np.arange(0, orig_steps[-1]+0.001, 1/3)
119 | Interp = interp1d(orig_steps, x, axis=1)
120 | InterpVal = Interp(interp_steps)
121 | start = random.choice(orig_steps)
122 | resample_index = np.arange(start, 3 * x.shape[1], 2)[:x.shape[1]]
123 | return InterpVal[:, resample_index, :]
124 |
125 |
126 | def multi_rotation(x):
127 | n_channel = x.shape[2]
128 | n_rot = n_channel // 3
129 | x_rot = np.array([])
130 | for i in range(n_rot):
131 | x_rot = np.concatenate((x_rot, rotation(x[:, :, i * 3:i * 3 + 3])), axis=2) if x_rot.size else rotation(
132 | x[:, :, i * 3:i * 3 + 3])
133 | return x_rot
134 |
135 | def rotation(X):
136 | """
137 | Applying a random 3D rotation
138 | """
139 | axes = np.random.uniform(low=-1, high=1, size=(X.shape[0], X.shape[2]))
140 | angles = np.random.uniform(low=-np.pi, high=np.pi, size=(X.shape[0]))
141 | matrices = axis_angle_to_rotation_matrix_3d_vectorized(axes, angles)
142 | return np.matmul(X, matrices)
143 |
144 | def axis_angle_to_rotation_matrix_3d_vectorized(axes, angles):
145 | """
146 | Get the rotational matrix corresponding to a rotation of (angle) radian around the axes
147 | Reference: the Transforms3d package - transforms3d.axangles.axangle2mat
148 | Formula: http://en.wikipedia.org/wiki/Rotation_matrix#Axis_and_angle
149 | """
150 | axes = axes / np.linalg.norm(axes, ord=2, axis=1, keepdims=True)
151 | x = axes[:, 0]; y = axes[:, 1]; z = axes[:, 2]
152 | c = np.cos(angles)
153 | s = np.sin(angles)
154 | C = 1 - c
155 |
156 | xs = x*s; ys = y*s; zs = z*s
157 | xC = x*C; yC = y*C; zC = z*C
158 | xyC = x*yC; yzC = y*zC; zxC = z*xC
159 |
160 | m = np.array([
161 | [ x*xC+c, xyC-zs, zxC+ys ],
162 | [ xyC+zs, y*yC+c, yzC-xs ],
163 | [ zxC-ys, yzC+xs, z*zC+c ]])
164 | matrix_transposed = np.transpose(m, axes=(2,0,1))
165 | return matrix_transposed
166 |
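# Quick numeric check (sketch) of the axis-angle formula above: pi/2 about the
# z-axis maps the x unit vector onto y under the column-vector convention;
# rotation() then applies the matrix to row vectors via np.matmul(X, matrices).
def _demo_axis_angle():
    import numpy as np
    R = axis_angle_to_rotation_matrix_3d_vectorized(
        np.array([[0.0, 0.0, 1.0]]), np.array([np.pi / 2]))[0]
    return np.allclose(R @ np.array([1.0, 0.0, 0.0]), [0.0, 1.0, 0.0])  # True
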
167 | def get_cubic_spline_interpolation(x_eval, x_data, y_data):
168 | """
169 | Get values for the cubic spline interpolation
170 | """
171 | cubic_spline = scipy.interpolate.CubicSpline(x_data, y_data)
172 | return cubic_spline(x_eval)
173 |
174 |
175 | def time_warp(X, sigma=0.2, num_knots=4):
176 | """
177 | Stretching and warping the time-series
178 | """
179 | time_stamps = np.arange(X.shape[1])
180 | knot_xs = np.arange(0, num_knots + 2, dtype=float) * (X.shape[1] - 1) / (num_knots + 1)
181 | spline_ys = np.random.normal(loc=1.0, scale=sigma, size=(X.shape[0] * X.shape[2], num_knots + 2))
182 |
183 | spline_values = np.array([get_cubic_spline_interpolation(time_stamps, knot_xs, spline_ys_individual) for spline_ys_individual in spline_ys])
184 |
185 | cumulative_sum = np.cumsum(spline_values, axis=1)
186 | distorted_time_stamps_all = cumulative_sum / cumulative_sum[:, -1][:, np.newaxis] * (X.shape[1] - 1)
187 |
188 | X_transformed = np.empty(shape=X.shape)
189 | for i, distorted_time_stamps in enumerate(distorted_time_stamps_all):
190 | X_transformed[i // X.shape[2], :, i % X.shape[2]] = np.interp(time_stamps, distorted_time_stamps, X[i // X.shape[2], :, i % X.shape[2]])
191 | return X_transformed
192 |
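# Sketch of the warping map above: spline values (drawn around 1.0, so almost
# surely positive) are cumulatively summed and rescaled, giving a monotone
# time map that ends exactly at T - 1.
def _demo_warp_map():
    import numpy as np
    spline_values = np.array([[1.0, 0.5, 2.0, 0.5]])       # one series, T = 4
    cs = np.cumsum(spline_values, axis=1)
    return cs / cs[:, -1][:, np.newaxis] * (4 - 1)         # [[0.75 1.125 2.625 3.]]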
193 |
194 | def distance(i, j, imageSize, r):
195 |     dis_x = abs(i - imageSize[0] / 2)
196 |     dis_y = abs(j - imageSize[1] / 2)
197 |     if dis_x < r[0] and dis_y < r[1]:
198 |         return 1.0
199 |     else:
200 |         return 0.0
201 |
202 |
203 | def mask_radial(img, r):
204 | rows, cols = img.shape
205 | mask = torch.zeros((rows, cols))
206 | for i in range(rows):
207 | for j in range(cols):
208 | mask[i, j] = distance(i, j, imageSize=(rows, cols), r=r)
209 | return mask
210 |
211 |
229 |
230 | def generate_high(sample, high=True):
231 | x_ft = torch.fft.rfft(sample, dim=-2)
232 | n_components = x_ft.shape[1]
233 | if high:
234 | aug = torch.fft.irfft(x_ft[:, n_components//2:, :], n=sample.shape[-2], dim=-2)
235 | else:
236 | aug = torch.fft.irfft(x_ft[:, :n_components // 2, :], n=sample.shape[-2], dim=-2)
237 | return aug
238 |
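# Frequency sketch for generate_high above: rfft along time yields T//2 + 1
# components; inverting only the first half is a crude low-pass view of the
# window (for a slow sine, low ~= x and the 'high' view is near zero).
def _demo_low_pass():
    import math
    import torch
    t = torch.arange(128, dtype=torch.float32)
    x = torch.sin(2 * math.pi * t / 64).reshape(1, 128, 1)    # period-64 sine
    low = generate_high(x, high=False)
    high = generate_high(x, high=True)
    return torch.allclose(low, x, atol=1e-4), float(high.abs().max())  # True, ~0
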
239 | def ifft_phase_shift(sample):
240 | images = torch.unsqueeze(sample, 1)
241 | bs, c, h, w = images.shape
242 | x = images.reshape([bs * c, h, w])
243 | fd = torch.fft.fftshift(torch.fft.fftn(x, dim=(-2, -1)))
244 |
245 | amp = fd.abs()
246 | phase = fd.angle()
247 |
248 | # phase shift
249 | angles = np.repeat(np.expand_dims(np.random.uniform(low=-np.pi, high=np.pi, size=(sample.shape[0], sample.shape[1])), axis=2), sample.shape[2], axis=2)
250 | phase = phase + angles
251 |
252 | cmp = amp * torch.exp(1j * phase)
253 | ifft = torch.squeeze(torch.real(torch.fft.ifftn(torch.fft.ifftshift(cmp), dim=(-2, -1))).reshape([bs, c, h, w]))
254 |
255 | return ifft
256 |
257 |
258 | def ifft_amp_phase_pert(sample):
259 | images = torch.unsqueeze(sample, 1)
260 | bs, c, h, w = images.shape
261 | x = images.reshape([bs * c, h, w])
262 | fd = torch.fft.fftshift(torch.fft.fftn(x, dim=(-2, -1)))
263 |
264 | amp = fd.abs()
265 | phase = fd.angle()
266 |
267 | # select a segment to conduct perturbations
268 | start = np.random.randint(0, int(0.5 * sample.shape[1]))
269 | end = start + int(0.5 * sample.shape[1])
270 |
271 | # phase shift
272 | angles = np.repeat(np.expand_dims(np.random.uniform(low=-np.pi, high=np.pi, size=(sample.shape[0], sample.shape[1])), axis=2), sample.shape[2], axis=2)
273 | phase[:, start:end, :] = phase[:, start:end, :] + angles[:, start:end, :]
274 |
275 | # amp shift
276 | amp[:, start:end, :] = amp[:, start:end, :] + np.random.normal(loc=0., scale=0.8, size=sample.shape)[:, start:end, :]
277 |
278 | cmp = amp * torch.exp(1j * phase)
279 | ifft = torch.squeeze(torch.real(torch.fft.ifftn(torch.fft.ifftshift(cmp), dim=(-2, -1))).reshape([bs, c, h, w]))
280 |
281 | return ifft
282 |
283 |
284 | def ifft_amp_phase_pert_fully(sample):
285 | images = torch.unsqueeze(sample, 1)
286 | bs, c, h, w = images.shape
287 | x = images.reshape([bs * c, h, w])
288 | fd = torch.fft.fftshift(torch.fft.fftn(x, dim=(-2, -1)))
289 |
290 | amp = fd.abs()
291 | phase = fd.angle()
292 |
293 | # phase shift
294 | angles = np.repeat(np.expand_dims(np.random.uniform(low=-np.pi, high=np.pi, size=(sample.shape[0], sample.shape[1])), axis=2), sample.shape[2], axis=2)
295 | phase = phase + angles
296 |
297 | # amp shift
298 | amp = amp + np.random.normal(loc=0., scale=0.8, size=sample.shape)
299 |
300 | cmp = amp * torch.exp(1j * phase)
301 | ifft = torch.squeeze(torch.real(torch.fft.ifftn(torch.fft.ifftshift(cmp), dim=(-2, -1))).reshape([bs, c, h, w]))
302 |
303 | return ifft
304 |
305 | def generate_rand_n_augviews(sample, n):
306 | aug_name_list = ['shuffle', 'jit_scal', 'perm_jit', 'resample', 'noise', 'scale', 'negate', 't_flip', 'rotation', 'perm', 't_warp']
307 | aug_idxs = np.arange(len(aug_name_list))
308 | np.random.shuffle(aug_idxs)
309 | aug_idxs = aug_idxs[:n]
310 | augviews = []
311 | for aug_idx in aug_idxs:
312 | # print(aug_name_list[aug_idx])
313 |         aug_view = gen_aug(None, sample, aug_name_list[aug_idx])  # args is only needed for the fourier augs
314 | augviews.append(aug_view)
315 | return augviews
316 |
317 | # def generate_fixed_n_augviews(sample, n):
318 | # aug_name_list = ['perm_jit', 'perm', 'noise', 'scale']
319 | # augviews = []
320 | # for aug_idx in range(n):
321 | # # print(aug_name_list[aug_idx])
322 | # aug_view = gen_aug(sample, aug_name_list[aug_idx])
323 | # augviews.append(aug_view)
324 | # return augviews
325 |
326 | def generate_fixed_n_augviews(args, sample, n):
327 | aug_name_list = ['shuffle', 'jit_scal', 'perm_jit', 'resample', 'noise', 'scale', 'negate', 't_flip', 'rotation', 'perm', 't_warp']
328 | aug_idxs = np.arange(len(aug_name_list))
329 | np.random.seed(args.rand_seed)
330 | np.random.shuffle(aug_idxs)
331 | aug_idxs = aug_idxs[:n]
332 | args.aug_idxs = aug_idxs
333 | augviews = []
334 | for aug_idx in aug_idxs:
335 | # print(aug_name_list[aug_idx])
336 |         aug_view = gen_aug(args, sample, aug_name_list[aug_idx])
337 | augviews.append(aug_view)
338 | return augviews
339 |
340 | def generate_predefined_n_augviews(args, sample, n):
341 | aug_name_list = ['shuffle', 'jit_scal', 'perm_jit', 'resample', 'noise', 'scale', 'negate', 't_flip', 'rotation', 'perm', 't_warp']
342 |     aug_idxs = [1, 2, 4, 9][:n]  # jit_scal, perm_jit, noise, perm
343 | # print(aug_idxs)
344 | args.aug_idxs = aug_idxs
345 | augviews = []
346 | for aug_idx in aug_idxs:
347 | # print(aug_name_list[aug_idx])
348 | aug_view = gen_aug(sample, aug_name_list[aug_idx])
349 | augviews.append(aug_view)
350 | return augviews
351 |
352 | def generate_predefined_n_id_augviews(args, sample, n):
353 | aug_name_list = ['shuffle', 'jit_scal', 'perm_jit', 'resample', 'noise', 'scale', 'negate', 't_flip', 'rotation', 'perm', 't_warp']
354 |     aug_idxs = [1, 1, 1, 1][:n]  # n copies of 'jit_scal'
355 | # print(aug_idxs)
356 | args.aug_idxs = aug_idxs
357 | augviews = []
358 | for aug_idx in aug_idxs:
359 | # print(aug_name_list[aug_idx])
360 | aug_view = gen_aug(sample, aug_name_list[aug_idx])
361 | augviews.append(aug_view)
362 | return augviews
363 |
364 | def generate_predefined_n_augviews_with_idx(args, sample, n, aug_idxs):
365 | aug_name_list = ['shuffle', 'jit_scal', 'perm_jit', 'resample', 'noise', 'scale', 'negate', 't_flip', 'rotation', 'perm', 't_warp']
366 |     if n != len(aug_idxs):
367 |         raise ValueError('n must equal the number of augmentation indices')
368 |     args.aug_idxs = aug_idxs
369 |     # aug_idxs may arrive as a string of digits (e.g. '1249'), so coerce
370 |     # each element to an int index into aug_name_list
371 |     list1 = [int(i) for i in aug_idxs]
372 |
373 | augviews = []
374 | for aug_idx in list1:
375 | # print(aug_name_list[aug_idx])
376 | aug_view = gen_aug(sample, aug_name_list[aug_idx])
377 | augviews.append(aug_view)
378 | return augviews
379 |
380 | def generate_rand_n_augviews_1284(sample, n):
381 | aug_name_list = ['shuffle', 'jit_scal', 'perm_jit', 'resample', 'noise', 'scale', 'negate', 't_flip', 'rotation', 'perm', 't_warp']
382 |     aug_idxs = [1, 2, 8, 4]  # jit_scal, perm_jit, rotation, noise
383 | np.random.shuffle(aug_idxs)
384 | aug_idxs = aug_idxs[:n]
385 | augviews = []
386 | for aug_idx in aug_idxs:
387 | # print(aug_name_list[aug_idx])
388 | aug_view = gen_aug(sample, aug_name_list[aug_idx])
389 | augviews.append(aug_view)
390 | return augviews
391 |
392 | # def sample_k_aug_idx(k):
393 | # aug_name_list = ['shuffle', 'jit_scal', 'perm_jit', 'resample', 'noise', 'scale', 'negate', 't_flip', 'rotation',
394 | # 'perm', 't_warp']
395 | # aug_idxs = np.arange(len(aug_name_list))
396 | # np.random.shuffle(aug_idxs)
397 | # return aug_idxs[:k]
398 | #
399 | # def generate_rand_n_augviews_from_k(sample, aug_idxs, n): # todo assume all augs are equally effective, only number matters
400 | # if n > len(aug_idxs):
401 | # raise ValueError
402 | # aug_name_list = ['shuffle', 'jit_scal', 'perm_jit', 'resample', 'noise', 'scale', 'negate', 't_flip', 'rotation', 'perm', 't_warp']
403 | # np.random.shuffle(aug_idxs)
404 | # aug_idxs = aug_idxs[:n]
405 | # augviews = []
406 | # for aug_idx in aug_idxs:
407 | # print(aug_name_list[aug_idx])
408 | # aug_view = gen_aug(sample, aug_name_list[aug_idx])
409 | # augviews.append(aug_view)
410 | # return augviews
411 |
412 | def generate_rand_n_augviews_from_k(sample, n, k):
413 |     if n > k:
414 |         raise ValueError('cannot sample n views when n exceeds the k available augmentations')
415 | aug_name_list = ['shuffle', 'jit_scal', 'perm_jit', 'resample', 'noise', 'scale', 'negate', 't_flip', 'rotation', 'perm', 't_warp']
416 | aug_idxs = np.arange(len(aug_name_list))[:k] # keep only k augs
417 | np.random.shuffle(aug_idxs)
418 | # print(aug_idxs)
419 | aug_idxs = aug_idxs[:n] # randomly sample n from k
420 | augviews = []
421 | # print(aug_idxs)
422 | for aug_idx in aug_idxs:
423 | # print(aug_name_list[aug_idx])
424 | aug_view = gen_aug(sample, aug_name_list[aug_idx])
425 | augviews.append(aug_view)
426 | return augviews
427 |
428 | def rand_fourier(x, n_modes):
429 |     # keep only the lowest n_modes Fourier components of x along time
430 |     n_modes = min(n_modes, x.shape[1]//2)
431 |     x_ft = torch.fft.rfft(x, dim=-2)
432 |
433 |     # the first n_modes rfft bins are the lowest frequencies
434 |     index = list(range(x.shape[1]//2))
435 |     index = index[:n_modes]
436 |
437 |     # Return to time domain: torch.fft.irfft zero-pads the dropped bins back
438 |     # to n//2 + 1, so this is a low-pass reconstruction of x
439 |     x = torch.fft.irfft(x_ft[:, index, :], n=x.size(1), dim=-2)
440 |     return x
441 |
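# Sketch of rand_fourier's effect (illustrative signal, not used elsewhere):
# over a roughly 1 s window, a 2-cycle sine sits near rfft bin 2 and survives
# an 8-mode low-pass, while a 40-cycle sine is removed.
def _demo_rand_fourier():
    t = torch.linspace(0., 1., 128)
    x = (torch.sin(2 * np.pi * 2 * t) + torch.sin(2 * np.pi * 40 * t)).reshape(1, 128, 1)
    low = rand_fourier(x, n_modes=8)   # keeps rfft bins 0..7
    assert low.shape == x.shape
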
442 | def shuffle_rand_fourier(x, n_modes):
443 |     # keep a random subset of n_modes Fourier components of x along time
444 |     n_modes = min(n_modes, x.shape[1]//2)
445 |     x_ft = torch.fft.rfft(x, dim=-2)
446 |
447 |     # shuffle the bin indices and keep the first n_modes of them
448 |     index = list(range(x.shape[1]//2))
449 |     np.random.shuffle(index)
450 |     index = index[:n_modes]
451 |
452 |     # Return to time domain (the kept bins land in the lowest positions)
453 |     x = torch.fft.irfft(x_ft[:, index, :], n=x.size(1), dim=-2)
454 |     return x
455 |
456 | from sklearn.feature_selection import mutual_info_classif as MIC
457 |
458 | def rand_fourier_with_target(x, n_modes, target):
459 |     # supervised variant: keep the rfft bins whose magnitudes carry the most
460 |     # mutual information about the class labels
461 |     n_modes = min(n_modes, x.shape[1]//2)
462 |     x_ft = torch.fft.rfft(x, dim=-2)
463 |
464 |     # score each frequency bin: MIC returns one score per channel for that
465 |     # bin's (batch, channels) magnitude matrix; average over channels
466 |     MIC_score = []
467 |     for i in range(x_ft.shape[1]):
468 |         MI = MIC(torch.abs(x_ft[:, i, :]).cpu().numpy(), target).mean()
469 |         MIC_score.append(MI)
470 |
471 |     # pick the n_modes bins with the highest scores (equivalent to
472 |     # np.argpartition(MIC_score, -n_modes)[-n_modes:])
473 |     _, index = torch.topk(torch.Tensor(MIC_score).reshape(1, -1), k=n_modes)
474 |     index = index[0]
475 |
476 |     # Return to time domain from the selected bins only; torch.fft.irfft
477 |     # zero-pads the missing bins, so the kept components are reconstructed
478 |     # in the lowest-frequency positions, and the result stays on the same
479 |     # device as the input
480 |     x = torch.fft.irfft(x_ft[:, index, :], n=x.size(1), dim=-2)
481 |
482 |     return x
483 |
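# Minimal sketch for the label-aware selector above (hypothetical data; MIC
# expects integer class labels):
def _demo_rand_fourier_with_target():
    x = torch.randn(32, 64, 3)                # (batch, time, channels)
    y = np.random.randint(0, 2, size=32)      # binary class labels
    sel = rand_fourier_with_target(x, n_modes=4, target=y)
    assert sel.shape == x.shape
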
484 | def shuffle_rand_fourier_me(x, n_modes):
485 |     # sample mutually exclusive sets of Fourier components for the two views
486 | n_modes = min(n_modes, x.shape[1]//4)
487 | # print(n_modes)
488 | x_ft = torch.fft.rfft(x, dim=-2)
489 | # print(x_ft.shape)
490 | index = list(range(x.shape[1]//2))
491 | np.random.shuffle(index)
492 | index1 = index[:n_modes]
493 | index2 = index[n_modes:2*n_modes]
494 |
495 |     # Return to time domain: each view is reconstructed from its own
496 |     # disjoint set of Fourier components
497 | x1 = torch.fft.irfft(x_ft[:, index1, :], n=x.size(1), dim=-2)
498 | x2 = torch.fft.irfft(x_ft[:, index2, :], n=x.size(1), dim=-2)
499 | return x1, x2
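
# Minimal sketch for shuffle_rand_fourier_me (illustrative sizes): the two
# returned views are built from disjoint sets of Fourier components.
def _demo_me_views():
    x = torch.randn(4, 128, 9)
    x1, x2 = shuffle_rand_fourier_me(x, n_modes=16)
    assert x1.shape == x.shape and x2.shape == x.shape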
--------------------------------------------------------------------------------
/main_FreRA.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import math
3 | import os
4 | import shutil
5 | import time
6 |
7 | import torch.nn as nn
8 | import torch.nn.parallel
9 | import torch.backends.cudnn as cudnn
10 | import torch.optim
11 |
12 | import models.builder
13 | from data_loaders import *
14 | from augmentations import *
15 | from models.backbones import FCN
16 |
17 | import fitlog
18 | from sklearn.metrics import f1_score
19 | from copy import deepcopy
20 | from autoaug.fourier import *
21 |
22 | model_names = sorted(name for name in models.__dict__
23 | if name.islower() and not name.startswith("__")
24 | and callable(models.__dict__[name]))
25 |
26 | parser = argparse.ArgumentParser(description='FreRA contrastive pre-training and linear evaluation on time series')
27 |
28 | # dataset
29 | parser.add_argument('--dataset', type=str, default='ucihar', help='name of dataset')
30 | parser.add_argument('--n_feature', type=int, default=77, help='number of feature channels')
31 | parser.add_argument('--len_sw', type=int, default=30, help='length of sliding window')
32 | parser.add_argument('--n_class', type=int, default=18, help='number of classes')
33 | parser.add_argument('--cases', type=str, default='random',
34 |                     choices=['random', 'subject', 'large_subject'], help='train/test split scenario')
35 | parser.add_argument('--split_ratio', type=float, default=0.2,
36 | help='split ratio of test/val: train(0.64), val(0.16), test(0.2)')
37 | parser.add_argument('--target_domain', type=str, default='0')
38 |
39 | parser.add_argument('--framework', type=str, default='simclr',
40 | choices=['simclr', 'byol', 'simsiam'])
41 | parser.add_argument('-a', '--arch', metavar='ARCH', default='FCN',
42 | choices=model_names,
43 | help='model architecture: ' +
44 | ' | '.join(model_names) +
45 | ' (default: FCN)')
46 | parser.add_argument('--start_epoch', default=0, type=int)
47 | parser.add_argument('--epochs', default=200, type=int, metavar='N',
48 | help='number of total epochs to run')
49 | parser.add_argument('-b', '--batch_size', default=256, type=int,
50 | metavar='N',
51 | help='mini-batch size')
52 | parser.add_argument('--lr', '--learning-rate', default=0.03, type=float,
53 | metavar='LR', help='initial learning rate', dest='lr')
54 | parser.add_argument('--f_lr', default=0.01, type=float, help='initial learning rate for fourier weight')
55 | parser.add_argument('--momentum', default=0.9, type=float, metavar='M',
56 | help='momentum of SGD solver')
57 | parser.add_argument('--wd', '--weight-decay', default=1e-4, type=float,
58 | metavar='W', help='weight decay (default: 1e-4)',
59 | dest='weight_decay')
60 | parser.add_argument('--seed', default=None, type=int,
61 | help='seed for initializing training. ')
62 | parser.add_argument('--gpu', default=None, type=int,
63 | help='GPU id to use.')
64 |
65 | parser.add_argument('--low_dim', default=128, type=int,
66 | help='feature dimension (default: 128)')
67 | parser.add_argument('--temperature', default=0.2, type=float,
68 | help='softmax temperature')
69 |
70 | parser.add_argument('--cos', action='store_true', default=True,
71 | help='use cosine lr schedule')
72 |
73 | parser.add_argument('--logdir', default='log', type=str,
74 | help='fitlog directory')
75 |
76 | parser.add_argument('--f_temperature', default=0.1, type=float,
77 | help='temperature for Fourier AutoAug')
78 | parser.add_argument('--l1_weight', default=0.1, type=float,
79 |                     help='weight of the l1-norm penalty on the FreRA weight parameters')
80 | parser.add_argument('--f_aug_mode', default='FreRA', type=str)
81 |
82 |
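# Example invocation (a sketch; the dataset-specific values below are
# assumptions that must match the chosen dataset's preprocessing):
#   python main_FreRA.py --dataset ucihar --n_feature 9 --len_sw 128 \
#       --n_class 6 --framework simclr --gpu 0
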
83 | def main():
84 | args = parser.parse_args()
85 |
86 | if args.seed is not None:
87 | random.seed(args.seed)
88 | torch.manual_seed(args.seed)
89 | torch.cuda.manual_seed_all(args.seed)
90 | cudnn.deterministic = True
91 |
92 | best_model = main_worker(args.gpu, args)
93 |
94 | main_worker_cls(args.gpu, best_model, args)
95 |
96 |
97 | def main_worker(gpu, args):
98 | args.gpu = gpu
99 |
100 | if args.gpu is not None:
101 | print("Use GPU: {} for training".format(args.gpu))
102 |
103 | DEVICE = torch.device('cuda:' + str(args.gpu) if torch.cuda.is_available() else 'cpu')
104 |
105 | train_loader, val_loader, eval_loader = setup_dataloaders(args)
106 |
107 | # create model
108 | print("=> creating model '{}'".format(args.arch))
109 | if args.framework == 'simclr':
110 | model = models.builder.SimCLR(
111 | DEVICE,
112 | args.dataset,
113 | args.n_feature,
114 | args.batch_size,
115 | args.arch,
116 | args.low_dim, args.temperature)
117 | elif args.framework == 'byol':
118 | model = models.builder.BYOL(
119 | DEVICE,
120 | args.arch,
121 | args.dataset,
122 | args.n_feature,
123 | args.len_sw,
124 | moving_average=0.996)
125 |     elif args.framework == 'simsiam':
126 |         model = models.builder.BYOL(   # SimSiam: the BYOL builder with no momentum target
127 |             DEVICE,
128 |             args.arch,
129 |             args.dataset,
130 |             args.n_feature,
131 |             args.len_sw,
132 |             moving_average=0.0)
133 |
134 | if args.gpu is not None:
135 | torch.cuda.set_device(args.gpu)
136 | model = model.to(DEVICE)
137 |
138 | # define loss function (criterion) and optimizer
139 | if args.framework in ['simclr']:
140 | criterion = nn.CrossEntropyLoss().to(DEVICE)
141 | elif args.framework in ['byol', 'simsiam']:
142 | criterion = nn.CosineSimilarity(dim=1)
143 |
144 | if args.framework in ['simclr']:
145 | optimizer = torch.optim.SGD(model.parameters(), args.lr,
146 | momentum=args.momentum,
147 | weight_decay=args.weight_decay)
148 | elif args.framework in ['byol', 'simsiam']:
149 | if args.framework == 'byol':
150 | args.weight_decay = 1.5e-6
151 | lr_mul = 10.0
152 | elif args.framework == 'simsiam':
153 | args.weight_decay = 1e-4
154 | lr_mul = 1.0
155 | optimizer1 = torch.optim.Adam(model.encoder_q.parameters(),
156 | args.lr,
157 | weight_decay=args.weight_decay)
158 | optimizer2 = torch.optim.Adam(model.online_predictor.parameters(),
159 | args.lr * lr_mul,
160 | weight_decay=args.weight_decay)
161 | optimizer = [optimizer1, optimizer2]
162 |
163 | cudnn.benchmark = True
164 |
165 | # fitlog
166 |     if not os.path.isdir(args.logdir):
167 | os.makedirs(args.logdir)
168 | fitlog.set_log_dir(args.logdir)
169 | fitlog.add_hyper(args)
170 | fitlog.add_hyper_in_file(__file__)
171 |
172 | # autoaug
173 | if args.f_aug_mode == 'FreRA':
174 | aug_f = FreRA(len_sw=args.len_sw, device=DEVICE).to(DEVICE)
175 |
176 | f_optimizer = torch.optim.AdamW(aug_f.parameters(), lr=args.f_lr)
177 |
178 | f_weight = []
179 |
180 | for epoch in range(args.start_epoch, args.epochs):
181 |
182 | if args.framework not in ['byol', 'simsiam']:
183 | adjust_learning_rate(optimizer, epoch, args)
184 |
185 | # train for one epoch
186 | train(epoch, aug_f, f_optimizer, DEVICE, train_loader, model, criterion, optimizer, args, fitlog)
187 |
188 | f_weight.append(aug_f.weight.cpu().detach().numpy())
189 |
190 | # save weights
191 | fitlog.add_hyper(aug_f.weight, name='fourier weight')
192 |
193 | return deepcopy(model.state_dict())
194 |
195 | def train(epoch, aug_f, f_optimizer, DEVICE, train_loader, model, criterion, optimizer, args, fitlog=None):
196 | batch_time = AverageMeter('Time', ':6.3f')
197 | data_time = AverageMeter('Data', ':6.3f')
198 | losses = AverageMeter('Loss', ':.4e')
199 | l1_losses = AverageMeter('L1Loss', ':.4e')
200 | acc_inst = AverageMeter('Acc@Inst', ':6.2f')
201 |
202 | progress = ProgressMeter(
203 | len(train_loader),
204 | [batch_time, data_time, losses, acc_inst],
205 | prefix="Epoch: [{}]".format(epoch))
206 |
207 | # switch to train mode
208 | model.train()
209 |
210 | end = time.time()
211 | for i, (sample, target, domain) in enumerate(train_loader):
212 | # measure data loading time
213 | data_time.update(time.time() - end)
214 |
215 | sample = sample.to(DEVICE)
216 |         aug_sample1, aug_sample2 = aug_f(sample, temperature=args.f_temperature).float(), gen_aug(None, sample, 'na').to(DEVICE).float()  # view 1: FreRA-augmented; view 2: unaugmented ('na') sample
217 |
218 | # compute output
219 | if args.framework in ['simclr']:
220 | output, target, z1, z2 = model(im_q=aug_sample1, im_k=aug_sample2)
221 | loss = criterion(output, target)
222 | elif args.framework in ['byol', 'simsiam']:
223 | online_pred_one, online_pred_two, target_proj_one, target_proj_two = model(im_q=aug_sample1, im_k=aug_sample2)
224 | loss = -(criterion(online_pred_one, target_proj_two).mean() + criterion(online_pred_two, target_proj_one).mean()) * 0.5
225 |
226 |         # l1-norm of FreRA's relaxed selection probabilities (aug_f.para is cached during aug_f's forward pass), scaled below by l1_weight / len_sw
227 | l1_weight_loss = torch.norm(aug_f.para[:, 0], p=1)
228 | loss = loss + l1_weight_loss * args.l1_weight / args.len_sw
229 |
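        # Restating the objective assembled above:
        #   L_total = L_contrastive + (l1_weight / len_sw) * ||aug_f.para[:, 0]||_1
        # The l1 term pushes the relaxed per-component selection probabilities
        # towards zero, so FreRA keeps only a sparse set of frequency components.
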
230 | losses.update(loss.item(), aug_sample1.size(0))
231 | l1_losses.update(l1_weight_loss.item(), aug_sample1.size(0))
232 |
233 | # update cl framework and fourier weight
234 | if args.framework in ['simclr']:
235 | optimizer.zero_grad()
236 | f_optimizer.zero_grad()
237 | loss.backward()
238 | optimizer.step()
239 | f_optimizer.step()
240 | elif args.framework in ['byol', 'simsiam']:
241 | optimizer[0].zero_grad()
242 | optimizer[1].zero_grad()
243 | f_optimizer.zero_grad()
244 | loss.backward()
245 | optimizer[0].step()
246 | optimizer[1].step()
247 | f_optimizer.step()
248 |
249 | # measure elapsed time
250 | batch_time.update(time.time() - end)
251 | end = time.time()
252 |
253 | fitlog.add_loss(losses.avg, name="InfoNCE loss", step=epoch)
254 | fitlog.add_loss(l1_losses.avg, name="L1 loss", step=epoch)
255 |         fitlog.add_metric({"dev": {"Inst Acc": acc_inst.avg}}, step=epoch)  # note: acc_inst is never updated above, so this logs its initial value
256 |
257 | print(
258 | f'epoch {epoch} InfoNCE loss : {losses.avg:.4f}, L1 loss : {l1_losses.avg:.4f}')
259 |
260 | progress.display(i)
261 |
262 | def main_worker_cls(gpu, best_model, args):
263 | args.gpu = gpu
264 |
265 | if args.gpu is not None:
266 | print("Use GPU: {} for training".format(args.gpu))
267 |
268 | DEVICE = torch.device('cuda:' + str(args.gpu) if torch.cuda.is_available() else 'cpu')
269 |
270 | # create model
271 | print("=> creating model '{}'".format(args.arch))
272 | model = FCN(args.dataset, n_channels=args.n_feature, n_classes=args.n_class, backbone=False)
273 |
274 | # freeze all layers but the last fc
275 | for name, param in model.named_parameters():
276 | if name not in ['logits.weight', 'logits.bias']:
277 | param.requires_grad = False
278 | # init the fc layer
279 | model.logits.weight.data.normal_(mean=0.0, std=0.01)
280 | model.logits.bias.data.zero_()
281 |
282 | # load best model
283 | # rename pre-trained keys
284 | state_dict = deepcopy(best_model)
285 | for k in list(state_dict.keys()):
286 | if 'net' in k:
287 | # retain only encoder_q up to before the embedding layer
288 | if k.startswith('encoder_q.net') and not k.startswith('encoder_q.net.logits'):
289 | # remove prefix
290 | state_dict[k[len("encoder_q.net."):]] = state_dict[k]
291 | # delete renamed or unused k
292 | del state_dict[k]
293 | else:
294 | # retain only encoder_q up to before the embedding layer
295 | if k.startswith('encoder_q') and not k.startswith('encoder_q.logits'):
296 | # remove prefix
297 | state_dict[k[len("encoder_q."):]] = state_dict[k]
298 | # delete renamed or unused k
299 | del state_dict[k]
300 |
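    # Example of the renaming above (hypothetical keys, for illustration only):
    #   'encoder_q.net.conv1.weight'  -> 'conv1.weight'  (kept, prefix stripped)
    #   'encoder_q.net.logits.weight' -> dropped          (classifier is re-initialised)
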
301 | args.start_epoch = 0
302 | msg = model.load_state_dict(state_dict, strict=False)
303 | assert set(msg.missing_keys) == {"logits.weight", "logits.bias"}
304 |
305 | print("=> loaded pre-trained model ")
306 |
307 | if args.gpu is not None:
308 | torch.cuda.set_device(args.gpu)
309 | model = model.to(DEVICE)
310 |
311 | # define loss function (criterion) and optimizer
312 | criterion = nn.CrossEntropyLoss().to(DEVICE)
313 |
314 | # optimize only the linear classifier
315 | parameters = list(filter(lambda p: p.requires_grad, model.parameters()))
316 |     assert len(parameters) == 2 # logits.weight, logits.bias
317 | optimizer = torch.optim.SGD(parameters, args.lr,
318 | momentum=args.momentum,
319 | weight_decay=args.weight_decay)
320 |
321 | cudnn.benchmark = True
322 |
323 | # Data loading code
324 | train_loader, val_loader, test_loader = setup_dataloaders(args)
325 |
326 | for epoch in range(args.start_epoch, args.epochs):
327 | # train for one epoch
328 | train_cls(DEVICE, train_loader, val_loader, model, criterion, optimizer, epoch, args)
329 |
330 | if epoch == args.start_epoch:
331 | sanity_check(model.state_dict(), best_model)
332 | acc1 = validate_cls(DEVICE, test_loader, model, criterion, args, epoch, val=False)
333 |
334 |
335 | def train_cls(DEVICE, train_loader, val_loader, model, criterion, optimizer, epoch, args):
336 | batch_time = AverageMeter('Time', ':6.3f')
337 | data_time = AverageMeter('Data', ':6.3f')
338 | losses = AverageMeter('Loss', ':.4e')
339 | top1 = AverageMeter('Acc@1', ':6.2f')
340 | progress = ProgressMeter(
341 | len(train_loader),
342 | [batch_time, data_time, losses, top1],
343 | prefix="Epoch: [{}]".format(epoch))
344 |
345 | """
346 | Switch to eval mode:
347 | Under the protocol of linear classification on frozen features/models,
348 | it is not legitimate to change any part of the pre-trained model.
349 | BatchNorm in train mode may revise running mean/std (even if it receives
350 | no gradient), which are part of the model parameters too.
351 | """
352 | model.eval()
353 |
354 | end = time.time()
355 | for i, (sample, target, domain) in enumerate(train_loader):
356 | # measure data loading time
357 | data_time.update(time.time() - end)
358 |
359 | sample = sample.to(DEVICE).float()
360 | target = target.to(DEVICE).long()
361 |
362 | # compute output
363 | output = model(sample)
364 | loss = criterion(output, target)
365 |
366 | # measure accuracy and record loss
367 | acc1 = accuracy(output, target, topk=(1,))
368 | # print(acc1)
369 | losses.update(loss.item(), sample.size(0))
370 | top1.update(acc1[0].item(), sample.size(0))
371 |
372 | # compute gradient and do SGD step
373 | optimizer.zero_grad()
374 | loss.backward()
375 | optimizer.step()
376 |
377 | # measure elapsed time
378 | batch_time.update(time.time() - end)
379 | end = time.time()
380 |
381 | fitlog.add_loss(losses.avg, name="CLS Train loss", step=epoch)
382 | fitlog.add_loss(optimizer.param_groups[0]['lr'], name="CLS lr", step=epoch)
383 | fitlog.add_metric({"dev": {"CLS Train Acc": top1.avg}}, step=epoch)
384 |
385 | progress.display(i)
386 |
387 | if val_loader is not None:
388 | acc1_val = validate_cls(DEVICE, val_loader, model, criterion, args, epoch)
389 |
390 |
391 | def validate_cls(DEVICE, val_loader, model, criterion, args, epoch, val=True):
392 | batch_time = AverageMeter('Time', ':6.3f')
393 | losses = AverageMeter('Loss', ':.4e')
394 | top1 = AverageMeter('Acc@1', ':6.2f')
395 | progress = ProgressMeter(
396 | len(val_loader),
397 | [batch_time, losses, top1],
398 | prefix='Test: ')
399 |
400 | # switch to evaluate mode
401 | model.eval()
402 |
403 | total = 0
404 | correct = 0
405 | trgs = np.array([])
406 | preds = np.array([])
407 | feats = None
408 | confusion_matrix = torch.zeros(args.n_class, args.n_class)
409 |
410 | with torch.no_grad():
411 | end = time.time()
412 | for i, (sample, target, domain) in enumerate(val_loader):
413 | sample = sample.to(DEVICE).float()
414 | target = target.to(DEVICE).long()
415 |
416 | # compute output
417 | output, feat = model(sample, return_feature=True)
418 | loss = criterion(output, target)
419 |
420 | if not val:
421 | _, predicted = torch.max(output.data, 1)
422 | trgs = np.append(trgs, target.data.cpu().numpy())
423 | preds = np.append(preds, predicted.data.cpu().numpy())
424 | if feats is None:
425 | feats = feat
426 | else:
427 | feats = torch.cat((feats, feat), 0)
428 | for t, p in zip(target.view(-1), predicted.view(-1)):
429 | confusion_matrix[t.long(), p.long()] += 1
430 | total += target.size(0)
431 | correct += (predicted == target).sum()
432 |
433 | # measure accuracy and record loss
434 | acc1 = accuracy(output, target, topk=(1,))
435 | losses.update(loss.item(), sample.size(0))
436 | top1.update(acc1[0].item(), sample.size(0))
437 |
438 | # measure elapsed time
439 | batch_time.update(time.time() - end)
440 | end = time.time()
441 |
442 | if val:
443 | fitlog.add_loss(losses.avg, name="CLS Val loss", step=epoch)
444 | fitlog.add_metric({"dev": {"CLS Val Acc": top1.avg}}, step=epoch)
445 |
446 | if not val:
447 | acc_test = float(correct) * 100.0 / total
448 | miF = f1_score(trgs, preds, average='micro') * 100
449 | maF = f1_score(trgs, preds, average='macro') * 100
450 |
451 | fitlog.add_best_metric({"dev": {"Test Acc": acc_test}})
452 | fitlog.add_best_metric({"dev": {"miF": miF}})
453 | fitlog.add_best_metric({"dev": {"maF": maF}})
454 | fitlog.add_hyper(confusion_matrix, name='conf_mat')
455 |
456 | progress.display(i)
457 |
458 | # TODO: this should also be done with the ProgressMeter
459 | print(' * Acc@1 {top1.avg:.3f} '
460 | .format(top1=top1))
461 | return top1.avg
462 |
463 |
464 | def sanity_check(state_dict, best_model):
465 | """
466 | Linear classifier should not change any weights other than the linear layer.
467 | This sanity check asserts nothing wrong happens (e.g., BN stats updated).
468 | """
469 | print("=> loading model for sanity check")
470 | state_dict_pre = best_model
471 |
472 | for k in list(state_dict.keys()):
473 | # only ignore fc layer
474 | if 'logits.weight' in k or 'logits.bias' in k:
475 | continue
476 | # name in pretrained model
477 | k_pre = 'encoder_q.' + k
478 |
479 | if 'net' in list(state_dict_pre.keys())[0]:
480 | k_pre = 'encoder_q.net.' + k
481 |
482 | assert ((state_dict[k].cpu() == state_dict_pre[k_pre].cpu()).all()), \
483 | '{} is changed in linear classifier training.'.format(k)
484 |
485 | print("=> sanity check passed.")
486 |
487 |
488 | def save_checkpoint(state, is_best=False, filename='checkpoint.pth.tar'):
489 | torch.save(state, filename)
490 | if is_best:
491 | shutil.copyfile(filename, 'model_best.pth.tar')
492 | print(filename)
493 |
494 |
495 | class AverageMeter(object):
496 | """Computes and stores the average and current value"""
497 |
498 | def __init__(self, name, fmt=':f'):
499 | self.name = name
500 | self.fmt = fmt
501 | self.reset()
502 |
503 | def reset(self):
504 | self.val = 0
505 | self.avg = 0
506 | self.sum = 0
507 | self.count = 0
508 |
509 | def update(self, val, n=1):
510 | self.val = val
511 | self.sum += val * n
512 | self.count += n
513 | self.avg = self.sum / self.count
514 |
515 | def __str__(self):
516 | fmtstr = '{name} {val' + self.fmt + '} ({avg' + self.fmt + '})'
517 | return fmtstr.format(**self.__dict__)
518 |
519 |
520 | class ProgressMeter(object):
521 | def __init__(self, num_batches, meters, prefix=""):
522 | self.batch_fmtstr = self._get_batch_fmtstr(num_batches)
523 | self.meters = meters
524 | self.prefix = prefix
525 |
526 |     def display(self, batch):
527 |         entries = [self.prefix + self.batch_fmtstr.format(batch)]
528 |         # one formatted entry per meter, printed as a single tab-joined line
529 |         entries += [str(meter) for meter in self.meters]
530 |         print('\t'.join(entries))
531 |
532 |
533 | def _get_batch_fmtstr(self, num_batches):
534 | num_digits = len(str(num_batches // 1))
535 | fmt = '{:' + str(num_digits) + 'd}'
536 | return '[' + fmt + '/' + fmt.format(num_batches) + ']'
537 |
538 |
539 | def adjust_learning_rate(optimizer, epoch, args):
540 | """Decay the learning rate based on schedule"""
541 | lr = args.lr
542 | if args.cos: # cosine lr schedule
543 | lr *= 0.5 * (1. + math.cos(math.pi * epoch / args.epochs))
544 |     else:  # stepwise lr schedule (requires args.schedule, which the argparse above does not define)
545 | for milestone in args.schedule:
546 | lr *= 0.1 if epoch >= milestone else 1.
547 | for param_group in optimizer.param_groups:
548 | param_group['lr'] = lr
549 |
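# Worked example of the cosine schedule above: with --lr 0.03 and --epochs 200,
# epoch 0 gives lr = 0.03, epoch 100 gives 0.03 * 0.5 * (1 + cos(pi/2)) = 0.015,
# and the final epochs approach 0.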
550 |
551 | def adjust_learning_rate_cls(optimizer, epoch, args):
552 | """Decay the learning rate based on schedule"""
553 | lr = args.lr
554 | for milestone in args.schedule:
555 | lr *= 0.1 if epoch >= milestone else 1.
556 | for param_group in optimizer.param_groups:
557 | param_group['lr'] = lr
558 |
559 |
560 | def accuracy(output, target, topk=(1,)):
561 | """Computes the accuracy over the k top predictions for the specified values of k"""
562 | with torch.no_grad():
563 | maxk = max(topk)
564 | batch_size = target.size(0)
565 |
566 | _, pred = output.topk(maxk, 1, True, True)
567 | pred = pred.t()
568 | correct = pred.eq(target.view(1, -1).expand_as(pred))
569 |
570 | res = []
571 | for k in topk:
572 | correct_k = correct[:k].view(-1).float().sum(0, keepdim=True)
573 | res.append(correct_k.mul_(100.0 / batch_size))
574 | return res
575 |
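# Minimal sketch of accuracy() on toy values (illustrative only):
#   output = torch.tensor([[0.1, 0.9], [0.8, 0.2]]); target = torch.tensor([1, 0])
#   accuracy(output, target, topk=(1,))  ->  [tensor([100.])]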
576 |
577 | if __name__ == '__main__':
578 | main()
--------------------------------------------------------------------------------