├── src
│   ├── __init__.py
│   ├── utils.py
│   ├── classify_embeddings.py
│   ├── main.py
│   ├── conv_auto_encoder.py
│   ├── ConvLSTMAE.py
│   └── data_utils.py
├── report-ganand.pdf
└── results
    ├── CNN-AE.png
    ├── RCNN-AE.png
    └── t-SNE
        ├── wafer_20.png
        ├── ECG5000_100.png
        ├── Strawberry_7.png
        ├── ProximalPhalanxTW_7.png
        ├── synthetic_control_20.png
        └── uWaveGestureLibrary_Z_100.png

--------------------------------------------------------------------------------
/src/__init__.py:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/report-ganand.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gauravanand25/cnn-convlstm-time-series/HEAD/report-ganand.pdf

--------------------------------------------------------------------------------
/results/CNN-AE.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gauravanand25/cnn-convlstm-time-series/HEAD/results/CNN-AE.png

--------------------------------------------------------------------------------
/results/RCNN-AE.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gauravanand25/cnn-convlstm-time-series/HEAD/results/RCNN-AE.png

--------------------------------------------------------------------------------
/results/t-SNE/wafer_20.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gauravanand25/cnn-convlstm-time-series/HEAD/results/t-SNE/wafer_20.png

--------------------------------------------------------------------------------
/results/t-SNE/ECG5000_100.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gauravanand25/cnn-convlstm-time-series/HEAD/results/t-SNE/ECG5000_100.png

--------------------------------------------------------------------------------
/results/t-SNE/Strawberry_7.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gauravanand25/cnn-convlstm-time-series/HEAD/results/t-SNE/Strawberry_7.png

--------------------------------------------------------------------------------
/results/t-SNE/ProximalPhalanxTW_7.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gauravanand25/cnn-convlstm-time-series/HEAD/results/t-SNE/ProximalPhalanxTW_7.png

--------------------------------------------------------------------------------
/results/t-SNE/synthetic_control_20.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gauravanand25/cnn-convlstm-time-series/HEAD/results/t-SNE/synthetic_control_20.png

--------------------------------------------------------------------------------
/results/t-SNE/uWaveGestureLibrary_Z_100.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gauravanand25/cnn-convlstm-time-series/HEAD/results/t-SNE/uWaveGestureLibrary_Z_100.png

--------------------------------------------------------------------------------
/src/utils.py:
--------------------------------------------------------------------------------
from matplotlib import pyplot
import os.path


def plot_tsne(X, y, f_name):
    """Scatter-plot a 2-D t-SNE embedding, colored by class label, and save it."""
    fig = pyplot.figure()
    ax = fig.add_subplot(111)
    ax.scatter(X[:, 0], X[:, 1], c=y, cmap=pyplot.cm.viridis)
    ax.axis('tight')
    pyplot.savefig(f_name + '.png')
    pyplot.close(fig)  # this is called in a loop; close to avoid piling up open figures


def plot_losses(loss, val_loss):
    """Plot training and validation loss curves; save before show() so the file is not blank."""
    pyplot.figure()
    pyplot.plot(loss, label='train')
    pyplot.plot(val_loss, label='validation')
    pyplot.legend()
    pyplot.savefig('loss.png')
    pyplot.show(block=True)


def args_to_string(args):
    """Serialize parsed arguments into a string usable as a run / directory name."""
    return '_'.join(str(k) + str(v) for k, v in vars(args).items())


def make_directory(dir_name):
    if not os.path.exists(dir_name):
        os.makedirs(dir_name)

--------------------------------------------------------------------------------
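As a quick illustration (hypothetical values, not repo code), args_to_string turns the parsed flags into the run-directory name that main.py later creates under ./search/:

    import argparse
    args = argparse.Namespace(model='ConvAE', epochs=6, batch_size=32, lr=0.001)
    print(args_to_string(args))
    # e.g. modelConvAE_epochs6_batch_size32_lr0.001 (key order follows vars(args) and may vary)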
/src/classify_embeddings.py:
--------------------------------------------------------------------------------
from __future__ import print_function

import pickle

import numpy as np
from sklearn import svm
from sklearn.manifold import TSNE

from data_utils import MultipleDatasets, test_datasets, Timenet_Results, Best_Results
from utils import plot_tsne

import ConvLSTMAE


def tsne(embedded_X_train, y, f_name):
    """Project the embeddings to 2-D with t-SNE at several perplexities and save the plots."""
    for perplexity in [7, 20, 50, 100]:
        X_embedded = TSNE(n_components=2, perplexity=perplexity).fit_transform(embedded_X_train)
        plot_tsne(X_embedded, y, f_name + '_' + str(perplexity))


def report(dataset_name, error):
    """Print whether the SVM error rate beats the best-known and TimeNet baselines."""
    print(error <= Best_Results[dataset_name], error <= Timenet_Results[dataset_name],
          dataset_name, error, Timenet_Results[dataset_name], Best_Results[dataset_name])


def fit_conv_ae(model, out_dir):
    svm_data = MultipleDatasets(directory="./UCR_TS_Archive_2015",
                                datasets=test_datasets, merge_train_test=False,
                                data_length=512, val_percentage=0.2)
    svm_data.load_data()

    for dataset_name in test_datasets:
        data = svm_data.get_dataset(dataset_name)
        embedded_X_train = model.embeddings(data['X_train'])

        tsne(embedded_X_train, data['Y_train'], out_dir + '/' + dataset_name)

        # fit an RBF SVM on the embeddings (C and gamma left at defaults; see the search sketch below)
        clf = svm.SVC()
        clf.fit(embedded_X_train, data['Y_train'])

        # persist the trained classifier
        with open(out_dir + '/' + dataset_name + '.pkl', 'wb') as output:
            pickle.dump(clf, output)

        y_val = data['Y_val']
        X_val = model.embeddings(data['X_val'])

        y_pred = clf.predict(X_val)
        accuracy = np.mean(y_pred == y_val)
        report(dataset_name, 1 - accuracy)


def fit_lstm(args, model, out_dir):
    svm_data = MultipleDatasets(directory="./UCR_TS_Archive_2015",
                                datasets=test_datasets, merge_train_test=False, val_percentage=0.2)
    svm_data.load_data()

    for dataset_name in test_datasets:
        data = svm_data.get_dataset(dataset_name)
        embedded_X_train = ConvLSTMAE.embeddings(args, svm_data, data['X_train'], model)

        print('svm getting', embedded_X_train.shape)
        tsne(embedded_X_train, data['Y_train'], out_dir + '/' + dataset_name)

        clf = svm.SVC(C=1.0)
        clf.fit(embedded_X_train, data['Y_train'])

        with open(out_dir + '/' + dataset_name + '.pkl', 'wb') as output:
            pickle.dump(clf, output)

        y_val = data['Y_val']
        X_val = ConvLSTMAE.embeddings(args, svm_data, data['X_val'], model)

        y_pred = clf.predict(X_val)
        accuracy = np.mean(y_pred == y_val)
        report(dataset_name, 1 - accuracy)

--------------------------------------------------------------------------------
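The original comment ("fit svm C, gamma, rbf") hints that a hyper-parameter search was intended but never written. A minimal sketch with scikit-learn's GridSearchCV; the grid values are illustrative, and very old scikit-learn releases house GridSearchCV in sklearn.grid_search rather than sklearn.model_selection:

    from sklearn import svm
    from sklearn.model_selection import GridSearchCV

    param_grid = {'C': [0.1, 1, 10, 100], 'gamma': ['auto', 0.01, 0.1], 'kernel': ['rbf']}
    search = GridSearchCV(svm.SVC(), param_grid, cv=3)
    search.fit(embedded_X_train, data['Y_train'])
    clf = search.best_estimator_  # drop-in replacement for the default svm.SVC() above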
/src/main.py:
--------------------------------------------------------------------------------
from __future__ import print_function

import argparse
import os.path

import torch

import ConvLSTMAE
import conv_auto_encoder
from conv_auto_encoder import ConvAutoEncoder

import classify_embeddings as svm_classify
from data_utils import MultipleDatasets, timenet_train_datasets, timenet_val_datasets
from utils import args_to_string, make_directory


def conv_lstm_ae(args, out_dir, str_args):
    ucr = MultipleDatasets(directory="./UCR_TS_Archive_2015", batch_size=args.batch_size,
                           datasets=timenet_train_datasets, merge_train_test=True, val_percentage=0)
    ucr.load_data()

    val_ucr = MultipleDatasets(directory="./UCR_TS_Archive_2015", batch_size=args.batch_size,
                               datasets=timenet_val_datasets, merge_train_test=True, val_percentage=0)
    val_ucr.load_data()

    conv_lstm_encoder, conv_lstm_decoder = ConvLSTMAE.fit(args, ucr, val_ucr)

    svm_classify.fit_lstm(args, conv_lstm_encoder, out_dir)


def conv_ae(args, out_dir, str_args):
    model_file = './results/best_trained_model.pt'
    train = not os.path.exists(model_file)
    if train:
        conv_model = ConvAutoEncoder(hidden_channels=[32, 16, 4], kernel_sizes=[20, 11, 8],
                                     strides=[2, 4, 2], padding=[0, 0, 0])
    else:
        conv_model = torch.load(model_file)

    if train:  # skip training entirely when a previously saved model is being reused
        conv_ucr = MultipleDatasets(directory="./UCR_TS_Archive_2015", batch_size=args.batch_size,
                                    datasets=timenet_train_datasets, data_length=512,
                                    merge_train_test=True, val_percentage=0)
        conv_ucr.load_data()
        conv_collated_data = conv_ucr.collate()

        val_data = MultipleDatasets(directory="./UCR_TS_Archive_2015", batch_size=args.batch_size,
                                    datasets=timenet_val_datasets, data_length=512,
                                    merge_train_test=True, val_percentage=0)
        val_data.load_data()
        val_data = val_data.collate()

        conv_model.train()  # enable dropout / batch-norm training behaviour
        conv_auto_encoder.fit(args, conv_collated_data, val_data, conv_model)
        torch.save(conv_model, out_dir + '/' + str_args + '.pt')  # save the freshly trained model

    conv_model.eval()  # switch dropout / batch-norm to inference mode
    svm_classify.fit_conv_ae(conv_model, out_dir)


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Conv AE')
    parser.add_argument('--model', default='ConvAE', type=str, metavar='N',
                        help='which model: "ConvAE" or "ConvLSTMAE"')
    parser.add_argument('--epochs', default=6, type=int, metavar='N', help='number of total epochs to run')
    parser.add_argument('--filter_size', default=9, type=int, metavar='N', help='kernel size')
    parser.add_argument('--conv_len', default=32, type=int, metavar='N',
                        help='chunk width fed to the ConvLSTM per time step')
    parser.add_argument('-b', '--batch-size', default=32, type=int, metavar='N', help='mini-batch size (default: 32)')
    parser.add_argument('--lr', '--learning-rate', default=0.001, type=float, metavar='LR',
                        help='initial learning rate')
    args = parser.parse_args()

    str_args = args_to_string(args)
    out_dir = './search/' + str_args
    make_directory(out_dir)
    print(str_args)

    if args.model == 'ConvAE':
        conv_ae(args, out_dir, str_args)
    elif args.model == 'ConvLSTMAE':
        conv_lstm_ae(args, out_dir, str_args)

--------------------------------------------------------------------------------
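Typical invocations, assuming the UCR_TS_Archive_2015 folder sits in the current working directory (the data paths above are relative to it):

    python src/main.py --model ConvAE --epochs 6 -b 32 --lr 0.001
    python src/main.py --model ConvLSTMAE --conv_len 32 --lr 0.001

Outputs (saved models, pickled SVMs, and t-SNE plots) land in ./search/<args_to_string(args)>/.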
/src/conv_auto_encoder.py:
--------------------------------------------------------------------------------
from __future__ import print_function

import torch
import torch.nn as nn
import torch.optim as optim
import torch.utils.data as torch_utils
from torch.autograd import Variable

from utils import plot_losses


class ConvAutoEncoder(nn.Module):
    """1-D convolutional auto-encoder: three Conv1d layers mirrored by three ConvTranspose1d layers."""

    def __init__(self, hidden_channels, kernel_sizes, strides, padding):
        super(ConvAutoEncoder, self).__init__()
        self.hidden_channels = hidden_channels
        self.kernel_sizes = kernel_sizes
        self.strides = strides
        self.padding = padding
        self.encode = nn.Sequential(
            # input: (N, 1, 512)
            nn.Conv1d(in_channels=1, out_channels=hidden_channels[0], kernel_size=kernel_sizes[0],
                      stride=strides[0], padding=padding[0]),

            nn.Conv1d(in_channels=hidden_channels[0], out_channels=hidden_channels[1],
                      kernel_size=kernel_sizes[1], stride=strides[1], padding=padding[1]),

            nn.Conv1d(in_channels=hidden_channels[1], out_channels=hidden_channels[2],
                      kernel_size=kernel_sizes[2], stride=strides[2], padding=padding[2]),
        )

        self.decode = nn.Sequential(
            # mirror of the encoder: smallest kernel first, widest last, back to 1 channel
            nn.ConvTranspose1d(in_channels=hidden_channels[2], out_channels=hidden_channels[1],
                               kernel_size=kernel_sizes[2], stride=strides[2], padding=padding[2]),

            nn.ConvTranspose1d(in_channels=hidden_channels[1], out_channels=hidden_channels[0],
                               kernel_size=kernel_sizes[1], stride=strides[1], padding=padding[1]),

            nn.ConvTranspose1d(in_channels=hidden_channels[0], out_channels=1, kernel_size=kernel_sizes[0],
                               stride=strides[0], padding=padding[0])
        )

    def forward(self, x):
        x = self.encode(x)
        x = self.decode(x)
        return x

    def embeddings(self, data):
        """Encode an (N, L) numpy array and return the flattened bottleneck as an (N, D) numpy array."""
        N, L = data.shape
        batch = torch.from_numpy(data).contiguous()
        batch = Variable(batch.float(), requires_grad=False)
        X = self.encode(batch.view(N, 1, L))
        return X.data.numpy().reshape(N, -1)


def fit(args, data, val_data, model):
    # change the tensors to torch.cuda.FloatTensor to run on a GPU

    dataloader = torch_utils.DataLoader(data, batch_size=args.batch_size, shuffle=True)
    val_dataloader = torch_utils.DataLoader(val_data, batch_size=args.batch_size, shuffle=True)

    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=args.lr)

    plot_loss = []
    plot_val_loss = []

    for epoch in range(args.epochs):
        for i, batch in enumerate(dataloader):
            N, L = batch.shape
            batch = Variable(batch.float(), requires_grad=False)
            output = model(batch.view(N, 1, L))
            loss = criterion(output, target=batch.view(N, 1, L))  # match the (N, 1, L) output shape
            if i % 100 == 0:
                val_batch = next(iter(val_dataloader))
                N_val, L_val = val_batch.shape  # the last batch may be smaller, so do not reuse N, L
                val_batch = Variable(val_batch.float(), requires_grad=False)
                val_output = model(val_batch.view(N_val, 1, L_val))
                val_loss = criterion(val_output, target=val_batch.view(N_val, 1, L_val))
                plot_loss.append(loss.data.numpy()[0])
                plot_val_loss.append(val_loss.data.numpy()[0])
                print('epoch', epoch, 'num', i, 'loss', loss.data.numpy()[0],
                      'val loss', val_loss.data.numpy()[0])

            optimizer.zero_grad()  # zero the gradient buffers
            loss.backward()
            optimizer.step()  # apply the update

    plot_losses(plot_loss, plot_val_loss)

--------------------------------------------------------------------------------
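A quick sanity check of the bottleneck size for the configuration built in main.py (kernels [20, 11, 8], strides [2, 4, 2], no padding, inputs of length 512), using the standard Conv1d output-length formula. This is an illustrative snippet, not repo code:

    def conv1d_out_len(l_in, kernel, stride, pad=0):
        # L_out = floor((L_in + 2*pad - kernel) / stride) + 1
        return (l_in + 2 * pad - kernel) // stride + 1

    length = 512
    for kernel, stride in zip([20, 11, 8], [2, 4, 2]):
        length = conv1d_out_len(length, kernel, stride)  # 247, then 60, then 27
    print(length * 4)  # 4 channels at the bottleneck -> embeddings() returns 108 features per series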
/src/ConvLSTMAE.py:
--------------------------------------------------------------------------------
from __future__ import print_function

import torch
import torch.nn as nn
from torch.autograd import Variable

from utils import plot_losses


class ConvLSTMCell(nn.Module):
    """One ConvLSTM layer: all four gates are computed by a single Conv1d over [input, hidden]."""

    def __init__(self, input_channels, hidden_channels, kernel_size, bias=True):
        super(ConvLSTMCell, self).__init__()

        self.input_channels = input_channels
        self.hidden_channels = hidden_channels
        self.bias = bias
        self.kernel_size = kernel_size
        self.num_features = 4  # i, f, o, g gates

        self.padding = (kernel_size - 1) // 2  # 'same' padding, so the width is preserved
        self.conv = nn.Conv1d(in_channels=self.input_channels + self.hidden_channels,
                              out_channels=4 * self.hidden_channels, kernel_size=self.kernel_size,
                              stride=1, padding=self.padding, bias=self.bias)

    def forward(self, input, h, c):
        combined = torch.cat((input, h), dim=1)
        A = self.conv(combined)
        (ai, af, ao, ag) = torch.split(A, A.size()[1] // self.num_features, dim=1)
        i = torch.sigmoid(ai)  # input gate
        f = torch.sigmoid(af)  # forget gate
        o = torch.sigmoid(ao)  # output gate
        g = torch.tanh(ag)     # candidate cell state

        new_c = f * c + i * g
        new_h = o * torch.tanh(new_c)
        return new_h, new_c

    @staticmethod
    def init_hidden(batch_size, hidden_channels, width):
        return (Variable(torch.zeros(batch_size, hidden_channels, width)),
                Variable(torch.zeros(batch_size, hidden_channels, width)))

    @staticmethod
    def init_input(batch_size, input_channels, width):
        return Variable(torch.zeros(batch_size, input_channels, width))
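A minimal single-step sketch of the cell (shapes are illustrative, not from the repo). Because of the 'same' padding, hidden and cell states always match the input chunk width:

    cell = ConvLSTMCell(input_channels=1, hidden_channels=8, kernel_size=7)
    h, c = ConvLSTMCell.init_hidden(batch_size=4, hidden_channels=8, width=32)
    x = ConvLSTMCell.init_input(batch_size=4, input_channels=1, width=32)  # zeros as a stand-in input
    h, c = cell(x, h, c)  # both (4, 8, 32)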
class EncoderConvLSTM(nn.Module):
    """Three stacked ConvLSTM layers; hidden states are created lazily at time_step == 0."""

    def __init__(self, input_channels, hidden_channels, filter_sizes):
        super(EncoderConvLSTM, self).__init__()
        self.hidden_channels = hidden_channels
        self.input_channels = input_channels
        self.filter_sizes = filter_sizes
        self.lstm1 = ConvLSTMCell(input_channels=self.input_channels, hidden_channels=hidden_channels[0],
                                  kernel_size=self.filter_sizes[0])
        self.lstm2 = ConvLSTMCell(input_channels=hidden_channels[0], hidden_channels=hidden_channels[1],
                                  kernel_size=self.filter_sizes[1])
        self.lstm3 = ConvLSTMCell(input_channels=hidden_channels[1], hidden_channels=hidden_channels[2],
                                  kernel_size=self.filter_sizes[2])

    def forward(self, input, time_step, h1, c1, h2, c2, h3, c3):
        if time_step == 0:
            bsize, _, width = input.size()
            (h1, c1) = ConvLSTMCell.init_hidden(bsize, self.hidden_channels[0], width)
        h1, c1 = self.lstm1(input, h1, c1)

        if time_step == 0:
            bsize, _, width = h1.size()
            (h2, c2) = ConvLSTMCell.init_hidden(bsize, self.hidden_channels[1], width)
        h2, c2 = self.lstm2(h1, h2, c2)

        if time_step == 0:
            bsize, _, width = h2.size()
            (h3, c3) = ConvLSTMCell.init_hidden(bsize, self.hidden_channels[2], width)
        h3, c3 = self.lstm3(h2, h3, c3)
        return h1, c1, h2, c2, h3, c3


class DecoderConvLSTM(nn.Module):
    """Mirror of the encoder; after the first step the input is replaced by zeros (no teacher forcing)."""

    def __init__(self, input_channels, hidden_channels, filter_sizes):
        super(DecoderConvLSTM, self).__init__()
        self.input_channels = input_channels
        self.hidden_channels = hidden_channels
        self.filter_sizes = filter_sizes
        self.lstm1 = ConvLSTMCell(input_channels=self.input_channels, hidden_channels=self.hidden_channels[0],
                                  kernel_size=self.filter_sizes[0])
        self.lstm2 = ConvLSTMCell(input_channels=self.hidden_channels[0], hidden_channels=self.hidden_channels[1],
                                  kernel_size=self.filter_sizes[1])
        self.lstm3 = ConvLSTMCell(input_channels=self.hidden_channels[1], hidden_channels=self.hidden_channels[2],
                                  kernel_size=self.filter_sizes[2])

    def forward(self, input, time_step, h1, c1, h2, c2, h3, c3):
        if time_step == 0:
            bsize, _, width = input.size()
            (h1, c1) = ConvLSTMCell.init_hidden(bsize, self.hidden_channels[0], width)
        else:
            bsize, _, width = h1.size()
            input = ConvLSTMCell.init_input(bsize, self.input_channels, width)

        h1, c1 = self.lstm1(input, h1, c1)

        if time_step == 0:
            bsize, _, width = h1.size()
            (h2, c2) = ConvLSTMCell.init_hidden(bsize, self.hidden_channels[1], width)
        h2, c2 = self.lstm2(h1, h2, c2)

        if time_step == 0:
            bsize, _, width = h2.size()
            (h3, c3) = ConvLSTMCell.init_hidden(bsize, self.hidden_channels[2], width)
        h3, c3 = self.lstm3(h2, h3, c3)

        return h1, c1, h2, c2, h3, c3
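The state-threading contract, as a sketch (fit() below does the same over real batches): at time_step == 0 the forward pass allocates zero states internally; on later steps the caller passes the six state tensors back in.

    encoder = EncoderConvLSTM(input_channels=1, hidden_channels=[36, 12, 4], filter_sizes=[21, 11, 7])
    states = (None,) * 6
    for t in range(4):  # four dummy chunks of width 32
        x_t = Variable(torch.zeros(2, 1, 32))  # (batch, channels, width)
        states = encoder(x_t, t, *states)
    h3 = states[4]  # deepest hidden state, (2, 4, 32); flattened, this is the series embedding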
def fit(args, data_loader, val_data_loader):
    criterion = nn.MSELoss()

    encoder = EncoderConvLSTM(input_channels=1, hidden_channels=[36, 12, 4], filter_sizes=[21, 11, 7])
    decoder = DecoderConvLSTM(input_channels=4, hidden_channels=[12, 36, 1], filter_sizes=[7, 11, 21])

    encoder_optimizer = torch.optim.Adam(encoder.parameters(), lr=args.lr)
    decoder_optimizer = torch.optim.Adam(decoder.parameters(), lr=args.lr)

    plot_loss = []
    plot_val_loss = []

    iterations = data_loader.get_length() // args.batch_size
    for epoch in range(args.epochs):
        for itr in range(iterations):
            batch, _ = data_loader.get_batch()
            val_batch, _ = val_data_loader.get_batch()  # for validation loss

            seg_len = args.conv_len  # width of the chunk fed to the ConvLSTM per time step
            batch = data_loader.fix_batch_len(batch, seg_len, zeros=True)
            val_batch = val_data_loader.fix_batch_len(val_batch, seg_len, zeros=True)
            N, L = batch.shape

            # run the encoder over the series, one chunk of width seg_len per time step
            inputs = []
            loss = 0
            h1_en, c1_en, h2_en, c2_en, h3_en, c3_en = None, None, None, None, None, None
            for i in range(0, L, seg_len):
                batch_curr_time = Variable(torch.FloatTensor(batch[:, i:i + seg_len]),
                                           requires_grad=False).view(N, 1, seg_len)
                h1_en, c1_en, h2_en, c2_en, h3_en, c3_en = encoder(batch_curr_time, i // seg_len, h1_en,
                                                                   c1_en, h2_en, c2_en, h3_en, c3_en)
                inputs.append(batch_curr_time)

            # the decoder is seeded with the deepest encoder state and reconstructs the chunks in reverse order
            h1_dec, c1_dec, h2_dec, c2_dec, h3_dec, c3_dec = None, None, None, None, None, None
            for i in range(0, L, seg_len):
                h1_dec, c1_dec, h2_dec, c2_dec, h3_dec, c3_dec = decoder(h3_en if i == 0 else None, i // seg_len,
                                                                         h1_dec, c1_dec, h2_dec, c2_dec,
                                                                         h3_dec, c3_dec)
                loss += criterion(h3_dec, target=inputs[L // seg_len - 1 - i // seg_len])

            loss /= L / 100.0  # normalize the summed loss by the series length

            if itr % 100 == 0:
                N_val, L_val = val_batch.shape
                val_inputs = []
                val_loss = 0
                val_h1_en, val_c1_en, val_h2_en, val_c2_en, val_h3_en, val_c3_en = None, None, None, None, None, None
                for j in range(0, L_val, seg_len):
                    val_batch_curr_time = Variable(torch.FloatTensor(val_batch[:, j:j + seg_len]),
                                                   requires_grad=False).view(N_val, 1, seg_len)  # N_val, not N
                    val_h1_en, val_c1_en, val_h2_en, val_c2_en, val_h3_en, val_c3_en = encoder(
                        val_batch_curr_time, j // seg_len,
                        val_h1_en, val_c1_en, val_h2_en,
                        val_c2_en, val_h3_en, val_c3_en)
                    val_inputs.append(val_batch_curr_time)

                val_h1_dec, val_c1_dec, val_h2_dec, val_c2_dec, val_h3_dec, val_c3_dec = None, None, None, None, None, None
                for j in range(0, L_val, seg_len):
                    val_h1_dec, val_c1_dec, val_h2_dec, val_c2_dec, val_h3_dec, val_c3_dec = decoder(
                        val_h3_en if j == 0 else None, j // seg_len,
                        val_h1_dec, val_c1_dec, val_h2_dec,
                        val_c2_dec, val_h3_dec, val_c3_dec)
                    val_loss += criterion(val_h3_dec, target=val_inputs[L_val // seg_len - 1 - j // seg_len])

                val_loss /= L_val / 100.0
                plot_loss.append(loss.data.numpy()[0])
                plot_val_loss.append(val_loss.data.numpy()[0])
                print('epoch', epoch, 'num', itr, 'loss', loss.data.numpy()[0],
                      'val loss', val_loss.data.numpy()[0])

            encoder_optimizer.zero_grad()  # zero the gradient buffers
            decoder_optimizer.zero_grad()

            loss.backward()

            encoder_optimizer.step()  # apply the update
            decoder_optimizer.step()

    plot_losses(plot_loss, plot_val_loss)
    return encoder, decoder


def embeddings(args, data_util, data, encoder):
    """Run the trained encoder over (N, L) data and return the flattened deepest hidden state."""
    seg_len = args.conv_len
    data = data_util.fix_batch_len(data, seg_len, zeros=False)
    N, L = data.shape

    h1_en, c1_en, h2_en, c2_en, h3_en, c3_en = None, None, None, None, None, None
    for i in range(0, L, seg_len):
        data_curr_time = Variable(torch.FloatTensor(data[:, i:i + seg_len]), requires_grad=False)
        h1_en, c1_en, h2_en, c2_en, h3_en, c3_en = encoder(data_curr_time.view(N, 1, seg_len), i // seg_len,
                                                           h1_en, c1_en, h2_en, c2_en, h3_en, c3_en)

    return h3_en.data.numpy().reshape(N, -1)

--------------------------------------------------------------------------------
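Embedding size for the defaults, spelled out (illustrative arithmetic, not repo code): embeddings() flattens the deepest hidden state h3_en, which has shape (N, hidden_channels[2], conv_len).

    deepest_channels, conv_len = 4, 32  # values from fit() and the main.py defaults
    print(deepest_channels * conv_len)  # 128 features per series are fed to the SVM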
/src/data_utils.py:
--------------------------------------------------------------------------------
from __future__ import print_function

import matplotlib.pyplot as plt
import numpy as np

# import os
# datasets = filter(lambda x: os.path.isdir(x), os.listdir('.'))
# missing from the archive: ['Industrial Multivariate']

all_datasets = ['50words', 'Adiac', 'ArrowHead', 'Beef', 'BeetleFly', 'BirdChicken', 'CBF', 'Car',
                'ChlorineConcentration', 'CinC_ECG_torso', 'Coffee', 'Computers', 'Cricket_X', 'Cricket_Y',
                'Cricket_Z', 'DiatomSizeReduction', 'DistalPhalanxOutlineAgeGroup', 'DistalPhalanxOutlineCorrect',
                'DistalPhalanxTW', 'ECG200', 'ECG5000', 'ECGFiveDays', 'Earthquakes', 'ElectricDevices', 'FISH',
                'FaceAll', 'FaceFour', 'FacesUCR', 'FordA', 'FordB', 'Gun_Point', 'Ham', 'HandOutlines', 'Haptics',
                'Herring', 'InlineSkate', 'InsectWingbeatSound', 'ItalyPowerDemand', 'LargeKitchenAppliances',
                'Lighting2', 'Lighting7', 'MALLAT', 'Meat', 'MedicalImages', 'MiddlePhalanxOutlineAgeGroup',
                'MiddlePhalanxOutlineCorrect', 'MiddlePhalanxTW', 'MoteStrain', 'NonInvasiveFatalECG_Thorax1',
                'NonInvasiveFatalECG_Thorax2', 'OSULeaf', 'OliveOil', 'PhalangesOutlinesCorrect', 'Phoneme',
                'Plane', 'ProximalPhalanxOutlineAgeGroup', 'ProximalPhalanxOutlineCorrect', 'ProximalPhalanxTW',
                'RefrigerationDevices', 'ScreenType', 'ShapeletSim', 'ShapesAll', 'SmallKitchenAppliances',
                'SonyAIBORobotSurface', 'SonyAIBORobotSurfaceII', 'StarLightCurves', 'Strawberry', 'SwedishLeaf',
                'Symbols', 'ToeSegmentation1', 'ToeSegmentation2', 'Trace', 'TwoLeadECG', 'Two_Patterns',
                'UWaveGestureLibraryAll', 'Wine', 'WordsSynonyms', 'Worms', 'WormsTwoClass', 'synthetic_control',
                'uWaveGestureLibrary_X', 'uWaveGestureLibrary_Y', 'uWaveGestureLibrary_Z', 'wafer', 'yoga']

timenet_train_datasets = ['ArrowHead', 'ItalyPowerDemand', 'SonyAIBORobotSurface', 'SonyAIBORobotSurfaceII',
                          'TwoLeadECG', 'FacesUCR', 'Plane', 'Gun_Point', 'WordsSynonyms', 'ToeSegmentation1',
                          'ToeSegmentation2', 'Lighting7', 'DiatomSizeReduction', 'OSULeaf', 'Ham', 'FISH',
                          'ShapeletSim', 'ShapesAll']

timenet_val_datasets = ['MoteStrain', 'CBF', 'Trace', 'Symbols', 'Herring', 'Earthquakes']

train_datasets = ['50words', 'ArrowHead', 'Beef', 'BeetleFly', 'BirdChicken', 'CBF', 'Car', 'CinC_ECG_torso',
                  'Coffee', 'Computers', 'DiatomSizeReduction', 'ECG200', 'Earthquakes', 'FISH', 'FaceAll',
                  'FaceFour', 'FacesUCR', 'Gun_Point', 'Ham', 'HandOutlines', 'Haptics', 'Herring', 'InlineSkate',
                  'InsectWingbeatSound', 'ItalyPowerDemand', 'LargeKitchenAppliances', 'Lighting2', 'Lighting7',
                  'MALLAT', 'Meat', 'MoteStrain', 'NonInvasiveFatalECG_Thorax1', 'NonInvasiveFatalECG_Thorax2',
                  'OSULeaf', 'OliveOil', 'Phoneme', 'Plane', 'RefrigerationDevices', 'ScreenType', 'ShapeletSim',
                  'ShapesAll', 'SmallKitchenAppliances', 'SonyAIBORobotSurface', 'SonyAIBORobotSurfaceII',
                  'StarLightCurves', 'Symbols', 'ToeSegmentation1', 'ToeSegmentation2', 'Trace', 'TwoLeadECG',
                  'UWaveGestureLibraryAll', 'Wine', 'WordsSynonyms', 'Worms', 'WormsTwoClass']

test_datasets = ['Adiac', 'ChlorineConcentration', 'Cricket_X', 'Cricket_Y', 'Cricket_Z',
                 'DistalPhalanxOutlineAgeGroup', 'DistalPhalanxOutlineCorrect', 'DistalPhalanxTW', 'ECG5000',
                 'ECGFiveDays', 'ElectricDevices', 'FordA', 'FordB', 'MedicalImages', 'MiddlePhalanxOutlineAgeGroup',
                 'MiddlePhalanxOutlineCorrect', 'MiddlePhalanxTW', 'PhalangesOutlinesCorrect',
                 'ProximalPhalanxOutlineAgeGroup', 'ProximalPhalanxOutlineCorrect', 'ProximalPhalanxTW', 'Strawberry',
                 'SwedishLeaf', 'Two_Patterns', 'wafer', 'yoga', 'synthetic_control', 'uWaveGestureLibrary_X',
                 'uWaveGestureLibrary_Y', 'uWaveGestureLibrary_Z']

my_train_datasets = ['DistalPhalanxOutlineAgeGroup', 'DistalPhalanxOutlineCorrect', 'DistalPhalanxTW',
                     'MiddlePhalanxOutlineAgeGroup', 'MiddlePhalanxOutlineCorrect', 'MiddlePhalanxTW',
                     'PhalangesOutlinesCorrect']
my_test_datasets = ['ProximalPhalanxOutlineAgeGroup', 'ProximalPhalanxOutlineCorrect', 'ProximalPhalanxTW']

my_dataset = ['ChlorineConcentration', 'Cricket_X']

# published error rates used as baselines: TimeNet, and the best reported result per dataset
Timenet_Results = {'Adiac': 0.322, 'ChlorineConcentration': 0.269, 'Cricket_X': 0.300, 'Cricket_Y': 0.338,
                   'Cricket_Z': 0.308, 'DistalPhalanxOutlineAgeGroup': 0.223, 'DistalPhalanxOutlineCorrect': 0.188,
                   'DistalPhalanxTW': 0.208, 'ECG5000': 0.069, 'ECGFiveDays': 0.074, 'ElectricDevices': 0.267,
                   'FordA': 0.219, 'FordB': 0.263, 'MedicalImages': 0.250, 'MiddlePhalanxOutlineAgeGroup': 0.210,
                   'MiddlePhalanxOutlineCorrect': 0.270, 'MiddlePhalanxTW': 0.363, 'PhalangesOutlinesCorrect': 0.207,
                   'ProximalPhalanxOutlineAgeGroup': 0.146, 'ProximalPhalanxOutlineCorrect': 0.175,
                   'ProximalPhalanxTW': 0.195, 'Strawberry': 0.062, 'SwedishLeaf': 0.102, 'Two_Patterns': 0.0,
                   'wafer': 0.005, 'yoga': 0.160, 'synthetic_control': 0.013, 'uWaveGestureLibrary_X': 0.214,
                   'uWaveGestureLibrary_Y': 0.311, 'uWaveGestureLibrary_Z': 0.281}

Best_Results = {'Adiac': 0.322, 'ChlorineConcentration': 0.269, 'Cricket_X': 0.236, 'Cricket_Y': 0.197,
                'Cricket_Z': 0.180, 'DistalPhalanxOutlineAgeGroup': 0.160, 'DistalPhalanxOutlineCorrect': 0.187,
                'DistalPhalanxTW': 0.208, 'ECG5000': 0.066, 'ECGFiveDays': 0.063, 'ElectricDevices': 0.267,
                'FordA': 0.219, 'FordB': 0.263, 'MedicalImages': 0.247, 'MiddlePhalanxOutlineAgeGroup': 0.210,
                'MiddlePhalanxOutlineCorrect': 0.270, 'MiddlePhalanxTW': 0.363, 'PhalangesOutlinesCorrect': 0.207,
                'ProximalPhalanxOutlineAgeGroup': 0.137, 'ProximalPhalanxOutlineCorrect': 0.175,
                'ProximalPhalanxTW': 0.188, 'Strawberry': 0.062, 'SwedishLeaf': 0.099, 'Two_Patterns': 0.0,
                'wafer': 0.005, 'yoga': 0.155, 'synthetic_control': 0.013, 'uWaveGestureLibrary_X': 0.211,
                'uWaveGestureLibrary_Y': 0.291, 'uWaveGestureLibrary_Z': 0.280}
class MultipleDatasets(object):
    """Loads several UCR datasets, z-normalizes them, and serves mini-batches for auto-encoder training."""

    def __init__(self, directory, datasets=None, batch_size=1, merge_train_test=False,
                 length_constraint=512, data_length=0, val_percentage=0):
        self.data = {}
        self.directory = directory
        # kept by reference when a list is passed: load_data() also prunes the caller's list,
        # which classify_embeddings relies on when it iterates test_datasets afterwards
        self.datasets = datasets if datasets is not None else []
        self.batch_size = batch_size
        self.merge_train_test = merge_train_test

        self.length_constraint = length_constraint  # datasets with longer series are skipped

        self.force_length = False
        self.desired_length = data_length
        if self.desired_length > 0:
            self.force_length = True

        self.total_length = 35973  # precomputed total number of training series across the selected datasets

        self.need_validation_data = False
        self.val_percentage = val_percentage
        if self.val_percentage > 0:
            self.need_validation_data = True

    def load_data(self, verbose=False):
        """Read <dataset>_TRAIN / <dataset>_TEST from the UCR archive, normalize, and post-process."""
        data = {}
        remove_datasets = []
        for dataset_name in self.datasets:
            datadir = self.directory + '/' + dataset_name + '/' + dataset_name
            train = np.loadtxt(datadir + '_TRAIN', delimiter=',')
            test = np.loadtxt(datadir + '_TEST', delimiter=',')

            if train.shape[1] >= self.length_constraint:
                remove_datasets.append(dataset_name)
                continue

            dataset_data = {}
            dataset_data['X_train'] = train[:, 1:]  # the first column is the class label
            dataset_data['X_test'] = test[:, 1:]

            dataset_data['Y_train'] = train[:, 0]
            dataset_data['Y_test'] = test[:, 0]

            # standardize each time index using statistics over train + test combined
            mean_var_data = np.vstack((dataset_data['X_train'], dataset_data['X_test']))
            mean_data = np.mean(mean_var_data, axis=0)
            std_data = np.std(mean_var_data, axis=0)
            std_data[std_data == 0] = 1.0  # guard against division by zero at constant time indices

            dataset_data['X_train'] = (dataset_data['X_train'] - mean_data) / std_data
            dataset_data['X_test'] = (dataset_data['X_test'] - mean_data) / std_data

            data[dataset_name] = dataset_data
            if verbose:
                print(dataset_name, ', N = ', train.shape[0], ', L = ', train.shape[1],
                      ', min_label = ', np.min(train[:, 0]), ', max_label = ', np.max(train[:, 0]))

        for x in remove_datasets:  # remove datasets whose series exceed the length constraint
            self.datasets.remove(x)

        self.data = data

        if self.force_length:
            self.make_same_size(verbose)

        if self.merge_train_test:
            self.combine_train_test(verbose)

        if self.need_validation_data:
            self.make_validation_set(verbose)
    def make_validation_set(self, verbose):
        """Randomly split roughly val_percentage of each training set off as a validation set."""
        for dataset_name in self.datasets:
            dataset = self.data[dataset_name]
            num_train = dataset['X_train'].shape[0]
            mask = np.random.choice([0, 1], size=num_train, p=[self.val_percentage, 1 - self.val_percentage])
            dataset['X_val'] = dataset['X_train'][mask == 0]
            dataset['X_train'] = dataset['X_train'][mask == 1]

            dataset['Y_val'] = dataset['Y_train'][mask == 0]
            dataset['Y_train'] = dataset['Y_train'][mask == 1]

            assert dataset['X_val'].shape[0] + dataset['X_train'].shape[0] == num_train, \
                "error in splitting validation set"

    def combine_train_test(self, verbose):
        """
        These datasets are only used to train the auto-encoder, so the train and test
        splits are collated into a single training set.
        """
        for dataset_name in self.data:
            dataset = self.data[dataset_name]
            dataset['X_train'] = np.vstack((dataset['X_train'], dataset['X_test']))
            dataset['Y_train'] = np.concatenate((dataset['Y_train'], dataset['Y_test']), axis=0)
            dataset['X_test'] = {}
            dataset['Y_test'] = {}

    def make_same_size(self, verbose=False):
        """Tile every series periodically and truncate so all datasets share desired_length."""
        for dataset_name in self.data:
            dataset = self.data[dataset_name]
            for key in ['X_train', 'X_test']:
                curr_len = dataset[key].shape[1]
                rep = self.desired_length // curr_len + 1
                dataset[key] = np.tile(dataset[key], (1, rep))[:, :self.desired_length]

                assert dataset[key].shape[1] == self.desired_length, 'error in make_same_size'
                if verbose:
                    print(dataset[key].shape)

    def fix_batch_len(self, data, divby_len, zeros=False, verbose=False):
        """Extend the batch so its length is a multiple of divby_len, by periodic tiling or zero-padding."""
        curr_len = data.shape[1]
        add_len = (divby_len - curr_len % divby_len) % divby_len  # 0 when already divisible
        make_len = curr_len + add_len
        if add_len > 0:
            if not zeros:
                rep = make_len // curr_len + 1
                data = np.tile(data, (1, rep))[:, :make_len]
            else:
                data = np.hstack((data, np.zeros(shape=(data.shape[0], add_len))))
        assert data.shape[1] == make_len, 'error in fix_batch_len'
        return data

    def get_dataset(self, dataset):
        return self.data[dataset]

    def collate(self):
        """Stack the training sets of all loaded datasets into one (N, desired_length) array."""
        assert self.force_length, 'collate() requires all series to have the same length'
        return np.vstack([self.data[dataset]['X_train'] for dataset in self.data])

    def get_length(self):
        return self.total_length

    def get_batch(self):
        """Sample a random dataset, then a random mini-batch from its training set."""
        dataset_name = np.random.choice(self.datasets)
        num_train = self.data[dataset_name]['X_train'].shape[0]
        batch_mask = np.random.choice(num_train, self.batch_size)
        X_batch = self.data[dataset_name]['X_train'][batch_mask]
        y_batch = self.data[dataset_name]['Y_train'][batch_mask]
        return X_batch, y_batch


def my_plot(data):
    plt.figure()
    plt.imshow(data)
    plt.show()


def ts_plot(data):
    x = data['X_train']
    if isinstance(x, dict):
        for value in x.values():
            my_plot(value)
    else:
        my_plot(x)


if __name__ == "__main__":
    ucr = MultipleDatasets(directory="/home/gauravanand25/Dropbox/umass/682-nn/Project/UCR_TS_Archive_2015",
                           datasets=my_train_datasets, data_length=512, merge_train_test=True, val_percentage=0)
    ucr.load_data()
    data = ucr.collate()
    for dataset in my_train_datasets:
        ts_plot(ucr.get_dataset(dataset))

--------------------------------------------------------------------------------