├── .gitignore
├── .idea
│   ├── .gitignore
│   ├── inspectionProfiles
│   │   └── profiles_settings.xml
│   ├── misc.xml
│   ├── modules.xml
│   ├── sequential_model_pytorch_rnn_1dcnn.iml
│   └── vcs.xml
├── README.md
├── dataset.py
├── dataset2.py
├── dataset_list.py
├── main2.py
├── main_softmax.py
├── model.py
├── plot_log.py
├── test.py
├── train.py
└── train_2dcnn.py

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | outputs*/
2 | eval/
3 | models/
4 | __pycache__/
--------------------------------------------------------------------------------
/.idea/.gitignore:
--------------------------------------------------------------------------------
1 | # Default ignored files
2 | /shelf/
3 | /workspace.xml
4 | # Editor-based HTTP Client requests
5 | /httpRequests/
6 | # Datasource local storage ignored files
7 | /dataSources/
8 | /dataSources.local.xml
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Sequential Model PyTorch
2 |
3 | This repo provides a library supporting:
4 | - RNN, LSTM, GRU, bi-GRU, 1d-CNN, RCNN, etc.
5 | - Adaptable to variable-length input sequences.
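For example, a padded batch of variable-length landmark sequences can be scored with the `GRU_Classifier` from `model.py`. This is a minimal sketch: the 136-dim landmark features follow the convention used in this repo, while the sizes and the random tensors are placeholders for real data.

```python
import torch
from model import GRU_Classifier

# Two landmark tracks of different lengths; 68 points x 2 coords = 136 features per frame.
seqs = [torch.randn(50, 136), torch.randn(30, 136)]
lengths = [s.shape[0] for s in seqs]  # sorted longest-first, as pack_padded_sequence expects
padded = torch.nn.utils.rnn.pad_sequence(seqs, batch_first=True)  # (batch, max_seq, 136)

model = GRU_Classifier(embedding_dim=136, hidden_dim=272, target_size=1, n_layer=2)
model.eval()
with torch.no_grad():
    logits = model(padded, lengths)  # (batch, 1)
    probs = torch.sigmoid(logits)    # per-sequence probabilities
```

The `pad_collate` functions in the training scripts perform the same sort-by-length and zero-padding for whole datasets.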
6 |
7 | ## Quick start
8 |
9 | Choose the type of neural network in `train.py`.
10 |
11 | To train:
12 |
13 | ```bash
14 | python train.py
15 | ```
16 |
17 | To test:
18 |
19 | ```bash
20 | python test.py
21 | ```
--------------------------------------------------------------------------------
/dataset.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 |
5 | import torch.utils.data as data
6 |
7 | import os
8 | import os.path
9 | import re
10 | import torch
11 | import pickle
12 |
13 |
14 | def default_loader(path):
15 |     with open(path, 'rb') as fp:
16 |         lm_list = pickle.load(fp)
17 |     return lm_list
18 |
19 |
20 | def default_list_reader(fileList):
21 |     lmList = []
22 |     with open(fileList, 'r') as file:
23 |         for line in file.readlines():
24 |             lmPath = line.strip()[:-2].strip()  # each list line is "<pickle path> <label>"
25 |             label = line.strip()[-1]
26 |
27 |             lmList.append((lmPath, int(label)))
28 |     return lmList
29 |
30 |
31 | class LandmarkList(data.Dataset):
32 |     def __init__(self, root, fileList, transform=None, list_reader=default_list_reader, loader=default_loader):
33 |         self.root = root
34 |         self.lmList = list_reader(fileList)
35 |         self.transform = transform
36 |         self.loader = loader
37 |
38 |     def __getitem__(self, index):
39 |         lmPath, target = self.lmList[index]
40 |         lm = self.loader(os.path.join(self.root, lmPath))
41 |         if self.transform is not None:
42 |             lm = self.transform(lm)
43 |         return lm, target, lm.shape[0]
44 |
45 |     def __len__(self):
46 |         return len(self.lmList)
47 |
48 |
49 | class LandmarkListTest(data.Dataset):
50 |     def __init__(self, root, fileList, transform=None, list_reader=default_list_reader, loader=default_loader):
51 |         self.root = root
52 |         self.lmList = list_reader(fileList)
53 |         self.transform = transform
54 |         self.loader = loader
55 |
56 |     def __getitem__(self, index):
57 |         lmPath, target = self.lmList[index]
58 |         lm = self.loader(os.path.join(self.root, lmPath))
59 |         if self.transform is not None:
60 |             lm = self.transform(lm)
61 |         return lm, target, lm.shape[0], lmPath
62 |
63 |     def __len__(self):
64 |         return len(self.lmList)
--------------------------------------------------------------------------------
/dataset2.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 |
5 | import torch.utils.data as data
6 |
7 | import os
8 | import os.path
9 | import re
10 | import torch
11 | import pickle
12 |
13 |
14 | def default_loader(path):
15 |     with open(path, 'rb') as fp:
16 |         lm_list = pickle.load(fp)
17 |     return lm_list
18 |
19 |
20 | def default_list_reader(fileList):
21 |     lmList = []
22 |     with open(fileList, 'r') as file:
23 |         for line in file.readlines():
24 |             lmPath = line.strip()[:-2].strip()
25 |             label = line.strip()[-1]
26 |
27 |             lmList.append((lmPath, int(label)))
28 |     return lmList
29 |
30 |
31 | class LandmarkList(data.Dataset):
32 |     def __init__(self, root, fileList, transform=None, list_reader=default_list_reader, loader=default_loader):
33 |         self.root = root
34 |         self.lmList = list_reader(fileList)
35 |         self.transform = transform
36 |         self.loader = loader
37 |
38 |     def __getitem__(self, index):
39 |         lmPath, target = self.lmList[index]
40 |         lm = self.loader(os.path.join(self.root, lmPath))
41 |         if self.transform is not None:
42 |             lm = self.transform(lm)
43 | 
return lm, target, lm.shape[0] 44 | 45 | def __len__(self): 46 | return len(self.lmList) -------------------------------------------------------------------------------- /dataset_list.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import torch.utils.data as data 6 | 7 | import os 8 | import os.path 9 | import re 10 | import torch 11 | import pickle 12 | 13 | 14 | def default_loader(path): 15 | with open(path, 'rb') as fp: 16 | lm_list = pickle.load(fp) 17 | fp.close() 18 | return lm_list 19 | 20 | def default_list_reader(fileList): 21 | lmList = [] 22 | with open(fileList, 'r') as file: 23 | for line in file.readlines(): 24 | lmPath=line.strip()[:-2].strip() 25 | label=line.strip()[-1] 26 | 27 | lmList.append((lmPath, int(label))) 28 | return lmList 29 | 30 | 31 | class LandmarkList(data.Dataset): 32 | def __init__(self, root, fileList, transform=None, list_reader=default_list_reader, loader=default_loader): 33 | self.root = root 34 | self.lmList = list_reader(fileList) 35 | self.transform = transform 36 | self.loader = loader 37 | 38 | def __getitem__(self, index): 39 | lmPath, target = self.lmList[index] 40 | lm = self.loader(os.path.join(self.root, lmPath)) 41 | if self.transform is not None: 42 | lm = self.transform(lm) 43 | return lm, target, len(lm) 44 | 45 | def __len__(self): 46 | return len(self.lmList) -------------------------------------------------------------------------------- /main2.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | import torch.optim as optim 5 | from dataset2 import LandmarkList 6 | from torch.utils import data 7 | from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence 8 | import argparse 9 | 10 | # parser.add_argument('--root_path', type=str, default='/home/guosheng/Liveness/Code/FaceFlashing/Data/', 11 | # metavar='H', 12 | # help='Dir Head') 13 | # parser.add_argument('--trainFile', type=str, default='TrainList_4sources_13082019.txt', metavar='TRF', help='training file name') 14 | 15 | 16 | EMBEDDING_DIM = 68*2 17 | HIDDEN_DIM = 68*4 18 | MAX_EPOCH = 10 19 | DEVICES = 2 20 | torch.cuda.set_device(DEVICES) 21 | 22 | 23 | def pad_collate(batch): 24 | batch.sort(key=lambda x: x[2], reverse=True) 25 | lms, tgs, lens = zip(*batch) 26 | new_lms = torch.zeros((len(lms), lms[0].shape[0], lms[0].shape[1])) # batch x seq x feature(136) 27 | new_lms[0] = lms[0] 28 | for i in range(1, len(lms)): 29 | # import pdb; 30 | # pdb.set_trace() 31 | new_lms[i] = torch.cat((lms[i], torch.zeros((lens[0] - lens[i]),136)), 0) 32 | return new_lms, tgs, lens 33 | 34 | 35 | class LSTM_Classifier(nn.Module): 36 | 37 | def __init__(self, embedding_dim, hidden_dim, target_size=1): 38 | super(LSTM_Classifier, self).__init__() 39 | self.hidden_dim = hidden_dim 40 | 41 | # The LSTM takes word embeddings as inputs, and outputs hidden states 42 | # with dimensionality hidden_dim. 
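# Here the "embeddings" are 136-dim landmark frames (68 points x 2 coords);
# forward() packs each padded batch, so the zero rows appended by pad_collate
# are never unrolled through the LSTM.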
43 | self.lstm = nn.LSTM(embedding_dim, hidden_dim, num_layers=2) 44 | 45 | # The linear layer that maps from hidden state space to tag space 46 | self.lc = nn.Linear(hidden_dim, target_size) 47 | 48 | def forward(self, landmarks, lengths): 49 | # pack_padded_sequence so that padded items in the sequence won't be shown to the LSTM 50 | packed_input = pack_padded_sequence(landmarks, lengths, batch_first=True) 51 | _, (ht, _) = self.lstm(packed_input) 52 | # import pdb; 53 | # pdb.set_trace() 54 | # packed_output, (ht, ct) = self.lstm(packed_input) # ht is the final output of each batch! ht (1, 4, 272) can be found in output[:,input_sizes-1,:] 55 | # output, input_sizes = pad_packed_sequence(packed_output, batch_first=True) 56 | ''' 57 | (Pdb) output[:,input_sizes-1,:] 58 | tensor([[[-0.0176, 0.1605, 0.1339, ..., -0.0914, 0.2951, -0.0065], 59 | [-0.0225, 0.1589, 0.1340, ..., -0.0925, 0.2950, -0.0095], 60 | [-0.0253, 0.1574, 0.1431, ..., -0.0865, 0.3022, -0.0119], 61 | [-0.0303, 0.1515, 0.1422, ..., -0.1094, 0.2976, -0.0032]], 62 | 63 | [[ 0.0000, 0.0000, 0.0000, ..., 0.0000, 0.0000, 0.0000], 64 | [-0.0165, 0.1666, 0.1344, ..., -0.0698, 0.2945, -0.0163], 65 | [-0.0235, 0.1697, 0.1479, ..., -0.0657, 0.3001, -0.0195], 66 | [-0.0235, 0.1734, 0.1515, ..., -0.0608, 0.3029, -0.0201]], 67 | 68 | [[ 0.0000, 0.0000, 0.0000, ..., 0.0000, 0.0000, 0.0000], 69 | [ 0.0000, 0.0000, 0.0000, ..., 0.0000, 0.0000, 0.0000], 70 | [-0.0492, 0.1666, 0.1444, ..., -0.0749, 0.2816, -0.0188], 71 | [-0.0490, 0.1542, 0.1449, ..., -0.0865, 0.2821, -0.0205]], 72 | 73 | [[ 0.0000, 0.0000, 0.0000, ..., 0.0000, 0.0000, 0.0000], 74 | [ 0.0000, 0.0000, 0.0000, ..., 0.0000, 0.0000, 0.0000], 75 | [ 0.0000, 0.0000, 0.0000, ..., 0.0000, 0.0000, 0.0000], 76 | [-0.0460, 0.1522, 0.1381, ..., -0.0959, 0.2843, -0.0071]]], 77 | device='cuda:2', grad_fn=) 78 | (Pdb) ht.shape 79 | torch.Size([1, 4, 272]) 80 | (Pdb) ht 81 | tensor([[[-0.0176, 0.1605, 0.1339, ..., -0.0914, 0.2951, -0.0065], 82 | [-0.0165, 0.1666, 0.1344, ..., -0.0698, 0.2945, -0.0163], 83 | [-0.0492, 0.1666, 0.1444, ..., -0.0749, 0.2816, -0.0188], 84 | [-0.0460, 0.1522, 0.1381, ..., -0.0959, 0.2843, -0.0071]]], 85 | device='cuda:2', grad_fn=) 86 | 87 | ''' 88 | # import pdb; 89 | # pdb.set_trace() 90 | logit = self.lc(ht[-1]) 91 | return logit 92 | 93 | 94 | # inp = [torch.randn(1, 68*2) for _ in range(5)] 95 | # print(inp) 96 | # inp = torch.cat(inp) 97 | # print(inp) 98 | # model = LSTM_Classifier(EMBEDDING_DIM, HIDDEN_DIM, 1) 99 | # out = model(inp) 100 | # print(out) 101 | 102 | 103 | model = LSTM_Classifier(EMBEDDING_DIM, HIDDEN_DIM, 1) 104 | model = model.cuda() 105 | loss_function = torch.nn.BCEWithLogitsLoss() 106 | optimizer = optim.Adam(model.parameters(), lr=1e-4) 107 | # l2 = torch.nn.BCELoss() 108 | 109 | 110 | # for i in range(2): 111 | # model.zero_grad() 112 | # inp = [torch.randn(1, 68*2) for _ in range(5)] 113 | # inp = torch.cat(inp) 114 | # out = model(inp)[-1] # we could do a classifcation for every output (probably better) 115 | # print(out) 116 | # loss = loss_function(out,torch.Tensor(1)) 117 | # # loss = l2(nn.Sigmoid()(out), torch.Tensor(1)) 118 | # print(loss) 119 | # loss.backward() 120 | # optimizer.step() 121 | 122 | 123 | dataset_train = LandmarkList(root='/datasets/move_closer/Data_Landmark/', fileList='/datasets/move_closer/TrainList.txt') 124 | dataloader_train = data.DataLoader(dataset_train, batch_size=128, shuffle=True, num_workers=4, collate_fn=pad_collate) 125 | 126 | for i in range(MAX_EPOCH): 127 | for batch, labels, lengths in 
dataloader_train:
128 |         model.zero_grad()
129 |         out = model(batch.cuda(), lengths)  # we could do a classification for every output (probably better)
130 |         # import pdb;
131 |         # pdb.set_trace()
132 |         loss = loss_function(out, torch.FloatTensor(labels).unsqueeze(1).cuda())
133 |         # loss = l2(nn.Sigmoid()(out), labels)
134 |         print(loss.item())
135 |         loss.backward()
136 |         optimizer.step()
137 |
138 |
139 |
140 |
141 |
142 |
143 |
144 | # Demo to prove batched and single-sample runs give the same outputs, i.e. that the padding is not fed through the RNN continuously!
145 | # for batch, labels, lengths in dataloader_train:
146 | #     model.zero_grad()
147 | #     out = model(batch.cuda(), lengths)  # we could do a classification for every output (probably better)
148 | #     import pdb;
149 | #     pdb.set_trace()
150 | #
151 | # dataloader_train = data.DataLoader(dataset_train, batch_size=1, shuffle=False, num_workers=0, collate_fn=pad_collate)
152 | # for batch, labels, lengths in dataloader_train:
153 | #     model.zero_grad()
154 | #     out = model(batch.cuda(), lengths)  # we could do a classification for every output (probably better)
155 | #     import pdb;
156 | #     pdb.set_trace()
157 |
158 |
159 |
160 |
--------------------------------------------------------------------------------
/main_softmax.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.nn.functional as F
4 | import torch.optim as optim
5 | from dataset import LandmarkList
6 | from torch.utils import data
7 | from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence
8 | import argparse
9 |
10 |
11 | # rnn = 'GRU'
12 | # rnn = 'embedGRU'
13 | rnn = 'biGRU'
14 | # rnn = 'LSTM'
15 | EMBEDDING_DIM = 68*2
16 | HIDDEN_DIM = 68*2*2
17 | N_LAYERS_RNN = 3
18 | DROPOUT = 0.5
19 | MAX_EPOCH = 1000
20 | LR = 1e-4
21 | DEVICES = 1
22 | torch.cuda.set_device(DEVICES)
23 | SAVE_BEST_MODEL = True
24 |
25 |
26 | def compute_binary_accuracy(model, data_loader, loss_function):
27 |     correct_pred, num_examples, total_loss = 0, 0, 0.
28 |     model.eval()
29 |     with torch.no_grad():
30 |         for batch, labels, lengths in data_loader:
31 |             # import pdb;
32 |             # pdb.set_trace()
33 |             logits = model(batch.cuda(), lengths)
34 |             total_loss += loss_function(logits, torch.LongTensor(labels).cuda()).item()
35 |             # predicted_labels = (torch.sigmoid(logits) > 0.5).long()
36 |             _, predicted_labels = torch.max(logits, 1)
37 |             num_examples += len(lengths)
38 |             correct_pred += (predicted_labels.cpu().long() == torch.LongTensor(labels)).sum()
39 |     return correct_pred.float().item()/num_examples * 100, total_loss
40 |
41 |
42 | def pad_collate(batch):
43 |     batch.sort(key=lambda x: x[2], reverse=True)
44 |     lms, tgs, lens = zip(*batch)
45 |     new_lms = torch.zeros((len(lms), lms[0].shape[0], lms[0].shape[1]))  # batch x seq x feature(136)
46 |     new_lms[0] = lms[0]
47 |     for i in range(1, len(lms)):
48 |         new_lms[i] = torch.cat((lms[i], torch.zeros((lens[0] - lens[i]), 136)), 0)
49 |     return new_lms, tgs, lens
50 |
51 |
52 | class LSTM_Classifier(nn.Module):
53 |
54 |     def __init__(self, embedding_dim, hidden_dim, target_size=1, bidirectional=False):
55 |         super(LSTM_Classifier, self).__init__()
56 |         self.hidden_dim = hidden_dim
57 |
58 |         # The LSTM takes landmark feature vectors as inputs, and outputs hidden states
59 |         # with dimensionality hidden_dim.
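# Note: nn.LSTM applies this dropout between stacked layers only, so it is
# active here because N_LAYERS_RNN > 1 (PyTorch warns when num_layers == 1).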
60 | self.lstm = nn.LSTM(embedding_dim, hidden_dim, num_layers=N_LAYERS_RNN, dropout=DROPOUT) 61 | 62 | # The linear layer that maps from hidden state space to tag space 63 | self.lc = nn.Linear(hidden_dim, target_size) 64 | self.dropout = nn.Dropout(DROPOUT) 65 | 66 | def forward(self, landmarks, lengths): 67 | packed_input = pack_padded_sequence(landmarks, lengths, batch_first=True) 68 | _, (ht, _) = self.lstm(packed_input) 69 | ht = self.dropout(ht[-1]) 70 | logit = self.lc(ht) 71 | return logit 72 | 73 | 74 | class embed_GRU_Classifier(nn.Module): 75 | 76 | def __init__(self, embedding_dim, hidden_dim, target_size=1, bidirectional=False): 77 | super(embed_GRU_Classifier, self).__init__() 78 | self.hidden_dim = hidden_dim 79 | 80 | self.embed = nn.Linear(EMBEDDING_DIM, EMBEDDING_DIM, bias=False) 81 | # The LSTM takes word embeddings as inputs, and outputs hidden states 82 | # with dimensionality hidden_dim 83 | self.gru = nn.GRU(embedding_dim, hidden_dim, num_layers=N_LAYERS_RNN, bidirectional=bidirectional, dropout=DROPOUT) 84 | 85 | # The linear layer that maps from hidden state space to tag space 86 | self.lc1 = nn.Linear(hidden_dim,EMBEDDING_DIM) 87 | self.lc2 = nn.Linear(EMBEDDING_DIM, target_size) 88 | self.dropout = nn.Dropout(DROPOUT) 89 | 90 | def forward(self, landmarks, lengths): 91 | # import pdb; pdb.set_trace() 92 | landmarks = self.embed(landmarks) 93 | packed_input = pack_padded_sequence(landmarks, lengths, batch_first=True) 94 | _, ht = self.gru(packed_input) 95 | # import pdb; pdb.set_trace() 96 | ht = self.dropout(ht[-1]) 97 | logit = self.lc2(F.tanh(self.lc1(ht))) 98 | return logit 99 | 100 | 101 | class GRU_Classifier(nn.Module): 102 | 103 | def __init__(self, embedding_dim, hidden_dim, target_size=1, bidirectional=False): 104 | super(GRU_Classifier, self).__init__() 105 | self.hidden_dim = hidden_dim 106 | 107 | # The LSTM takes word embeddings as inputs, and outputs hidden states 108 | # with dimensionality hidden_dim 109 | self.gru = nn.GRU(embedding_dim, hidden_dim, num_layers=N_LAYERS_RNN, bidirectional=bidirectional, dropout=DROPOUT) 110 | 111 | # The linear layer that maps from hidden state space to tag space 112 | self.lc1 = nn.Linear(hidden_dim,target_size) 113 | # self.lc1 = nn.Linear(hidden_dim,EMBEDDING_DIM) 114 | # self.lc2 = nn.Linear(EMBEDDING_DIM, target_size) 115 | self.dropout = nn.Dropout(DROPOUT) 116 | 117 | def forward(self, landmarks, lengths): 118 | packed_input = pack_padded_sequence(landmarks, lengths, batch_first=True) 119 | _, ht = self.gru(packed_input) 120 | # import pdb; pdb.set_trace() 121 | ht = self.dropout(ht[-1]) 122 | logit = self.lc1(ht) 123 | # logit = self.lc2(F.relu(self.lc1(ht))) 124 | return logit 125 | 126 | 127 | class biGRU_Classifier(nn.Module): 128 | 129 | def __init__(self, embedding_dim, hidden_dim, target_size=1, bidirectional=True): 130 | super(biGRU_Classifier, self).__init__() 131 | self.hidden_dim = hidden_dim 132 | 133 | # The LSTM takes word embeddings as inputs, and outputs hidden states 134 | # with dimensionality hidden_dim. 
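# With bidirectional=True, ht stacks 2 * num_layers direction-specific states;
# forward() concatenates the top layer's forward state ht[-2] and backward
# state ht[-1] into a single 2*hidden_dim feature for the classifier head.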
135 | self.gru = nn.GRU(embedding_dim, hidden_dim, num_layers=N_LAYERS_RNN, bidirectional=bidirectional, dropout=DROPOUT) 136 | 137 | # The linear layer that maps from hidden state space to tag space 138 | self.lc1 = nn.Linear(hidden_dim*2, hidden_dim) 139 | self.lc2 = nn.Linear(hidden_dim, target_size) 140 | self.dropout = nn.Dropout(DROPOUT) 141 | 142 | def forward(self, landmarks, lengths): 143 | packed_input = pack_padded_sequence(landmarks, lengths, batch_first=True) 144 | _, ht = self.gru(packed_input) 145 | # import pdb; pdb.set_trace() 146 | ht = self.dropout(torch.cat((ht[-2,:,:], ht[-1,:,:]), dim=1)) 147 | logit = self.lc2(F.relu(self.lc1(ht))) 148 | return logit 149 | 150 | 151 | if rnn == 'embedGRU': 152 | model = embed_GRU_Classifier(EMBEDDING_DIM, HIDDEN_DIM, 2) 153 | if rnn == 'GRU': 154 | model = GRU_Classifier(EMBEDDING_DIM, HIDDEN_DIM, 2) 155 | if rnn == 'biGRU': 156 | model = biGRU_Classifier(EMBEDDING_DIM, HIDDEN_DIM, 2) 157 | if rnn == 'LSTM': 158 | model = LSTM_Classifier(EMBEDDING_DIM, HIDDEN_DIM, 2) 159 | model = model.cuda() 160 | loss_function = torch.nn.CrossEntropyLoss() 161 | loss_function_eval_sum = torch.nn.CrossEntropyLoss(reduction='sum') 162 | optimizer = optim.Adam(model.parameters(), lr=LR) 163 | # l2 = torch.nn.BCELoss() 164 | 165 | 166 | dataset_train = LandmarkList(root='/datasets/move_closer/Data_Landmark/', fileList='/datasets/move_closer/TrainList.txt') 167 | dataloader_train = data.DataLoader(dataset_train, batch_size=128, shuffle=True, num_workers=0, collate_fn=pad_collate) 168 | 169 | dataset_test = LandmarkList(root='/datasets/move_closer/Data_Landmark/', fileList='/datasets/move_closer/TestList.txt') 170 | dataloader_test = data.DataLoader(dataset_test, batch_size=64, shuffle=False, num_workers=1, collate_fn=pad_collate) 171 | 172 | best_test_acc = 0. 173 | for epoch in range(MAX_EPOCH): 174 | model.train() 175 | n_iter = 0 176 | for batch, labels, lengths in dataloader_train: 177 | model.zero_grad() 178 | out = model(batch.cuda(), lengths) # we could do a classifcation for every output (probably better) 179 | # import pdb; pdb.set_trace() 180 | loss = loss_function(out, torch.LongTensor(labels).cuda()) 181 | # loss = l2(nn.Sigmoid()(out), labels) 182 | loss.backward() 183 | optimizer.step() 184 | n_iter += 1 185 | train_acc, train_loss = compute_binary_accuracy(model, dataloader_train, loss_function_eval_sum) 186 | test_acc, test_loss = compute_binary_accuracy(model, dataloader_test, loss_function_eval_sum) 187 | print('Epoch{},train_acc,{:.2f}%,train_loss,{:.8f},valid_acc,{:.2f}%,valid_loss,{:.8f}'.format(epoch, train_acc, train_loss, test_acc, test_loss)) 188 | if test_acc > best_test_acc: 189 | best_test_acc = test_acc 190 | if SAVE_BEST_MODEL: 191 | torch.save(model.state_dict(), 'models/' + rnn + 192 | '_L' + str(N_LAYERS_RNN) + '.pt') 193 | print('best epoch {}, train_acc {}, test_acc {}'.format(epoch, train_acc, test_acc)) 194 | 195 | 196 | 197 | 198 | 199 | 200 | 201 | 202 | 203 | 204 | # class LSTM_Classifier(nn.Module): 205 | # 206 | # def __init__(self, embedding_dim, hidden_dim, target_size=1): 207 | # super(LSTM_Classifier, self).__init__() 208 | # self.hidden_dim = hidden_dim 209 | # 210 | # # The LSTM takes word embeddings as inputs, and outputs hidden states 211 | # # with dimensionality hidden_dim. 
212 | # self.lstm = nn.LSTM(embedding_dim, hidden_dim) 213 | # 214 | # # The linear layer that maps from hidden state space to tag space 215 | # self.lc = nn.Linear(hidden_dim, target_size) 216 | # 217 | # def forward(self, landmarks, lengths): 218 | # # pack_padded_sequence so that padded items in the sequence won't be shown to the LSTM 219 | # packed_input = pack_padded_sequence(landmarks, lengths, batch_first=True) 220 | # _, (ht, _) = self.lstm(packed_input) 221 | # import pdb; 222 | # pdb.set_trace() 223 | # # packed_output, (ht, ct) = self.lstm(packed_input) # ht is the final output of each batch! ht (1, 4, 272) can be found in output[:,input_sizes-1,:] 224 | # # output, input_sizes = pad_packed_sequence(packed_output, batch_first=True) 225 | # ''' 226 | # (Pdb) output[:,input_sizes-1,:] 227 | # tensor([[[-0.0176, 0.1605, 0.1339, ..., -0.0914, 0.2951, -0.0065], 228 | # [-0.0225, 0.1589, 0.1340, ..., -0.0925, 0.2950, -0.0095], 229 | # [-0.0253, 0.1574, 0.1431, ..., -0.0865, 0.3022, -0.0119], 230 | # [-0.0303, 0.1515, 0.1422, ..., -0.1094, 0.2976, -0.0032]], 231 | # 232 | # [[ 0.0000, 0.0000, 0.0000, ..., 0.0000, 0.0000, 0.0000], 233 | # [-0.0165, 0.1666, 0.1344, ..., -0.0698, 0.2945, -0.0163], 234 | # [-0.0235, 0.1697, 0.1479, ..., -0.0657, 0.3001, -0.0195], 235 | # [-0.0235, 0.1734, 0.1515, ..., -0.0608, 0.3029, -0.0201]], 236 | # 237 | # [[ 0.0000, 0.0000, 0.0000, ..., 0.0000, 0.0000, 0.0000], 238 | # [ 0.0000, 0.0000, 0.0000, ..., 0.0000, 0.0000, 0.0000], 239 | # [-0.0492, 0.1666, 0.1444, ..., -0.0749, 0.2816, -0.0188], 240 | # [-0.0490, 0.1542, 0.1449, ..., -0.0865, 0.2821, -0.0205]], 241 | # 242 | # [[ 0.0000, 0.0000, 0.0000, ..., 0.0000, 0.0000, 0.0000], 243 | # [ 0.0000, 0.0000, 0.0000, ..., 0.0000, 0.0000, 0.0000], 244 | # [ 0.0000, 0.0000, 0.0000, ..., 0.0000, 0.0000, 0.0000], 245 | # [-0.0460, 0.1522, 0.1381, ..., -0.0959, 0.2843, -0.0071]]], 246 | # device='cuda:2', grad_fn=) 247 | # (Pdb) ht.shape 248 | # torch.Size([1, 4, 272]) 249 | # (Pdb) ht 250 | # tensor([[[-0.0176, 0.1605, 0.1339, ..., -0.0914, 0.2951, -0.0065], 251 | # [-0.0165, 0.1666, 0.1344, ..., -0.0698, 0.2945, -0.0163], 252 | # [-0.0492, 0.1666, 0.1444, ..., -0.0749, 0.2816, -0.0188], 253 | # [-0.0460, 0.1522, 0.1381, ..., -0.0959, 0.2843, -0.0071]]], 254 | # device='cuda:2', grad_fn=) 255 | # 256 | # ''' 257 | # # import pdb; 258 | # # pdb.set_trace() 259 | # logit = self.lc(ht.squeeze(0)) 260 | # return logit -------------------------------------------------------------------------------- /model.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence 5 | 6 | 7 | DROPOUT = 0.5 8 | 9 | 10 | class LSTM_Classifier(nn.Module): 11 | 12 | def __init__(self, embedding_dim, hidden_dim, target_size=1, bidirectional=False, n_layer=1): 13 | super(LSTM_Classifier, self).__init__() 14 | self.hidden_dim = hidden_dim 15 | 16 | # The LSTM takes word embeddings as inputs, and outputs hidden states 17 | # with dimensionality hidden_dim. 
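# ht returned by nn.LSTM has shape (num_layers, batch, hidden_dim); because
# the input is packed, ht[-1] (used in forward) holds the top layer's state at
# each sequence's true last frame rather than at the padded length.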
18 | self.lstm = nn.LSTM(embedding_dim, hidden_dim, num_layers=n_layer, dropout=DROPOUT) 19 | 20 | # The linear layer that maps from hidden state space to tag space 21 | self.lc = nn.Linear(hidden_dim, target_size) 22 | self.dropout = nn.Dropout(DROPOUT) 23 | 24 | def forward(self, landmarks, lengths): 25 | packed_input = pack_padded_sequence(landmarks, lengths, batch_first=True) 26 | _, (ht, _) = self.lstm(packed_input) 27 | ht = self.dropout(ht[-1]) 28 | logit = self.lc(ht) 29 | return logit 30 | 31 | 32 | class embed_GRU_Classifier(nn.Module): 33 | 34 | def __init__(self, embedding_dim, hidden_dim, target_size=1, bidirectional=False, n_layer=1): 35 | super(embed_GRU_Classifier, self).__init__() 36 | self.hidden_dim = hidden_dim 37 | 38 | self.embed1 = nn.Linear(embedding_dim, int(hidden_dim*2), bias=False) 39 | self.embed2 = nn.Linear(int(hidden_dim*2), hidden_dim, bias=False) 40 | # The LSTM takes word embeddings as inputs, and outputs hidden states 41 | # with dimensionality hidden_dim 42 | self.gru = nn.GRU(hidden_dim, hidden_dim, num_layers=n_layer, bidirectional=bidirectional, dropout=DROPOUT) 43 | 44 | # The linear layer that maps from hidden state space to tag space 45 | self.lc1 = nn.Linear(hidden_dim,int(hidden_dim/2)) 46 | self.lc2 = nn.Linear(int(hidden_dim/2), target_size) 47 | self.dropout = nn.Dropout(DROPOUT) 48 | 49 | # super(embed_GRU_Classifier, self).__init__() 50 | # self.hidden_dim = hidden_dim 51 | # 52 | # self.embed1 = nn.Linear(embedding_dim, int(embedding_dim/2), bias=False) 53 | # self.embed2 = nn.Linear(int(embedding_dim/2), int(embedding_dim/4), bias=False) 54 | # # The LSTM takes word embeddings as inputs, and outputs hidden states 55 | # # with dimensionality hidden_dim 56 | # self.gru = nn.GRU(int(embedding_dim/4), int(embedding_dim/4), num_layers=n_layer, bidirectional=bidirectional, dropout=DROPOUT) 57 | # 58 | # # The linear layer that maps from hidden state space to tag space 59 | # self.lc1 = nn.Linear(int(embedding_dim/4),int(embedding_dim/8)) 60 | # self.lc2 = nn.Linear(int(embedding_dim/8), target_size) 61 | # self.dropout = nn.Dropout(DROPOUT) 62 | 63 | def forward(self, landmarks, lengths): 64 | # import pdb; pdb.set_trace() 65 | landmarks = F.tanh(self.embed2(F.tanh(self.embed1(landmarks)))) 66 | packed_input = pack_padded_sequence(landmarks, lengths, batch_first=True) 67 | _, ht = self.gru(packed_input) 68 | # import pdb; pdb.set_trace() 69 | ht = self.dropout(ht[-1]) 70 | logit = self.lc2(F.tanh(self.lc1(ht))) 71 | return logit 72 | 73 | 74 | class GRU_Classifier(nn.Module): 75 | 76 | def __init__(self, embedding_dim, hidden_dim, target_size=1, bidirectional=False, n_layer=1): 77 | super(GRU_Classifier, self).__init__() 78 | self.hidden_dim = hidden_dim 79 | self.grad_clipping = 10. 
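# Clamp threshold used in forward(): gradients flowing back through the final
# hidden state are clipped element-wise to [-grad_clipping, grad_clipping] via
# register_hook, a per-tensor alternative to clip_grad_norm_.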
80 | # The LSTM takes word embeddings as inputs, and outputs hidden states 81 | # with dimensionality hidden_dim 82 | self.gru = nn.GRU(embedding_dim, hidden_dim, num_layers=n_layer, bidirectional=bidirectional, dropout=DROPOUT) 83 | 84 | # The linear layer that maps from hidden state space to tag space 85 | self.lc1 = nn.Linear(hidden_dim,target_size) 86 | # self.lc1 = nn.Linear(hidden_dim,EMBEDDING_DIM) 87 | # self.lc2 = nn.Linear(EMBEDDING_DIM, target_size) 88 | self.dropout = nn.Dropout(DROPOUT) 89 | 90 | def forward(self, landmarks, lengths): 91 | packed_input = pack_padded_sequence(landmarks, lengths, batch_first=True) 92 | _, ht = self.gru(packed_input) 93 | # import pdb; pdb.set_trace() 94 | if ht.requires_grad: 95 | ht.register_hook(lambda x: x.clamp(min=-self.grad_clipping, max=self.grad_clipping)) 96 | ht = self.dropout(ht[-1]) 97 | logit = self.lc1(ht) # probably a 1x1 conv is need to do linear transform 98 | # logit = self.lc2(F.relu(self.lc1(ht))) 99 | return logit 100 | 101 | 102 | class biGRU_Classifier(nn.Module): 103 | 104 | def __init__(self, embedding_dim, hidden_dim, target_size=1, bidirectional=True, n_layer=1): 105 | super(biGRU_Classifier, self).__init__() 106 | self.hidden_dim = hidden_dim 107 | self.grad_clipping = 10. 108 | # The LSTM takes word embeddings as inputs, and outputs hidden states 109 | # with dimensionality hidden_dim. 110 | self.gru = nn.GRU(embedding_dim, hidden_dim, num_layers=n_layer, bidirectional=bidirectional, dropout=DROPOUT) 111 | 112 | # The linear layer that maps from hidden state space to tag space 113 | self.lc1 = nn.Linear(hidden_dim*2, hidden_dim) 114 | self.lc2 = nn.Linear(hidden_dim, target_size) 115 | self.dropout = nn.Dropout(DROPOUT) 116 | 117 | def forward(self, landmarks, lengths): 118 | packed_input = pack_padded_sequence(landmarks, lengths, batch_first=True) 119 | _, ht = self.gru(packed_input) 120 | if ht.requires_grad: 121 | ht.register_hook(lambda x: x.clamp(min=-self.grad_clipping, max=self.grad_clipping)) 122 | ht = self.dropout(torch.cat((ht[-2,:,:], ht[-1,:,:]), dim=1)) 123 | logit = self.lc2(F.relu(self.lc1(ht))) 124 | return logit 125 | 126 | 127 | class Framewise_GRU_Classifier(nn.Module): 128 | 129 | def __init__(self, embedding_dim, hidden_dim, target_size=1, bidirectional=False, n_layer=1): 130 | super(Framewise_GRU_Classifier, self).__init__() 131 | self.hidden_dim = hidden_dim 132 | self.gru = nn.GRU(embedding_dim, hidden_dim, num_layers=n_layer, bidirectional=bidirectional, dropout=DROPOUT) 133 | 134 | # The linear layer that maps from hidden state space to tag space 135 | # self.lc1 = nn.Linear(hidden_dim, target_size) 136 | self.lc1 = nn.Linear(hidden_dim, embedding_dim) 137 | self.lc2 = nn.Linear(embedding_dim, target_size) 138 | self.dropout = nn.Dropout(DROPOUT) 139 | 140 | def forward(self, landmarks, lengths): 141 | packed_input = pack_padded_sequence(landmarks, lengths, batch_first=True) 142 | packed_output, _ = self.gru(packed_input) 143 | output, _ = pad_packed_sequence(packed_output, batch_first=True) 144 | output = output.contiguous() 145 | output = output.view(-1, self.hidden_dim) 146 | output = self.dropout(output) 147 | logit = self.lc1(output) # probably a 1x1 conv is need to do linear transform 148 | logit = self.lc2(self.dropout(F.relu(logit))) 149 | return logit.view(len(lengths), -1, 1) 150 | 151 | 152 | class sumGRU(nn.Module): 153 | 154 | def __init__(self, embedding_dim, hidden_dim, target_size=1, bidirectional=False, n_layer=1): 155 | super(sumGRU, self).__init__() 156 | self.hidden_dim 
= hidden_dim 157 | self.gru = nn.GRU(embedding_dim, hidden_dim, num_layers=n_layer, bidirectional=bidirectional, dropout=DROPOUT) 158 | 159 | # The linear layer that maps from hidden state space to tag space 160 | self.lc1 = nn.Linear(hidden_dim, embedding_dim) 161 | self.lc2 = nn.Linear(embedding_dim, target_size) 162 | self.dropout = nn.Dropout(DROPOUT) 163 | 164 | def forward(self, landmarks, lengths): 165 | packed_input = pack_padded_sequence(landmarks, lengths, batch_first=True) 166 | packed_output, _ = self.gru(packed_input) 167 | output, _ = pad_packed_sequence(packed_output, batch_first=True) 168 | # import pdb; pdb.set_trace() 169 | output = self.dropout(output.sum(1)) 170 | # logit = self.lc1(output) # probably a 1x1 conv is need to do linear transform 171 | logit = self.lc2(F.relu(self.lc1(output))) 172 | return logit 173 | 174 | 175 | 176 | class cnn_2d(nn.Module): 177 | 178 | def __init__(self, embedding_dim, hidden_dim, target_size=1, bidirectional=False): 179 | super(cnn_2d, self).__init__() 180 | self.hidden_dim = hidden_dim 181 | self.n_layers = 2 # 2, 4, 6 ,8 182 | if self.n_layers >= 2: 183 | self.conv1 = nn.Conv1d(in_channels=embedding_dim, out_channels=hidden_dim, kernel_size=3, padding=1) 184 | self.conv2 = nn.Conv1d(in_channels=hidden_dim, out_channels=hidden_dim, kernel_size=3, padding=1) 185 | self.bn1 = nn.BatchNorm1d(num_features=self.hidden_dim) 186 | self.bn2 = nn.BatchNorm1d(num_features=self.hidden_dim) 187 | self.p1 = nn.MaxPool1d(kernel_size=2) 188 | if self.n_layers >= 4: 189 | self.conv3 = nn.Conv1d(in_channels=hidden_dim, out_channels=hidden_dim, kernel_size=3, padding=1) 190 | self.conv4 = nn.Conv1d(in_channels=hidden_dim, out_channels=hidden_dim, kernel_size=3, padding=1) 191 | self.bn3 = nn.BatchNorm1d(num_features=self.hidden_dim) 192 | self.bn4 = nn.BatchNorm1d(num_features=self.hidden_dim) 193 | self.p2 = nn.MaxPool1d(kernel_size=2) 194 | if self.n_layers >= 6: 195 | self.conv5 = nn.Conv1d(in_channels=hidden_dim, out_channels=hidden_dim, kernel_size=3, padding=1) 196 | self.conv6 = nn.Conv1d(in_channels=hidden_dim, out_channels=hidden_dim, kernel_size=3, padding=1) 197 | self.bn5 = nn.BatchNorm1d(num_features=self.hidden_dim) 198 | self.bn6 = nn.BatchNorm1d(num_features=self.hidden_dim) 199 | self.p3 = nn.MaxPool1d(kernel_size=2) 200 | if self.n_layers == 8: 201 | self.conv7 = nn.Conv1d(in_channels=hidden_dim, out_channels=hidden_dim, kernel_size=3, padding=1) 202 | self.conv8 = nn.Conv1d(in_channels=hidden_dim, out_channels=hidden_dim, kernel_size=3, padding=1) 203 | self.bn7 = nn.BatchNorm1d(num_features=self.hidden_dim) 204 | self.bn8 = nn.BatchNorm1d(num_features=self.hidden_dim) 205 | 206 | self.glbAvgPool = nn.AdaptiveAvgPool1d(1) 207 | 208 | self.dropout = nn.Dropout(DROPOUT) 209 | # The linear layer that maps from hidden state space to tag space 210 | self.lc1 = nn.Linear(hidden_dim, int(hidden_dim*2)) 211 | self.lc2 = nn.Linear(int(hidden_dim*2), target_size) 212 | 213 | def forward(self, landmarks, lengths): 214 | landmarks = landmarks.permute(0, 2, 1) # (b, seq, dim) --> (b, dim, seq) 215 | # Convolve on Seq for each dim to get (b, dim, seq) 216 | if self.n_layers == 8: 217 | landmarks = F.relu(self.bn8(self.conv8(F.relu(self.bn7(self.conv7(self.p3(F.relu(self.bn6(self.conv6(F.relu(self.bn5(self.conv5(self.p2(F.relu(self.bn4(self.conv4(F.relu(self.bn3(self.conv3(self.p1(F.relu(self.bn2(self.conv2(F.relu(self.bn1(self.conv1(landmarks))))))))))))))))))))))))))) 218 | elif self.n_layers == 6: 219 | landmarks = 
self.p3(F.relu(self.bn6(self.conv6(F.relu(self.bn5(self.conv5(self.p2(F.relu(self.bn4(self.conv4(F.relu(self.bn3(self.conv3(self.p1(F.relu(self.bn2(self.conv2(F.relu(self.bn1(self.conv1(landmarks))))))))))))))))))))) 220 | elif self.n_layers == 4: 221 | landmarks = self.p2(F.relu(self.bn4(self.conv4(F.relu(self.bn3(self.conv3(self.p1(F.relu(self.bn2(self.conv2(F.relu(self.bn1(self.conv1(landmarks)))))))))))))) 222 | elif self.n_layers == 2: 223 | landmarks = self.p1(F.relu(self.bn2(self.conv2(F.relu(self.bn1(self.conv1(landmarks))))))) 224 | else: 225 | print('Not specify n_layers') 226 | # Permute back: (b, dim, d_seq) --> (b, seq, dim) 227 | landmarks = landmarks.permute(0, 2, 1) 228 | # flat it to feed into fc: (b x seq, dim) 229 | landmarks = landmarks.contiguous() 230 | batch_size, seq_len, dim_feature = landmarks.shape 231 | landmarks = landmarks.view(-1, dim_feature) 232 | landmarks = F.tanh(self.lc1(self.dropout(landmarks))) # (b x seq, 1) 233 | landmarks = self.lc2(self.dropout(landmarks)) 234 | # unflat back to (b, seq, 1) 235 | landmarks = landmarks.view(batch_size, seq_len, 1) 236 | 237 | logit_list = [] 238 | if self.n_layers == 8 or self.n_layers == 6: 239 | for i, landmark in enumerate(landmarks): 240 | logit_list.append(self.glbAvgPool(landmark[:int(lengths[i]/8)].unsqueeze(0).permute(0, 2, 1)).squeeze(-1)) 241 | if self.n_layers == 4: 242 | for i, landmark in enumerate(landmarks): 243 | logit_list.append(self.glbAvgPool(landmark[:int(lengths[i]/4)].unsqueeze(0).permute(0, 2, 1)).squeeze(-1)) 244 | if self.n_layers == 2: 245 | for i, landmark in enumerate(landmarks): 246 | logit_list.append(self.glbAvgPool(landmark[:int(lengths[i]/2)].unsqueeze(0).permute(0, 2, 1)).squeeze(-1)) 247 | 248 | return torch.cat(logit_list) 249 | 250 | 251 | class cnn_Classifier(nn.Module): 252 | 253 | def __init__(self, embedding_dim, hidden_dim, target_size=1, bidirectional=False): 254 | super(cnn_Classifier, self).__init__() 255 | self.hidden_dim = hidden_dim # can change to smaller ones 64 . 32. 
16
256 |         self.n_layers = 2  # 2, 4, 6, 8
257 |         self.use_bn = False
258 |         if self.n_layers >= 2:
259 |             self.conv1 = nn.Conv1d(in_channels=embedding_dim, out_channels=self.hidden_dim, kernel_size=3, padding=1)  # the landmark feature dimension is the input channel count, matching the (b, dim, seq) layout in forward()
260 |             self.conv2 = nn.Conv1d(in_channels=self.hidden_dim, out_channels=self.hidden_dim, kernel_size=3, padding=1)
261 |             if self.use_bn:
262 |                 self.bn1 = nn.BatchNorm1d(num_features=self.hidden_dim)
263 |                 self.bn2 = nn.BatchNorm1d(num_features=self.hidden_dim)
264 |             self.p1 = nn.MaxPool1d(kernel_size=2)
265 |         if self.n_layers >= 4:
266 |             self.conv3 = nn.Conv1d(in_channels=self.hidden_dim, out_channels=self.hidden_dim, kernel_size=3, padding=1)
267 |             self.conv4 = nn.Conv1d(in_channels=self.hidden_dim, out_channels=self.hidden_dim, kernel_size=3, padding=1)
268 |             if self.use_bn:
269 |                 self.bn3 = nn.BatchNorm1d(num_features=self.hidden_dim)
270 |                 self.bn4 = nn.BatchNorm1d(num_features=self.hidden_dim)
271 |             self.p2 = nn.MaxPool1d(kernel_size=2)
272 |         if self.n_layers >= 6:
273 |             self.conv5 = nn.Conv1d(in_channels=self.hidden_dim, out_channels=self.hidden_dim, kernel_size=3, padding=1)
274 |             self.conv6 = nn.Conv1d(in_channels=self.hidden_dim, out_channels=self.hidden_dim, kernel_size=3, padding=1)
275 |             if self.use_bn:
276 |                 self.bn5 = nn.BatchNorm1d(num_features=self.hidden_dim)
277 |                 self.bn6 = nn.BatchNorm1d(num_features=self.hidden_dim)
278 |             self.p3 = nn.MaxPool1d(kernel_size=2)
279 |         if self.n_layers == 8:
280 |             self.conv7 = nn.Conv1d(in_channels=self.hidden_dim, out_channels=self.hidden_dim, kernel_size=3, padding=1)
281 |             self.conv8 = nn.Conv1d(in_channels=self.hidden_dim, out_channels=self.hidden_dim, kernel_size=3, padding=1)
282 |             if self.use_bn:
283 |                 self.bn7 = nn.BatchNorm1d(num_features=self.hidden_dim)
284 |                 self.bn8 = nn.BatchNorm1d(num_features=self.hidden_dim)
285 |         self.glbAvgPool = nn.AdaptiveAvgPool1d(1)  # pools the per-frame logits over the (downsampled) time axis in forward()
286 |         self.dropout = nn.Dropout(DROPOUT)
287 |         # The linear layer that maps from hidden state space to tag space
288 |         self.lc1 = nn.Linear(hidden_dim, int(hidden_dim*2))
289 |         self.lc2 = nn.Linear(int(hidden_dim*2), target_size)
290 |
291 |     def forward(self, landmarks, lengths):
292 |         landmarks = landmarks.permute(0, 2, 1)  # (b, seq, dim) --> (b, dim, seq)
293 |         # Convolve on Seq for each dim to get (b, dim, seq)
294 |         if self.use_bn:
295 |             if self.n_layers == 8:
296 |                 landmarks = F.relu(self.bn8(self.conv8(F.relu(self.bn7(self.conv7(self.p3(F.relu(self.bn6(self.conv6(F.relu(self.bn5(self.conv5(self.p2(F.relu(self.bn4(self.conv4(F.relu(self.bn3(self.conv3(self.p1(F.relu(self.bn2(self.conv2(F.relu(self.bn1(self.conv1(landmarks)))))))))))))))))))))))))))
297 |             elif self.n_layers == 6:
298 |                 landmarks = self.p3(F.relu(self.bn6(self.conv6(F.relu(self.bn5(self.conv5(self.p2(F.relu(self.bn4(self.conv4(F.relu(self.bn3(self.conv3(self.p1(F.relu(self.bn2(self.conv2(F.relu(self.bn1(self.conv1(landmarks)))))))))))))))))))))
299 |             elif self.n_layers == 4:
300 |                 landmarks = self.p2(F.relu(self.bn4(self.conv4(F.relu(self.bn3(self.conv3(self.p1(F.relu(self.bn2(self.conv2(F.relu(self.bn1(self.conv1(landmarks))))))))))))))
301 |             elif self.n_layers == 2:
302 |                 landmarks = self.p1(F.relu(self.bn2(self.conv2(F.relu(self.bn1(self.conv1(landmarks)))))))
303 |             else:
304 |                 print('Not specify n_layers')
305 |         else:
306 |             if self.n_layers == 8:
307 |                 landmarks = F.relu(self.conv8(F.relu(self.conv7(self.p3(F.relu(self.conv6(F.relu(self.conv5(self.p2(F.relu(self.conv4(F.relu(self.conv3(self.p1(F.relu(self.conv2(F.relu(self.conv1(landmarks)))))))))))))))))))
308 |             elif self.n_layers == 6:
309 |                 landmarks = 
self.p3(F.relu(self.conv6(F.relu(self.conv5(self.p2(F.relu(self.conv4(F.relu(self.conv3(self.p1(F.relu(self.conv2(F.relu(self.conv1(landmarks))))))))))))))) 310 | elif self.n_layers == 4: 311 | landmarks = self.p2(F.relu(self.conv4(F.relu(self.conv3(self.p1(F.relu(self.conv2(F.relu(self.conv1(landmarks)))))))))) 312 | elif self.n_layers == 2: 313 | landmarks = self.p1(F.relu(self.conv2(F.relu(self.conv1(landmarks))))) 314 | else: 315 | print('Not specify n_layers') 316 | # Permute back: (b, dim, d_seq) --> (b, seq, dim) 317 | landmarks = landmarks.permute(0, 2, 1) 318 | # flat it to feed into fc: (b x seq, dim) 319 | landmarks = landmarks.contiguous() 320 | batch_size, seq_len, dim_feature = landmarks.shape 321 | landmarks = landmarks.view(-1, dim_feature) 322 | landmarks = F.tanh(self.lc1(self.dropout(landmarks))) # (b x seq, 1) 323 | landmarks = self.lc2(self.dropout(landmarks)) 324 | # unflat back to (b, seq, 1) 325 | landmarks = landmarks.view(batch_size, seq_len, 1) 326 | 327 | logit_list = [] 328 | if self.n_layers == 8 or self.n_layers == 6: 329 | for i, landmark in enumerate(landmarks): 330 | logit_list.append(self.glbAvgPool(landmark[:int(lengths[i]/8)].unsqueeze(0).permute(0, 2, 1)).squeeze(-1)) 331 | if self.n_layers == 4: 332 | for i, landmark in enumerate(landmarks): 333 | logit_list.append(self.glbAvgPool(landmark[:int(lengths[i]/4)].unsqueeze(0).permute(0, 2, 1)).squeeze(-1)) 334 | if self.n_layers == 2: 335 | for i, landmark in enumerate(landmarks): 336 | logit_list.append(self.glbAvgPool(landmark[:int(lengths[i]/2)].unsqueeze(0).permute(0, 2, 1)).squeeze(-1)) 337 | 338 | return torch.cat(logit_list) 339 | 340 | 341 | class crnn_Classifier(nn.Module): 342 | 343 | def __init__(self, embedding_dim, hidden_dim, target_size=1, bidirectional=False, n_layer=1): 344 | super(crnn_Classifier, self).__init__() 345 | self.hidden_dim = hidden_dim 346 | self.n_layers = 4 # 2, 4, 6 ,8 347 | if self.n_layers >= 2: 348 | self.conv1 = nn.Conv1d(in_channels=embedding_dim, out_channels=hidden_dim, kernel_size=3, padding=1) 349 | self.conv2 = nn.Conv1d(in_channels=hidden_dim, out_channels=hidden_dim, kernel_size=3, padding=1) 350 | self.bn1 = nn.BatchNorm1d(num_features=self.hidden_dim) 351 | self.bn2 = nn.BatchNorm1d(num_features=self.hidden_dim) 352 | self.p1 = nn.MaxPool1d(kernel_size=2) 353 | self.scale_pool = 2 354 | if self.n_layers >= 4: 355 | self.conv3 = nn.Conv1d(in_channels=hidden_dim, out_channels=hidden_dim, kernel_size=3, padding=1) 356 | self.conv4 = nn.Conv1d(in_channels=hidden_dim, out_channels=hidden_dim, kernel_size=3, padding=1) 357 | self.bn3 = nn.BatchNorm1d(num_features=self.hidden_dim) 358 | self.bn4 = nn.BatchNorm1d(num_features=self.hidden_dim) 359 | self.p2 = nn.MaxPool1d(kernel_size=2) 360 | self.scale_pool = 4 361 | if self.n_layers >= 6: 362 | self.conv5 = nn.Conv1d(in_channels=hidden_dim, out_channels=hidden_dim, kernel_size=3, padding=1) 363 | self.conv6 = nn.Conv1d(in_channels=hidden_dim, out_channels=hidden_dim, kernel_size=3, padding=1) 364 | self.bn5 = nn.BatchNorm1d(num_features=self.hidden_dim) 365 | self.bn6 = nn.BatchNorm1d(num_features=self.hidden_dim) 366 | self.p3 = nn.MaxPool1d(kernel_size=2) 367 | self.scale_pool = 8 368 | if self.n_layers == 8: 369 | self.conv7 = nn.Conv1d(in_channels=hidden_dim, out_channels=hidden_dim, kernel_size=3, padding=1) 370 | self.conv8 = nn.Conv1d(in_channels=hidden_dim, out_channels=hidden_dim, kernel_size=3, padding=1) 371 | self.bn7 = nn.BatchNorm1d(num_features=self.hidden_dim) 372 | self.bn8 = 
nn.BatchNorm1d(num_features=self.hidden_dim) 373 | self.scale_pool = 8 374 | 375 | self.dropout = nn.Dropout(DROPOUT) 376 | self.gru = nn.GRU(hidden_dim, hidden_dim, num_layers=n_layer, bidirectional=bidirectional, dropout=DROPOUT) 377 | self.grad_clipping = 10. 378 | # The linear layer that maps from hidden state space to tag space 379 | self.lc1 = nn.Linear(hidden_dim, embedding_dim) 380 | self.lc2 = nn.Linear(embedding_dim, target_size) 381 | 382 | def forward(self, landmarks, lengths): 383 | landmarks = landmarks.permute(0, 2, 1) # (b, seq, dim) --> (b, dim, seq) 384 | # Convolve on Seq for each dim to get (b, dim, seq) 385 | if self.n_layers == 8: 386 | landmarks = F.relu(self.bn8(self.conv8(F.relu(self.bn7(self.conv7(self.p3(F.relu(self.bn6(self.conv6(F.relu(self.bn5(self.conv5(self.p2(F.relu(self.bn4(self.conv4(F.relu(self.bn3(self.conv3(self.p1(F.relu(self.bn2(self.conv2(F.relu(self.bn1(self.conv1(landmarks))))))))))))))))))))))))))) 387 | elif self.n_layers == 6: 388 | landmarks = self.p3(F.relu(self.bn6(self.conv6(F.relu(self.bn5(self.conv5(self.p2(F.relu(self.bn4(self.conv4(F.relu(self.bn3(self.conv3(self.p1(F.relu(self.bn2(self.conv2(F.relu(self.bn1(self.conv1(landmarks))))))))))))))))))))) 389 | elif self.n_layers == 4: 390 | landmarks = self.p2(F.relu(self.bn4(self.conv4(F.relu(self.bn3(self.conv3(self.p1(F.relu(self.bn2(self.conv2(F.relu(self.bn1(self.conv1(landmarks)))))))))))))) 391 | elif self.n_layers == 2: 392 | landmarks = self.p1(F.relu(self.bn2(self.conv2(F.relu(self.bn1(self.conv1(landmarks))))))) 393 | else: 394 | print('Not specify n_layers') 395 | 396 | # Permute back: (b, dim, d_seq) --> (b, seq, dim) with shorter seq 397 | landmarks = landmarks.permute(0, 2, 1) 398 | # Feed into GRU 399 | # import pdb; pdb.set_trace() 400 | # packed_input = pack_padded_sequence(self.dropout(landmarks), torch.IntTensor(lengths)/self.scale_pool, batch_first=True) 401 | packed_input = pack_padded_sequence(self.dropout(landmarks), tuple(int(x/self.scale_pool) for x in lengths), batch_first=True) 402 | _, ht = self.gru(packed_input) 403 | if ht.requires_grad: 404 | ht.register_hook(lambda x: x.clamp(min=-self.grad_clipping, max=self.grad_clipping)) 405 | ht = self.dropout(ht[-1]) 406 | logit = F.relu(self.lc1(ht)) 407 | logit = self.lc2(self.dropout(logit)) 408 | return logit 409 | 410 | # to be implemented 411 | class FrameCRNN(nn.Module): 412 | 413 | def __init__(self, embedding_dim, hidden_dim, target_size=1, bidirectional=False, n_layer=1): 414 | super(FrameCRNN, self).__init__() 415 | self.hidden_dim = hidden_dim 416 | self.n_layers = 2 # 2, 4, 6 ,8 417 | if self.n_layers >= 2: 418 | self.conv1 = nn.Conv1d(in_channels=embedding_dim, out_channels=hidden_dim, kernel_size=3, padding=1) 419 | self.conv2 = nn.Conv1d(in_channels=hidden_dim, out_channels=hidden_dim, kernel_size=3, padding=1) 420 | self.bn1 = nn.BatchNorm1d(num_features=self.hidden_dim) 421 | self.bn2 = nn.BatchNorm1d(num_features=self.hidden_dim) 422 | self.p1 = nn.MaxPool1d(kernel_size=2) 423 | self.scale_pool = 2 424 | if self.n_layers >= 4: 425 | self.conv3 = nn.Conv1d(in_channels=hidden_dim, out_channels=hidden_dim, kernel_size=3, padding=1) 426 | self.conv4 = nn.Conv1d(in_channels=hidden_dim, out_channels=hidden_dim, kernel_size=3, padding=1) 427 | self.bn3 = nn.BatchNorm1d(num_features=self.hidden_dim) 428 | self.bn4 = nn.BatchNorm1d(num_features=self.hidden_dim) 429 | self.p2 = nn.MaxPool1d(kernel_size=2) 430 | self.scale_pool = 4 431 | if self.n_layers >= 6: 432 | self.conv5 = nn.Conv1d(in_channels=hidden_dim, 
out_channels=hidden_dim, kernel_size=3, padding=1) 433 | self.conv6 = nn.Conv1d(in_channels=hidden_dim, out_channels=hidden_dim, kernel_size=3, padding=1) 434 | self.bn5 = nn.BatchNorm1d(num_features=self.hidden_dim) 435 | self.bn6 = nn.BatchNorm1d(num_features=self.hidden_dim) 436 | self.p3 = nn.MaxPool1d(kernel_size=2) 437 | self.scale_pool = 8 438 | if self.n_layers == 8: 439 | self.conv7 = nn.Conv1d(in_channels=hidden_dim, out_channels=hidden_dim, kernel_size=3, padding=1) 440 | self.conv8 = nn.Conv1d(in_channels=hidden_dim, out_channels=hidden_dim, kernel_size=3, padding=1) 441 | self.bn7 = nn.BatchNorm1d(num_features=self.hidden_dim) 442 | self.bn8 = nn.BatchNorm1d(num_features=self.hidden_dim) 443 | self.scale_pool = 8 444 | 445 | self.dropout = nn.Dropout(DROPOUT) 446 | self.gru = nn.GRU(hidden_dim, hidden_dim, num_layers=n_layer, bidirectional=bidirectional, dropout=DROPOUT) 447 | 448 | # The linear layer that maps from hidden state space to tag space 449 | self.lc1 = nn.Linear(hidden_dim, embedding_dim) 450 | self.lc2 = nn.Linear(embedding_dim, target_size) 451 | 452 | def forward(self, landmarks, lengths): 453 | landmarks = landmarks.permute(0, 2, 1) # (b, seq, dim) --> (b, dim, seq) 454 | # Convolve on Seq for each dim to get (b, dim, seq) 455 | if self.n_layers == 8: 456 | landmarks = F.relu(self.bn8(self.conv8(F.relu(self.bn7(self.conv7(self.p3(F.relu(self.bn6(self.conv6(F.relu(self.bn5(self.conv5(self.p2(F.relu(self.bn4(self.conv4(F.relu(self.bn3(self.conv3(self.p1(F.relu(self.bn2(self.conv2(F.relu(self.bn1(self.conv1(landmarks))))))))))))))))))))))))))) 457 | elif self.n_layers == 6: 458 | landmarks = self.p3(F.relu(self.bn6(self.conv6(F.relu(self.bn5(self.conv5(self.p2(F.relu(self.bn4(self.conv4(F.relu(self.bn3(self.conv3(self.p1(F.relu(self.bn2(self.conv2(F.relu(self.bn1(self.conv1(landmarks))))))))))))))))))))) 459 | elif self.n_layers == 4: 460 | landmarks = self.p2(F.relu(self.bn4(self.conv4(F.relu(self.bn3(self.conv3(self.p1(F.relu(self.bn2(self.conv2(F.relu(self.bn1(self.conv1(landmarks)))))))))))))) 461 | elif self.n_layers == 2: 462 | landmarks = self.p1(F.relu(self.bn2(self.conv2(F.relu(self.bn1(self.conv1(landmarks))))))) 463 | else: 464 | print('Not specify n_layers') 465 | 466 | # Permute back: (b, dim, d_seq) --> (b, seq, dim) with shorter seq 467 | landmarks = landmarks.permute(0, 2, 1) 468 | # Feed into GRU 469 | packed_input = pack_padded_sequence(self.dropout(landmarks), torch.IntTensor(lengths)/self.scale_pool, batch_first=True) 470 | _, ht = self.gru(packed_input) 471 | ht = self.dropout(ht[-1]) 472 | logit = F.relu(self.lc1(ht)) 473 | logit = self.lc2(self.dropout(logit)) 474 | return logit 475 | 476 | 477 | 478 | 479 | 480 | # class LSTM_Classifier(nn.Module): 481 | # 482 | # def __init__(self, embedding_dim, hidden_dim, target_size=1): 483 | # super(LSTM_Classifier, self).__init__() 484 | # self.hidden_dim = hidden_dim 485 | # 486 | # # The LSTM takes word embeddings as inputs, and outputs hidden states 487 | # # with dimensionality hidden_dim. 
488 | # self.lstm = nn.LSTM(embedding_dim, hidden_dim) 489 | # 490 | # # The linear layer that maps from hidden state space to tag space 491 | # self.lc = nn.Linear(hidden_dim, target_size) 492 | # 493 | # def forward(self, landmarks, lengths): 494 | # # pack_padded_sequence so that padded items in the sequence won't be shown to the LSTM 495 | # packed_input = pack_padded_sequence(landmarks, lengths, batch_first=True) 496 | # _, (ht, _) = self.lstm(packed_input) 497 | # import pdb; 498 | # pdb.set_trace() 499 | # # packed_output, (ht, ct) = self.lstm(packed_input) # ht is the final output of each batch! ht (1, 4, 272) can be found in output[:,input_sizes-1,:] 500 | # # output, input_sizes = pad_packed_sequence(packed_output, batch_first=True) 501 | # ''' 502 | # (Pdb) output[:,input_sizes-1,:] 503 | # tensor([[[-0.0176, 0.1605, 0.1339, ..., -0.0914, 0.2951, -0.0065], 504 | # [-0.0225, 0.1589, 0.1340, ..., -0.0925, 0.2950, -0.0095], 505 | # [-0.0253, 0.1574, 0.1431, ..., -0.0865, 0.3022, -0.0119], 506 | # [-0.0303, 0.1515, 0.1422, ..., -0.1094, 0.2976, -0.0032]], 507 | # 508 | # [[ 0.0000, 0.0000, 0.0000, ..., 0.0000, 0.0000, 0.0000], 509 | # [-0.0165, 0.1666, 0.1344, ..., -0.0698, 0.2945, -0.0163], 510 | # [-0.0235, 0.1697, 0.1479, ..., -0.0657, 0.3001, -0.0195], 511 | # [-0.0235, 0.1734, 0.1515, ..., -0.0608, 0.3029, -0.0201]], 512 | # 513 | # [[ 0.0000, 0.0000, 0.0000, ..., 0.0000, 0.0000, 0.0000], 514 | # [ 0.0000, 0.0000, 0.0000, ..., 0.0000, 0.0000, 0.0000], 515 | # [-0.0492, 0.1666, 0.1444, ..., -0.0749, 0.2816, -0.0188], 516 | # [-0.0490, 0.1542, 0.1449, ..., -0.0865, 0.2821, -0.0205]], 517 | # 518 | # [[ 0.0000, 0.0000, 0.0000, ..., 0.0000, 0.0000, 0.0000], 519 | # [ 0.0000, 0.0000, 0.0000, ..., 0.0000, 0.0000, 0.0000], 520 | # [ 0.0000, 0.0000, 0.0000, ..., 0.0000, 0.0000, 0.0000], 521 | # [-0.0460, 0.1522, 0.1381, ..., -0.0959, 0.2843, -0.0071]]], 522 | # device='cuda:2', grad_fn=) 523 | # (Pdb) ht.shape 524 | # torch.Size([1, 4, 272]) 525 | # (Pdb) ht 526 | # tensor([[[-0.0176, 0.1605, 0.1339, ..., -0.0914, 0.2951, -0.0065], 527 | # [-0.0165, 0.1666, 0.1344, ..., -0.0698, 0.2945, -0.0163], 528 | # [-0.0492, 0.1666, 0.1444, ..., -0.0749, 0.2816, -0.0188], 529 | # [-0.0460, 0.1522, 0.1381, ..., -0.0959, 0.2843, -0.0071]]], 530 | # device='cuda:2', grad_fn=) 531 | # 532 | # ''' 533 | # # import pdb; 534 | # # pdb.set_trace() 535 | # logit = self.lc(ht.squeeze(0)) 536 | # return logit -------------------------------------------------------------------------------- /plot_log.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import sys 3 | import os 4 | import matplotlib 5 | matplotlib.use('Agg') 6 | import matplotlib.pyplot as plt 7 | 8 | def main(argv): 9 | 10 | parser = argparse.ArgumentParser() 11 | parser.add_argument( 12 | "log_file", 13 | help = "path to log file" 14 | ) 15 | args = parser.parse_args() 16 | 17 | if os.path.isdir(args.log_file): 18 | for root, _, files in os.walk(args.log_file): 19 | if len(files) > 0: 20 | for file in files: 21 | if file.find('txt') > 0: 22 | print(file) 23 | f = open(root+'/'+file) 24 | lines = [line.rstrip("\n") for line in f.readlines()] 25 | epochs = [] 26 | train_acc, valid_acc = [], [] 27 | train_loss, valid_loss = [], [] 28 | 29 | for line in lines: 30 | try: 31 | line_list = line.split(',') 32 | if line_list[0][:5] == 'Epoch': 33 | a = int(line_list[0][5:]) 34 | b = float(line_list[2][:-1]) 35 | c = float(line_list[4]) 36 | d = float(line_list[6][:-1]) 37 | e = 
float(line_list[8]) 38 | epochs.append(a) 39 | train_acc.append(b) 40 | train_loss.append(c) 41 | valid_acc.append(d) 42 | valid_loss.append(e) 43 | except: 44 | pass 45 | print('missing a few epoch') 46 | fig = plt.figure(figsize=(14, 10)) 47 | ax1 = fig.add_subplot(2, 1, 1) 48 | ax1.plot(epochs, train_acc, 'r', label='train_acc') 49 | ax1.plot(epochs, valid_acc, 'b', label='valid_acc') 50 | ax1.grid() 51 | ax1.title.set_text('Accuracy') 52 | ax1.set_xlabel('epochs') 53 | ax1.set_ylabel('accuracy %') 54 | ax1.legend() 55 | ax2 = fig.add_subplot(2, 1, 2) 56 | ax2.plot(epochs, train_loss, 'r', label='train_loss') 57 | ax2.plot(epochs, valid_loss, 'b', label='valid_loss') 58 | ax2.grid() 59 | ax2.title.set_text('Loss') 60 | ax2.set_xlabel('epochs') 61 | ax2.set_ylabel('loss') 62 | ax2.legend() 63 | # plt.show() 64 | plt.savefig(root + '/' + file[:-3] + 'png') 65 | plt.close() 66 | else: 67 | f = open(args.log_file) 68 | lines = [line.rstrip("\n") for line in f.readlines()] 69 | epochs = [] 70 | train_acc, valid_acc = [], [] 71 | train_loss, valid_loss = [], [] 72 | 73 | for line in lines: 74 | line_list = line.split(',') 75 | if line_list[0][:5] == 'Epoch': 76 | line_list = line.split(',') 77 | epochs.append(int(line_list[0][5:])) 78 | train_acc.append(float(line_list[2][:-1])) 79 | train_loss.append(float(line_list[4])) 80 | valid_acc.append(float(line_list[6][:-1])) 81 | valid_loss.append(float(line_list[8])) 82 | # import pdb; 83 | # pdb.set_trace() 84 | fig = plt.figure(figsize=(14, 10)) 85 | # import pdb; 86 | # pdb.set_trace() 87 | ax1 = fig.add_subplot(2, 1, 1) 88 | ax1.plot(epochs, train_acc, 'r', label='train_acc') 89 | ax1.plot(epochs, valid_acc, 'b', label='valid_acc') 90 | ax1.grid() 91 | ax1.title.set_text('Accuracy') 92 | ax1.set_xlabel('epochs') 93 | ax1.set_ylabel('accuracy %') 94 | ax1.legend() 95 | ax2 = fig.add_subplot(2, 1, 2) 96 | ax2.plot(epochs, train_loss, 'r', label='train_loss') 97 | ax2.plot(epochs, valid_loss, 'b', label='valid_loss') 98 | ax2.grid() 99 | ax2.title.set_text('Loss') 100 | ax2.set_xlabel('epochs') 101 | ax2.set_ylabel('loss') 102 | ax2.legend() 103 | # plt.show() 104 | plt.savefig(args.log_file[:-3]+'png') 105 | 106 | if __name__ == "__main__": 107 | main(sys.argv) -------------------------------------------------------------------------------- /test.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | import torch.optim as optim 5 | from dataset import LandmarkList, LandmarkListTest 6 | from torch.utils import data 7 | from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence 8 | import argparse 9 | 10 | from model import * 11 | 12 | # rnn = 'frameGRU' 13 | # rnn = 'sumGRU' 14 | # rnn = 'crnn' 15 | # rnn = 'cnn' 16 | # rnn = 'GRU' 17 | # rnn = 'framewise_GRU' 18 | # rnn = 'embedGRU' 19 | rnn = 'biGRU' 20 | # rnn = 'LSTM' 21 | EMBEDDING_DIM = int(68 * 67 /2) 22 | HIDDEN_DIM = 128 23 | N_LAYERS_RNN = 3 24 | LR = 1e-4 25 | DEVICES = 0 26 | torch.cuda.set_device(DEVICES) 27 | 28 | 29 | def compute_binary_accuracy(model, data_loader, th_list): 30 | len_th_list = len(th_list) 31 | correct_pred, num_examples, FP, FN = [0.]*len_th_list, 0, [0]*len_th_list, [0]*len_th_list 32 | FP_list = [] 33 | FN_list = [] 34 | for _ in range(len_th_list): 35 | FP_list.append([]) 36 | FN_list.append([]) 37 | model.eval() 38 | with torch.no_grad(): 39 | if rnn == 'frameGRU': 40 | for batch, labels, lengths, f_names in data_loader: 41 | logits = 
--------------------------------------------------------------------------------
/test.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.nn.functional as F
4 | import torch.optim as optim
5 | from dataset import LandmarkList, LandmarkListTest
6 | from torch.utils import data
7 | from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence
8 | import argparse
9 | 
10 | from model import *
11 | 
12 | # rnn = 'frameGRU'
13 | # rnn = 'sumGRU'
14 | # rnn = 'crnn'
15 | # rnn = 'cnn'
16 | # rnn = 'GRU'
17 | # rnn = 'framewise_GRU'
18 | # rnn = 'embedGRU'
19 | rnn = 'biGRU'
20 | # rnn = 'LSTM'
21 | EMBEDDING_DIM = int(68 * 67 / 2)
22 | HIDDEN_DIM = 128
23 | N_LAYERS_RNN = 3
24 | LR = 1e-4
25 | DEVICES = 0
26 | torch.cuda.set_device(DEVICES)
27 | 
28 | 
29 | def compute_binary_accuracy(model, data_loader, th_list):
30 |     len_th_list = len(th_list)
31 |     correct_pred, num_examples, FP, FN = [0.]*len_th_list, 0, [0]*len_th_list, [0]*len_th_list
32 |     FP_list = []
33 |     FN_list = []
34 |     for _ in range(len_th_list):
35 |         FP_list.append([])
36 |         FN_list.append([])
37 |     model.eval()
38 |     with torch.no_grad():
39 |         if rnn == 'frameGRU':
40 |             for batch, labels, lengths, f_names in data_loader:
41 |                 logits = model(batch.cuda(), lengths)
42 |                 out = torch.sigmoid(logits)
43 |                 # average the per-frame probabilities of each sequence into one score
44 |                 new_out_list = []
45 |                 for i in range(len(lengths)):
46 |                     new_out_list.append(out[i][:lengths[i]].mean(0, keepdim=True))
47 |                 out = torch.cat(new_out_list, 0)
48 |                 num_examples += len(lengths)
49 |                 for i, th in enumerate(th_list):
50 |                     predicted_labels = (out > th).long()
51 |                     # batch_size is 1, so these single-element tensor comparisons are safe
52 |                     if predicted_labels.squeeze(1).cpu().long() == torch.LongTensor(labels):
53 |                         correct_pred[i] += 1
54 |                     elif labels == 0:
55 |                         FP[i] += 1
56 |                         FP_list[i].append(f_names[0] + '_' + str(labels.item()) + '_' + str(out.squeeze(1).cpu().item()))
57 |                     else:
58 |                         FN[i] += 1
59 |                         FN_list[i].append(f_names[0] + '_' + str(labels.item()) + '_' + str(out.squeeze(1).cpu().item()))
60 |             return [n_correct/num_examples * 100 for n_correct in correct_pred], FP, FN, FP_list, FN_list
61 |         else:
62 |             for batch, labels, lengths, f_names in data_loader:
63 |                 logits = model(batch.cuda(), lengths)
64 |                 probs = torch.sigmoid(logits)  # compute once, reuse for every threshold
65 |                 num_examples += len(lengths)
66 |                 for i, th in enumerate(th_list):
67 |                     predicted_labels = (probs > th).long()
68 |                     if predicted_labels.squeeze(1).cpu().long() == torch.LongTensor(labels):
69 |                         correct_pred[i] += 1
70 |                     elif labels == 0:
71 |                         FP[i] += 1
72 |                         FP_list[i].append(f_names[0]+'_'+str(labels.item())+'_'+str(probs.squeeze(1).cpu().item()))
73 |                     else:
74 |                         FN[i] += 1
75 |                         FN_list[i].append(f_names[0]+'_'+str(labels.item())+'_'+str(probs.squeeze(1).cpu().item()))
76 |             return [n_correct/num_examples * 100 for n_correct in correct_pred], FP, FN, FP_list, FN_list
77 | 
78 | 
79 | if rnn == 'frameGRU':
80 |     model = Framewise_GRU_Classifier(EMBEDDING_DIM, HIDDEN_DIM, 1, n_layer=N_LAYERS_RNN)
81 | if rnn == 'sumGRU':
82 |     model = sumGRU(EMBEDDING_DIM, HIDDEN_DIM, 1, n_layer=N_LAYERS_RNN)
83 | if rnn == 'embedGRU':
84 |     model = embed_GRU_Classifier(EMBEDDING_DIM, HIDDEN_DIM, 1, n_layer=N_LAYERS_RNN)
85 |     model.load_state_dict(torch.load("models/" + str(rnn) + "_L" + str(N_LAYERS_RNN) + ".pt"))
86 | if rnn == 'GRU':
87 |     model = GRU_Classifier(EMBEDDING_DIM, HIDDEN_DIM, 1, n_layer=N_LAYERS_RNN)
88 |     model.load_state_dict(torch.load("models/" + str(rnn) + "_L" + str(N_LAYERS_RNN) + ".pt"))
89 | if rnn == 'biGRU':
90 |     model = biGRU_Classifier(EMBEDDING_DIM, HIDDEN_DIM, 1, n_layer=N_LAYERS_RNN)
91 |     model.load_state_dict(torch.load("models/" + str(rnn) + "_L" + str(N_LAYERS_RNN) + ".pt"))
92 | if rnn == 'LSTM':
93 |     model = LSTM_Classifier(EMBEDDING_DIM, HIDDEN_DIM, 1, n_layer=N_LAYERS_RNN)
94 | if rnn == 'cnn':
95 |     model = cnn_Classifier(EMBEDDING_DIM, HIDDEN_DIM, 1)
96 | if rnn == 'crnn':
97 |     model = crnn_Classifier(EMBEDDING_DIM, HIDDEN_DIM, 1, n_layer=N_LAYERS_RNN)
98 | # model.load_state_dict(torch.load("models/"+str(rnn)+".pt"))
99 | model = model.cuda()
100 | 
101 | loss_function = torch.nn.BCEWithLogitsLoss()
102 | loss_function_eval_sum = torch.nn.BCEWithLogitsLoss(reduction='sum')
103 | optimizer = optim.Adam(model.parameters(), lr=LR)
104 | 
105 | dataset_train = LandmarkListTest(root='/datasets/move_closer/Data_Distortion/', fileList='/datasets/move_closer/TrainList.txt')
106 | dataloader_train = data.DataLoader(dataset_train, batch_size=1, shuffle=False, num_workers=0)
107 | 
108 | dataset_test = LandmarkListTest(root='/datasets/move_closer/Data_Distortion/', fileList='/datasets/move_closer/TestList.txt')
109 | dataloader_test = data.DataLoader(dataset_test, batch_size=1, shuffle=False, num_workers=0)
110 | 
111 | # thresholds = [x * 0.01 for x in range(30, 71)]
112 | thresholds = [0.5]
113 | 
114 | train_acc, train_fp, train_fn, train_fp_list, train_fn_list = compute_binary_accuracy(model, dataloader_train, thresholds)
115 | test_acc, test_fp, test_fn, test_fp_list, test_fn_list = compute_binary_accuracy(model, dataloader_test, thresholds)
116 | 
117 | for i in range(len(thresholds)):
118 |     print('\n\n-----------------Eval for threshold of {:.2f}-------------------\n\n'.format(thresholds[i]))
119 |     print('train_acc,{:.2f}%,train_fp,{},train_fn,{}\nvalid_acc,{:.2f}%,valid_fp,{},valid_fn,{}\n'
120 |           .format(train_acc[i], train_fp[i], train_fn[i], test_acc[i], test_fp[i], test_fn[i]))
121 |     print('Train FP')
122 |     for n in train_fp_list[i]:
123 |         print(n)
124 |     print('\nTrain FN')
125 |     for n in train_fn_list[i]:
126 |         print(n)
127 | 
128 |     print('\n\n\nTest FP')
129 |     for n in test_fp_list[i]:
130 |         print(n)
131 |     print('\nTest FN')
132 |     for n in test_fn_list[i]:
133 |         print(n)
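134 | 
135 | # To trade false positives against false negatives, sweep the decision threshold
136 | # instead of the fixed 0.5, e.g. the commented list above:
137 | #     thresholds = [x * 0.01 for x in range(30, 71)]
138 | # compute_binary_accuracy accepts the whole list, so a single pass over each
139 | # loader scores every threshold at once.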
--------------------------------------------------------------------------------
/train.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.nn.functional as F
4 | import torch.optim as optim
5 | from dataset import LandmarkList
6 | from torch.utils import data
7 | from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence
8 | import argparse
9 | 
10 | from model import *
11 | 
12 | # rnn = 'frameGRU'
13 | # to be implemented - rnn = 'frameCRNN'
14 | rnn = 'sumGRU'
15 | # rnn = 'crnn'
16 | # rnn = 'cnn'
17 | # rnn = 'GRU'
18 | # rnn = 'embedGRU'
19 | # rnn = 'biGRU'
20 | # rnn = 'LSTM'
21 | EMBEDDING_DIM = int(68 * 67 / 2)
22 | HIDDEN_DIM = 128
23 | N_LAYERS_RNN = 1
24 | MAX_EPOCH = 30000
25 | LR = 1e-4
26 | DEVICES = 3
27 | SAVE_BEST_MODEL = True
28 | torch.cuda.set_device(DEVICES)
29 | 
30 | 
31 | def compute_binary_accuracy(model, data_loader, loss_function):
32 |     correct_pred, num_examples, total_loss = 0, 0, 0.
33 |     model.eval()
34 |     with torch.no_grad():
35 |         if rnn == 'frameGRU' or rnn == 'frameCRNN':
36 |             for batch, labels, lengths in data_loader:
37 |                 logits = model(batch.cuda(), lengths)
38 |                 out = torch.sigmoid(logits)
39 |                 new_labels_list = []
40 |                 new_logits_list = []
41 |                 new_out_list = []
42 |                 for i in range(len(lengths)):
43 |                     # repeat the sequence label for every frame; keep only un-padded frames
44 |                     new_labels_list += [labels[i]] * lengths[i]
45 |                     new_logits_list.append(logits[i][:lengths[i]])  # raw logits: BCEWithLogitsLoss applies the sigmoid itself
46 |                     new_out_list.append(out[i][:lengths[i]].mean(0, keepdim=True))
47 |                 logits_framewise = torch.cat(new_logits_list, 0)
48 |                 labels_framewise = new_labels_list
49 |                 out = torch.cat(new_out_list, 0)
50 |                 total_loss += loss_function(logits_framewise, torch.FloatTensor(labels_framewise).unsqueeze(1).cuda()).item()
51 |                 predicted_labels = (out > 0.5).long()
52 |                 num_examples += len(lengths)
53 |                 correct_pred += (predicted_labels.squeeze(1).cpu().long() == torch.LongTensor(labels)).sum()
54 |             return correct_pred.float().item()/num_examples * 100, total_loss
55 |         else:
56 |             for batch, labels, lengths in data_loader:
57 |                 logits = model(batch.cuda(), lengths)
58 |                 total_loss += loss_function(logits, torch.FloatTensor(labels).unsqueeze(1).cuda()).item()
59 |                 predicted_labels = (torch.sigmoid(logits) > 0.5).long()
60 |                 num_examples += len(lengths)
61 |                 correct_pred += (predicted_labels.squeeze(1).cpu().long() == torch.LongTensor(labels)).sum()
62 |             return correct_pred.float().item()/num_examples * 100, total_loss
63 | 
64 | 
65 | def pad_collate(batch):
66 |     # sort by length (longest first), as pack_padded_sequence requires
67 |     batch.sort(key=lambda x: x[2], reverse=True)
68 |     lms, tgs, lens = zip(*batch)
69 |     new_lms = torch.zeros((len(lms), lms[0].shape[0], lms[0].shape[1]))  # batch x max_seq_len x EMBEDDING_DIM
70 |     new_lms[0] = lms[0]
71 |     for i in range(1, len(lms)):
72 |         # zero-pad each shorter sequence up to the longest length in the batch
73 |         new_lms[i] = torch.cat((lms[i], torch.zeros((lens[0] - lens[i]), EMBEDDING_DIM)), 0)
74 |     return new_lms, tgs, lens
75 | 
76 | if rnn == 'frameGRU':
77 |     model = Framewise_GRU_Classifier(EMBEDDING_DIM, HIDDEN_DIM, 1, n_layer=N_LAYERS_RNN)
78 | if rnn == 'frameCRNN':
79 |     model = FrameCRNN(EMBEDDING_DIM, HIDDEN_DIM, 1, n_layer=N_LAYERS_RNN)
80 | if rnn == 'sumGRU':
81 |     model = sumGRU(EMBEDDING_DIM, HIDDEN_DIM, 1, n_layer=N_LAYERS_RNN)
82 | if rnn == 'embedGRU':
83 |     model = embed_GRU_Classifier(EMBEDDING_DIM, HIDDEN_DIM, 1, n_layer=N_LAYERS_RNN)
84 | if rnn == 'GRU':
85 |     model = GRU_Classifier(EMBEDDING_DIM, HIDDEN_DIM, 1, n_layer=N_LAYERS_RNN)
86 | if rnn == 'biGRU':
87 |     model = biGRU_Classifier(EMBEDDING_DIM, HIDDEN_DIM, 1, n_layer=N_LAYERS_RNN)
88 | if rnn == 'LSTM':
89 |     model = LSTM_Classifier(EMBEDDING_DIM, HIDDEN_DIM, 1, n_layer=N_LAYERS_RNN)
90 | if rnn == 'cnn':
91 |     model = cnn_Classifier(EMBEDDING_DIM, HIDDEN_DIM, 1)
92 | if rnn == 'crnn':
93 |     model = crnn_Classifier(EMBEDDING_DIM, HIDDEN_DIM, 1, n_layer=N_LAYERS_RNN)
94 | model = model.cuda()
95 | 
96 | loss_function = torch.nn.BCEWithLogitsLoss()
97 | loss_function_eval_sum = torch.nn.BCEWithLogitsLoss(reduction='sum')
98 | optimizer = optim.Adam(model.parameters(), lr=LR)
99 | 
100 | dataset_train = LandmarkList(root='/datasets/move_closer/Data_Distortion/', fileList='/datasets/move_closer/TrainList.txt')
101 | dataloader_train = data.DataLoader(dataset_train, batch_size=128, shuffle=True, num_workers=0, collate_fn=pad_collate)
102 | # if rnn == 'frameGRU':
103 | #     dataloader_train = data.DataLoader(dataset_train, batch_size=8, shuffle=True, num_workers=2,
104 | #                                        collate_fn=pad_collate)
105 | 
106 | dataset_test = LandmarkList(root='/datasets/move_closer/Data_Distortion/', fileList='/datasets/move_closer/TestList.txt')
107 | dataloader_test = data.DataLoader(dataset_test, batch_size=64, shuffle=False, num_workers=0, collate_fn=pad_collate)
108 | 
109 | best_test_acc = 0.
110 | for epoch in range(MAX_EPOCH):
111 |     model.train()
112 |     n_iter = 0
113 |     for batch, labels, lengths in dataloader_train:
114 |         model.zero_grad()
115 |         out = model(batch.cuda(), lengths)  # we could also do a classification for every output (probably better)
116 |         if rnn == 'frameGRU':
117 |             # expand the sequence label to every un-padded frame
118 |             new_labels_list = []
119 |             new_out_list = []
120 |             for i in range(len(lengths)):
121 |                 new_labels_list += [labels[i]] * lengths[i]
122 |                 new_out_list.append(out[i][:lengths[i]])
123 |             out = torch.cat(new_out_list, 0)
124 |             labels = new_labels_list
125 |         loss = loss_function(out, torch.FloatTensor(labels).unsqueeze(1).cuda())
126 |         loss.backward()
127 |         optimizer.step()
128 |         n_iter += 1
129 |     train_acc, train_loss = compute_binary_accuracy(model, dataloader_train, loss_function_eval_sum)
130 |     test_acc, test_loss = compute_binary_accuracy(model, dataloader_test, loss_function_eval_sum)
131 |     print('Epoch{},train_acc,{:.2f}%,train_loss,{:.8f},valid_acc,{:.2f}%,valid_loss,{:.8f}'.format(epoch, train_acc, train_loss, test_acc, test_loss))
132 |     if test_acc > best_test_acc:
133 |         best_test_acc = test_acc
134 |         if SAVE_BEST_MODEL:
135 |             torch.save(model.state_dict(), 'models/' + rnn + '_L' + str(N_LAYERS_RNN) + '.pt')
136 |         print('best epoch {}, train_acc {}, test_acc {}'.format(epoch, train_acc, test_acc))
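137 | 
138 | # Minimal shape check for pad_collate (illustrative, with two fake sequences):
139 | #     batch = [(torch.randn(5, EMBEDDING_DIM), 1, 5), (torch.randn(3, EMBEDDING_DIM), 0, 3)]
140 | #     lms, tgs, lens = pad_collate(batch)
141 | #     assert lms.shape == (2, 5, EMBEDDING_DIM) and lens == (5, 3)
142 | # pack_padded_sequence(lms, lens, batch_first=True) inside the model then keeps the
143 | # recurrent layers from ever seeing the zero-padded tail of the shorter sequence.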
--------------------------------------------------------------------------------
/train_2dcnn.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.nn.functional as F
4 | import torch.optim as optim
5 | from dataset import LandmarkList
6 | from torch.utils import data
7 | from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence
8 | import argparse
9 | 
10 | from model import *
11 | 
12 | 
13 | rnn = '2dcnn'
14 | # rnn = 'frameGRU'
15 | # to be implemented - rnn = 'frameCRNN'
16 | # rnn = 'sumGRU'
17 | # rnn = 'crnn'
18 | # rnn = 'cnn'
19 | # rnn = 'GRU'
20 | # rnn = 'embedGRU'
21 | # rnn = 'biGRU'
22 | # rnn = 'LSTM'
23 | EMBEDDING_DIM = int(68 * 67 / 2)
24 | HIDDEN_DIM = 128
25 | N_LAYERS_RNN = 3
26 | MAX_EPOCH = 1000
27 | LR = 1e-4
28 | DEVICES = 2
29 | SAVE_BEST_MODEL = True
30 | torch.cuda.set_device(DEVICES)
31 | 
32 | 
33 | def compute_binary_accuracy(model, data_loader, loss_function):
34 |     correct_pred, num_examples, total_loss = 0, 0, 0.
35 |     model.eval()
36 |     with torch.no_grad():
37 |         if rnn == 'frameGRU' or rnn == 'frameCRNN':
38 |             for batch, labels, lengths in data_loader:
39 |                 logits = model(batch.cuda(), lengths)
40 |                 out = torch.sigmoid(logits)
41 |                 new_labels_list = []
42 |                 new_logits_list = []
43 |                 new_out_list = []
44 |                 for i in range(len(lengths)):
45 |                     # repeat the sequence label for every frame; keep only un-padded frames
46 |                     new_labels_list += [labels[i]] * lengths[i]
47 |                     new_logits_list.append(logits[i][:lengths[i]])  # raw logits: BCEWithLogitsLoss applies the sigmoid itself
48 |                     new_out_list.append(out[i][:lengths[i]].mean(0, keepdim=True))
49 |                 logits_framewise = torch.cat(new_logits_list, 0)
50 |                 labels_framewise = new_labels_list
51 |                 out = torch.cat(new_out_list, 0)
52 |                 total_loss += loss_function(logits_framewise, torch.FloatTensor(labels_framewise).unsqueeze(1).cuda()).item()
53 |                 predicted_labels = (out > 0.5).long()
54 |                 num_examples += len(lengths)
55 |                 correct_pred += (predicted_labels.squeeze(1).cpu().long() == torch.LongTensor(labels)).sum()
56 |             return correct_pred.float().item()/num_examples * 100, total_loss
57 |         else:
58 |             for batch, labels, lengths in data_loader:
59 |                 logits = model(batch.cuda(), lengths)
60 |                 total_loss += loss_function(logits, torch.FloatTensor(labels).unsqueeze(1).cuda()).item()
61 |                 predicted_labels = (torch.sigmoid(logits) > 0.5).long()
62 |                 num_examples += len(lengths)
63 |                 correct_pred += (predicted_labels.squeeze(1).cpu().long() == torch.LongTensor(labels)).sum()
64 |             return correct_pred.float().item()/num_examples * 100, total_loss
65 | 
66 | 
67 | def pad_collate(batch):
68 |     # sort by length (longest first), as pack_padded_sequence requires
69 |     batch.sort(key=lambda x: x[2], reverse=True)
70 |     lms, tgs, lens = zip(*batch)
71 |     new_lms = torch.zeros((len(lms), lms[0].shape[0], lms[0].shape[1]))  # batch x max_seq_len x EMBEDDING_DIM
72 |     new_lms[0] = lms[0]
73 |     for i in range(1, len(lms)):
74 |         new_lms[i] = torch.cat((lms[i], torch.zeros((lens[0] - lens[i]), EMBEDDING_DIM)), 0)
75 |     return new_lms, tgs, lens
76 | 
77 | if rnn == '2dcnn':
78 |     model = cnn_2d(EMBEDDING_DIM, HIDDEN_DIM, 1, n_layer=N_LAYERS_RNN)
79 | if rnn == 'frameGRU':
80 |     model = Framewise_GRU_Classifier(EMBEDDING_DIM, HIDDEN_DIM, 1, n_layer=N_LAYERS_RNN)
81 | if rnn == 'frameCRNN':
82 |     model = FrameCRNN(EMBEDDING_DIM, HIDDEN_DIM, 1, n_layer=N_LAYERS_RNN)
83 | if rnn == 'sumGRU':
84 |     model = sumGRU(EMBEDDING_DIM, HIDDEN_DIM, 1, n_layer=N_LAYERS_RNN)
85 | if rnn == 'embedGRU':
86 |     model = embed_GRU_Classifier(EMBEDDING_DIM, HIDDEN_DIM, 1, n_layer=N_LAYERS_RNN)
87 | if rnn == 'GRU':
88 |     model = GRU_Classifier(EMBEDDING_DIM, HIDDEN_DIM, 1, n_layer=N_LAYERS_RNN)
89 | if rnn == 'biGRU':
90 |     model = biGRU_Classifier(EMBEDDING_DIM, HIDDEN_DIM, 1, n_layer=N_LAYERS_RNN)
91 | if rnn == 'LSTM':
92 |     model = LSTM_Classifier(EMBEDDING_DIM, HIDDEN_DIM, 1, n_layer=N_LAYERS_RNN)
93 | if rnn == 'cnn':
94 |     model = cnn_Classifier(EMBEDDING_DIM, HIDDEN_DIM, 1)
95 | if rnn == 'crnn':
96 |     model = crnn_Classifier(EMBEDDING_DIM, HIDDEN_DIM, 1, n_layer=N_LAYERS_RNN)
97 | model = model.cuda()
98 | 
99 | loss_function = torch.nn.BCEWithLogitsLoss()
100 | loss_function_eval_sum = torch.nn.BCEWithLogitsLoss(reduction='sum')
101 | optimizer = optim.Adam(model.parameters(), lr=LR)
102 | 
103 | dataset_train = LandmarkList(root='/datasets/move_closer/Data_Distortion/', fileList='/datasets/move_closer/TrainList.txt')
104 | dataloader_train = data.DataLoader(dataset_train, batch_size=128, shuffle=True, num_workers=0, collate_fn=pad_collate)
105 | # if rnn == 'frameGRU':
106 | #     dataloader_train = data.DataLoader(dataset_train, batch_size=8, shuffle=True, num_workers=2,
107 | #                                        collate_fn=pad_collate)
108 | 
109 | dataset_test = LandmarkList(root='/datasets/move_closer/Data_Distortion/', fileList='/datasets/move_closer/TestList.txt')
110 | dataloader_test = data.DataLoader(dataset_test, batch_size=64, shuffle=False, num_workers=0, collate_fn=pad_collate)
111 | 
112 | best_test_acc = 0.
113 | for epoch in range(MAX_EPOCH):
114 |     model.train()
115 |     n_iter = 0
116 |     for batch, labels, lengths in dataloader_train:
117 |         model.zero_grad()
118 |         out = model(batch.cuda(), lengths)  # we could also do a classification for every output (probably better)
119 |         if rnn == 'frameGRU':
120 |             new_labels_list = []
121 |             new_out_list = []
122 |             for i in range(len(lengths)):
123 |                 new_labels_list += [labels[i]] * lengths[i]
124 |                 new_out_list.append(out[i][:lengths[i]])
125 |             out = torch.cat(new_out_list, 0)
126 |             labels = new_labels_list
127 |         loss = loss_function(out, torch.FloatTensor(labels).unsqueeze(1).cuda())
128 |         loss.backward()
129 |         optimizer.step()
130 |         n_iter += 1
131 |     train_acc, train_loss = compute_binary_accuracy(model, dataloader_train, loss_function_eval_sum)
132 |     test_acc, test_loss = compute_binary_accuracy(model, dataloader_test, loss_function_eval_sum)
133 |     print('Epoch{},train_acc,{:.2f}%,train_loss,{:.8f},valid_acc,{:.2f}%,valid_loss,{:.8f}'.format(epoch, train_acc, train_loss, test_acc, test_loss))
134 |     if test_acc > best_test_acc:
135 |         best_test_acc = test_acc
136 |         if SAVE_BEST_MODEL:
137 |             torch.save(model.state_dict(), 'models/' + rnn + '_L' + str(N_LAYERS_RNN) + '.pt')
138 |         print('best epoch {}, train_acc {}, test_acc {}'.format(epoch, train_acc, test_acc))
--------------------------------------------------------------------------------