├── .gitignore
├── .idea
│   ├── .gitignore
│   ├── inspectionProfiles
│   │   └── profiles_settings.xml
│   ├── misc.xml
│   ├── modules.xml
│   ├── sequential_model_pytorch_rnn_1dcnn.iml
│   └── vcs.xml
├── README.md
├── dataset.py
├── dataset2.py
├── dataset_list.py
├── main2.py
├── main_softmax.py
├── model.py
├── plot_log.py
├── test.py
├── train.py
└── train_2dcnn.py
/.gitignore:
--------------------------------------------------------------------------------
1 | outputs*/
2 | eval/
3 | models/
4 | __pycache__/
5 |
--------------------------------------------------------------------------------
/.idea/.gitignore:
--------------------------------------------------------------------------------
1 | # Default ignored files
2 | /shelf/
3 | /workspace.xml
4 | # Editor-based HTTP Client requests
5 | /httpRequests/
6 | # Datasource local storage ignored files
7 | /dataSources/
8 | /dataSources.local.xml
9 |
--------------------------------------------------------------------------------
/.idea/inspectionProfiles/profiles_settings.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/.idea/misc.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
--------------------------------------------------------------------------------
/.idea/modules.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
--------------------------------------------------------------------------------
/.idea/sequential_model_pytorch_rnn_1dcnn.iml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
--------------------------------------------------------------------------------
/.idea/vcs.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Sequential Model Pytorch
2 |
3 | This repo provides a library supporting:
4 | - RNN, LSTM, GRU, bi-GRU, 1D-CNN, RCNN, etc.
5 | - Adaptable to variable-length input sequences.
6 |
7 | ## Quick start
8 |
9 | Choose the type of neural network in `train.py`.
10 |
11 | To train:
12 |
13 | ```bash
14 | python train.py
15 | ```
16 |
17 | To test:
18 |
19 | ```bash
20 | python test.py
21 | ```
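22 |
23 | ## Data format
24 |
25 | Each sample is a pickled float tensor of shape `(seq_len, feature_dim)`;
26 | the training scripts use `feature_dim = 136` (68 landmarks x 2 coordinates
27 | per frame), and the list files contain one `path label` pair per line.
28 | Below is a minimal loading sketch: the paths are placeholders, and
29 | `pad_collate` inlines a zero-padding collate equivalent to the one defined
30 | in the training scripts:
31 |
32 | ```python
33 | import torch
34 | from torch.utils import data
35 | from dataset import LandmarkList
36 |
37 | def pad_collate(batch):
38 |     # sort by length (descending, as pack_padded_sequence expects),
39 |     # then right-pad every sequence with zeros to the longest one
40 |     batch.sort(key=lambda x: x[2], reverse=True)
41 |     lms, tgs, lens = zip(*batch)
42 |     out = torch.zeros(len(lms), lens[0], lms[0].shape[1])
43 |     for i, lm in enumerate(lms):
44 |         out[i, :lens[i]] = lm
45 |     return out, tgs, lens
46 |
47 | ds = LandmarkList(root='/path/to/landmarks/', fileList='/path/to/TrainList.txt')
48 | loader = data.DataLoader(ds, batch_size=32, shuffle=True, collate_fn=pad_collate)
49 | for batch, labels, lengths in loader:  # batch: (B, max_seq_len, 136)
50 |     pass
51 | ```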
--------------------------------------------------------------------------------
/dataset.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 |
5 | import torch.utils.data as data
6 |
7 | import os
8 | import os.path
9 | import re
10 | import torch
11 | import pickle
12 |
13 |
14 | def default_loader(path):
15 | with open(path, 'rb') as fp:
16 | lm_list = pickle.load(fp)
17 |     # no explicit close needed; the with-block closes fp
18 | return lm_list
19 |
20 | def default_list_reader(fileList):
21 | lmList = []
22 | with open(fileList, 'r') as file:
23 |         for line in file:
24 |             lmPath = line.strip()[:-2].strip()  # path portion, before the trailing " <label>"
25 |             label = line.strip()[-1]            # single-character class label
26 |
27 | lmList.append((lmPath, int(label)))
28 | return lmList
29 |
30 |
31 | class LandmarkList(data.Dataset):
32 | def __init__(self, root, fileList, transform=None, list_reader=default_list_reader, loader=default_loader):
33 | self.root = root
34 | self.lmList = list_reader(fileList)
35 | self.transform = transform
36 | self.loader = loader
37 |
38 | def __getitem__(self, index):
39 | lmPath, target = self.lmList[index]
40 | lm = self.loader(os.path.join(self.root, lmPath))
41 | if self.transform is not None:
42 | lm = self.transform(lm)
43 | return lm, target, lm.shape[0]
44 |
45 | def __len__(self):
46 | return len(self.lmList)
47 |
48 |
49 | class LandmarkListTest(data.Dataset):
50 | def __init__(self, root, fileList, transform=None, list_reader=default_list_reader, loader=default_loader):
51 | self.root = root
52 | self.lmList = list_reader(fileList)
53 | self.transform = transform
54 | self.loader = loader
55 |
56 | def __getitem__(self, index):
57 | lmPath, target = self.lmList[index]
58 | lm = self.loader(os.path.join(self.root, lmPath))
59 | if self.transform is not None:
60 | lm = self.transform(lm)
61 | return lm, target, lm.shape[0], lmPath
62 |
63 | def __len__(self):
64 | return len(self.lmList)
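65 |
66 |
67 | # Expected list-file format: one "path label" pair per line with a
68 | # single-character label, e.g. (hypothetical path):
69 | #   clips/video_0001.pkl 1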
--------------------------------------------------------------------------------
/dataset2.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 |
5 | import torch.utils.data as data
6 |
7 | import os
8 | import os.path
9 | import re
10 | import torch
11 | import pickle
12 |
13 |
14 | def default_loader(path):
15 | with open(path, 'rb') as fp:
16 | lm_list = pickle.load(fp)
17 |     # no explicit close needed; the with-block closes fp
18 | return lm_list
19 |
20 | def default_list_reader(fileList):
21 | lmList = []
22 | with open(fileList, 'r') as file:
23 |         for line in file:
24 |             lmPath = line.strip()[:-2].strip()  # path portion, before the trailing " <label>"
25 |             label = line.strip()[-1]            # single-character class label
26 |
27 | lmList.append((lmPath, int(label)))
28 | return lmList
29 |
30 |
31 | class LandmarkList(data.Dataset):
32 | def __init__(self, root, fileList, transform=None, list_reader=default_list_reader, loader=default_loader):
33 | self.root = root
34 | self.lmList = list_reader(fileList)
35 | self.transform = transform
36 | self.loader = loader
37 |
38 | def __getitem__(self, index):
39 | lmPath, target = self.lmList[index]
40 | lm = self.loader(os.path.join(self.root, lmPath))
41 | if self.transform is not None:
42 | lm = self.transform(lm)
43 | return lm, target, lm.shape[0]
44 |
45 | def __len__(self):
46 | return len(self.lmList)
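47 |
48 |
49 | # Note: identical to the LandmarkList in dataset.py; kept as a separate
50 | # module (main2.py imports it from here).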
--------------------------------------------------------------------------------
/dataset_list.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 |
5 | import torch.utils.data as data
6 |
7 | import os
8 | import os.path
9 | import re
10 | import torch
11 | import pickle
12 |
13 |
14 | def default_loader(path):
15 | with open(path, 'rb') as fp:
16 | lm_list = pickle.load(fp)
17 |     # no explicit close needed; the with-block closes fp
18 | return lm_list
19 |
20 | def default_list_reader(fileList):
21 | lmList = []
22 | with open(fileList, 'r') as file:
23 |         for line in file:
24 |             lmPath = line.strip()[:-2].strip()  # path portion, before the trailing " <label>"
25 |             label = line.strip()[-1]            # single-character class label
26 |
27 | lmList.append((lmPath, int(label)))
28 | return lmList
29 |
30 |
31 | class LandmarkList(data.Dataset):
32 | def __init__(self, root, fileList, transform=None, list_reader=default_list_reader, loader=default_loader):
33 | self.root = root
34 | self.lmList = list_reader(fileList)
35 | self.transform = transform
36 | self.loader = loader
37 |
38 | def __getitem__(self, index):
39 | lmPath, target = self.lmList[index]
40 | lm = self.loader(os.path.join(self.root, lmPath))
41 | if self.transform is not None:
42 | lm = self.transform(lm)
43 | return lm, target, len(lm)
44 |
45 | def __len__(self):
46 | return len(self.lmList)
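47 |
48 |
49 | # Note: this file mirrors dataset.py, except __getitem__ returns len(lm)
50 | # (which also works when the pickled sample is a plain Python list).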
--------------------------------------------------------------------------------
/main2.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.nn.functional as F
4 | import torch.optim as optim
5 | from dataset2 import LandmarkList
6 | from torch.utils import data
7 | from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence
8 | import argparse
9 |
10 | # parser.add_argument('--root_path', type=str, default='/home/guosheng/Liveness/Code/FaceFlashing/Data/',
11 | # metavar='H',
12 | # help='Dir Head')
13 | # parser.add_argument('--trainFile', type=str, default='TrainList_4sources_13082019.txt', metavar='TRF', help='training file name')
14 |
15 |
16 | EMBEDDING_DIM = 68*2
17 | HIDDEN_DIM = 68*4
18 | MAX_EPOCH = 10
19 | DEVICES = 2
20 | torch.cuda.set_device(DEVICES)
21 |
22 |
23 | def pad_collate(batch):
24 | batch.sort(key=lambda x: x[2], reverse=True)
25 | lms, tgs, lens = zip(*batch)
26 | new_lms = torch.zeros((len(lms), lms[0].shape[0], lms[0].shape[1])) # batch x seq x feature(136)
27 | new_lms[0] = lms[0]
28 | for i in range(1, len(lms)):
29 |         # right-pad each shorter sequence with zeros up to the longest length
30 |         # (lens[0] is the longest, since the batch was sorted above)
31 |         new_lms[i] = torch.cat((lms[i], torch.zeros((lens[0] - lens[i]), 136)), 0)
32 | return new_lms, tgs, lens
33 |
34 |
35 | class LSTM_Classifier(nn.Module):
36 |
37 | def __init__(self, embedding_dim, hidden_dim, target_size=1):
38 | super(LSTM_Classifier, self).__init__()
39 | self.hidden_dim = hidden_dim
40 |
41 |         # The LSTM takes landmark feature vectors as inputs, and outputs hidden states
42 |         # with dimensionality hidden_dim.
43 | self.lstm = nn.LSTM(embedding_dim, hidden_dim, num_layers=2)
44 |
45 | # The linear layer that maps from hidden state space to tag space
46 | self.lc = nn.Linear(hidden_dim, target_size)
47 |
48 | def forward(self, landmarks, lengths):
49 | # pack_padded_sequence so that padded items in the sequence won't be shown to the LSTM
50 | packed_input = pack_padded_sequence(landmarks, lengths, batch_first=True)
51 | _, (ht, _) = self.lstm(packed_input)
52 | # import pdb;
53 | # pdb.set_trace()
54 | # packed_output, (ht, ct) = self.lstm(packed_input) # ht is the final output of each batch! ht (1, 4, 272) can be found in output[:,input_sizes-1,:]
55 | # output, input_sizes = pad_packed_sequence(packed_output, batch_first=True)
56 | '''
57 | (Pdb) output[:,input_sizes-1,:]
58 | tensor([[[-0.0176, 0.1605, 0.1339, ..., -0.0914, 0.2951, -0.0065],
59 | [-0.0225, 0.1589, 0.1340, ..., -0.0925, 0.2950, -0.0095],
60 | [-0.0253, 0.1574, 0.1431, ..., -0.0865, 0.3022, -0.0119],
61 | [-0.0303, 0.1515, 0.1422, ..., -0.1094, 0.2976, -0.0032]],
62 |
63 | [[ 0.0000, 0.0000, 0.0000, ..., 0.0000, 0.0000, 0.0000],
64 | [-0.0165, 0.1666, 0.1344, ..., -0.0698, 0.2945, -0.0163],
65 | [-0.0235, 0.1697, 0.1479, ..., -0.0657, 0.3001, -0.0195],
66 | [-0.0235, 0.1734, 0.1515, ..., -0.0608, 0.3029, -0.0201]],
67 |
68 | [[ 0.0000, 0.0000, 0.0000, ..., 0.0000, 0.0000, 0.0000],
69 | [ 0.0000, 0.0000, 0.0000, ..., 0.0000, 0.0000, 0.0000],
70 | [-0.0492, 0.1666, 0.1444, ..., -0.0749, 0.2816, -0.0188],
71 | [-0.0490, 0.1542, 0.1449, ..., -0.0865, 0.2821, -0.0205]],
72 |
73 | [[ 0.0000, 0.0000, 0.0000, ..., 0.0000, 0.0000, 0.0000],
74 | [ 0.0000, 0.0000, 0.0000, ..., 0.0000, 0.0000, 0.0000],
75 | [ 0.0000, 0.0000, 0.0000, ..., 0.0000, 0.0000, 0.0000],
76 | [-0.0460, 0.1522, 0.1381, ..., -0.0959, 0.2843, -0.0071]]],
77 |            device='cuda:2', grad_fn=<...>)
78 | (Pdb) ht.shape
79 | torch.Size([1, 4, 272])
80 | (Pdb) ht
81 | tensor([[[-0.0176, 0.1605, 0.1339, ..., -0.0914, 0.2951, -0.0065],
82 | [-0.0165, 0.1666, 0.1344, ..., -0.0698, 0.2945, -0.0163],
83 | [-0.0492, 0.1666, 0.1444, ..., -0.0749, 0.2816, -0.0188],
84 | [-0.0460, 0.1522, 0.1381, ..., -0.0959, 0.2843, -0.0071]]],
85 |            device='cuda:2', grad_fn=<...>)
86 |
87 | '''
88 | # import pdb;
89 | # pdb.set_trace()
90 | logit = self.lc(ht[-1])
91 | return logit
92 |
93 |
94 | # inp = [torch.randn(1, 68*2) for _ in range(5)]
95 | # print(inp)
96 | # inp = torch.cat(inp)
97 | # print(inp)
98 | # model = LSTM_Classifier(EMBEDDING_DIM, HIDDEN_DIM, 1)
99 | # out = model(inp)
100 | # print(out)
101 |
102 |
103 | model = LSTM_Classifier(EMBEDDING_DIM, HIDDEN_DIM, 1)
104 | model = model.cuda()
105 | loss_function = torch.nn.BCEWithLogitsLoss()
106 | optimizer = optim.Adam(model.parameters(), lr=1e-4)
107 | # l2 = torch.nn.BCELoss()
108 |
109 |
110 | # for i in range(2):
111 | # model.zero_grad()
112 | # inp = [torch.randn(1, 68*2) for _ in range(5)]
113 | # inp = torch.cat(inp)
114 | #     out = model(inp)[-1] # we could do a classification for every output (probably better)
115 | # print(out)
116 | # loss = loss_function(out,torch.Tensor(1))
117 | # # loss = l2(nn.Sigmoid()(out), torch.Tensor(1))
118 | # print(loss)
119 | # loss.backward()
120 | # optimizer.step()
121 |
122 |
123 | dataset_train = LandmarkList(root='/datasets/move_closer/Data_Landmark/', fileList='/datasets/move_closer/TrainList.txt')
124 | dataloader_train = data.DataLoader(dataset_train, batch_size=128, shuffle=True, num_workers=4, collate_fn=pad_collate)
125 |
126 | for i in range(MAX_EPOCH):
127 | for batch, labels, lengths in dataloader_train:
128 | model.zero_grad()
129 |         out = model(batch.cuda(), lengths) # we could do a classification for every output (probably better)
130 | # import pdb;
131 | # pdb.set_trace()
132 | loss = loss_function(out, torch.FloatTensor(labels).unsqueeze(1).cuda())
133 | # loss = l2(nn.Sigmoid()(out), labels)
134 |         print(loss.item())
135 | loss.backward()
136 | optimizer.step()
137 |
138 |
139 |
140 |
141 |
142 |
143 |
144 | # Demo to prove they are the same and padding doesn't feed the whole batch continuously!
145 | # for batch, labels, lengths in dataloader_train:
146 | # model.zero_grad()
147 | #     out = model(batch.cuda(), lengths) # we could do a classification for every output (probably better)
148 | # import pdb;
149 | # pdb.set_trace()
150 | #
151 | # dataloader_train = data.DataLoader(dataset_train, batch_size=1, shuffle=False, num_workers=0, collate_fn=pad_collate)
152 | # for batch, labels, lengths in dataloader_train:
153 | # model.zero_grad()
154 | #     out = model(batch.cuda(), lengths) # we could do a classification for every output (probably better)
155 | # import pdb;
156 | # pdb.set_trace()
157 |
158 |
159 |
160 |
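161 | # Note: nn.LSTM returns ht of shape (num_layers, batch, hidden_dim);
162 | # ht[-1] above selects the top layer's final hidden state per sequence,
163 | # which is what the linear classifier consumes.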
--------------------------------------------------------------------------------
/main_softmax.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.nn.functional as F
4 | import torch.optim as optim
5 | from dataset import LandmarkList
6 | from torch.utils import data
7 | from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence
8 | import argparse
9 |
10 |
11 | # rnn = 'GRU'
12 | # rnn = 'embedGRU'
13 | rnn = 'biGRU'
14 | # rnn = 'LSTM'
15 | EMBEDDING_DIM = 68*2
16 | HIDDEN_DIM = 68 * 2 * 2
17 | N_LAYERS_RNN = 3
18 | DROPOUT = 0.5
19 | MAX_EPOCH = 1000
20 | LR = 1e-4
21 | DEVICES = 1
22 | torch.cuda.set_device(DEVICES)
23 | SAVE_BEST_MODEL = True
24 |
25 |
26 | def compute_binary_accuracy(model, data_loader, loss_function):
27 | correct_pred, num_examples, total_loss = 0, 0, 0.
28 | model.eval()
29 | with torch.no_grad():
30 | for batch, labels, lengths in data_loader:
31 | # import pdb;
32 | # pdb.set_trace()
33 | logits = model(batch.cuda(), lengths)
34 | total_loss += loss_function(logits, torch.LongTensor(labels).cuda()).item()
35 | # predicted_labels = (torch.sigmoid(logits) > 0.5).long()
36 | _, predicted_labels = torch.max(logits, 1)
37 | num_examples += len(lengths)
38 | correct_pred += (predicted_labels.cpu().long() == torch.LongTensor(labels)).sum()
39 | return correct_pred.float().item()/num_examples * 100, total_loss
40 |
41 |
42 | def pad_collate(batch):
43 | batch.sort(key=lambda x: x[2], reverse=True)
44 | lms, tgs, lens = zip(*batch)
45 | new_lms = torch.zeros((len(lms), lms[0].shape[0], lms[0].shape[1])) # batch x seq x feature(136)
46 | new_lms[0] = lms[0]
47 | for i in range(1, len(lms)):
48 |         new_lms[i] = torch.cat((lms[i], torch.zeros((lens[0] - lens[i]), 136)), 0)
49 | return new_lms, tgs, lens
50 |
51 |
52 | class LSTM_Classifier(nn.Module):
53 |
54 | def __init__(self, embedding_dim, hidden_dim, target_size=1, bidirectional=False):
55 | super(LSTM_Classifier, self).__init__()
56 | self.hidden_dim = hidden_dim
57 |
58 |         # The LSTM takes landmark feature vectors as inputs, and outputs hidden states
59 |         # with dimensionality hidden_dim.
60 | self.lstm = nn.LSTM(embedding_dim, hidden_dim, num_layers=N_LAYERS_RNN, dropout=DROPOUT)
61 |
62 | # The linear layer that maps from hidden state space to tag space
63 | self.lc = nn.Linear(hidden_dim, target_size)
64 | self.dropout = nn.Dropout(DROPOUT)
65 |
66 | def forward(self, landmarks, lengths):
67 | packed_input = pack_padded_sequence(landmarks, lengths, batch_first=True)
68 | _, (ht, _) = self.lstm(packed_input)
69 | ht = self.dropout(ht[-1])
70 | logit = self.lc(ht)
71 | return logit
72 |
73 |
74 | class embed_GRU_Classifier(nn.Module):
75 |
76 | def __init__(self, embedding_dim, hidden_dim, target_size=1, bidirectional=False):
77 | super(embed_GRU_Classifier, self).__init__()
78 | self.hidden_dim = hidden_dim
79 |
80 | self.embed = nn.Linear(EMBEDDING_DIM, EMBEDDING_DIM, bias=False)
81 |         # The GRU takes landmark feature vectors as inputs, and outputs hidden states
82 |         # with dimensionality hidden_dim
83 | self.gru = nn.GRU(embedding_dim, hidden_dim, num_layers=N_LAYERS_RNN, bidirectional=bidirectional, dropout=DROPOUT)
84 |
85 | # The linear layer that maps from hidden state space to tag space
86 | self.lc1 = nn.Linear(hidden_dim,EMBEDDING_DIM)
87 | self.lc2 = nn.Linear(EMBEDDING_DIM, target_size)
88 | self.dropout = nn.Dropout(DROPOUT)
89 |
90 | def forward(self, landmarks, lengths):
91 | # import pdb; pdb.set_trace()
92 | landmarks = self.embed(landmarks)
93 | packed_input = pack_padded_sequence(landmarks, lengths, batch_first=True)
94 | _, ht = self.gru(packed_input)
95 | # import pdb; pdb.set_trace()
96 | ht = self.dropout(ht[-1])
97 |         logit = self.lc2(torch.tanh(self.lc1(ht)))
98 | return logit
99 |
100 |
101 | class GRU_Classifier(nn.Module):
102 |
103 | def __init__(self, embedding_dim, hidden_dim, target_size=1, bidirectional=False):
104 | super(GRU_Classifier, self).__init__()
105 | self.hidden_dim = hidden_dim
106 |
107 |         # The GRU takes landmark feature vectors as inputs, and outputs hidden states
108 |         # with dimensionality hidden_dim
109 | self.gru = nn.GRU(embedding_dim, hidden_dim, num_layers=N_LAYERS_RNN, bidirectional=bidirectional, dropout=DROPOUT)
110 |
111 | # The linear layer that maps from hidden state space to tag space
112 | self.lc1 = nn.Linear(hidden_dim,target_size)
113 | # self.lc1 = nn.Linear(hidden_dim,EMBEDDING_DIM)
114 | # self.lc2 = nn.Linear(EMBEDDING_DIM, target_size)
115 | self.dropout = nn.Dropout(DROPOUT)
116 |
117 | def forward(self, landmarks, lengths):
118 | packed_input = pack_padded_sequence(landmarks, lengths, batch_first=True)
119 | _, ht = self.gru(packed_input)
120 | # import pdb; pdb.set_trace()
121 | ht = self.dropout(ht[-1])
122 | logit = self.lc1(ht)
123 | # logit = self.lc2(F.relu(self.lc1(ht)))
124 | return logit
125 |
126 |
127 | class biGRU_Classifier(nn.Module):
128 |
129 | def __init__(self, embedding_dim, hidden_dim, target_size=1, bidirectional=True):
130 | super(biGRU_Classifier, self).__init__()
131 | self.hidden_dim = hidden_dim
132 |
133 |         # The GRU takes landmark feature vectors as inputs, and outputs hidden states
134 |         # with dimensionality hidden_dim.
135 | self.gru = nn.GRU(embedding_dim, hidden_dim, num_layers=N_LAYERS_RNN, bidirectional=bidirectional, dropout=DROPOUT)
136 |
137 | # The linear layer that maps from hidden state space to tag space
138 | self.lc1 = nn.Linear(hidden_dim*2, hidden_dim)
139 | self.lc2 = nn.Linear(hidden_dim, target_size)
140 | self.dropout = nn.Dropout(DROPOUT)
141 |
142 | def forward(self, landmarks, lengths):
143 | packed_input = pack_padded_sequence(landmarks, lengths, batch_first=True)
144 | _, ht = self.gru(packed_input)
145 | # import pdb; pdb.set_trace()
146 | ht = self.dropout(torch.cat((ht[-2,:,:], ht[-1,:,:]), dim=1))
147 | logit = self.lc2(F.relu(self.lc1(ht)))
148 | return logit
149 |
150 |
151 | if rnn == 'embedGRU':
152 | model = embed_GRU_Classifier(EMBEDDING_DIM, HIDDEN_DIM, 2)
153 | if rnn == 'GRU':
154 | model = GRU_Classifier(EMBEDDING_DIM, HIDDEN_DIM, 2)
155 | if rnn == 'biGRU':
156 | model = biGRU_Classifier(EMBEDDING_DIM, HIDDEN_DIM, 2)
157 | if rnn == 'LSTM':
158 | model = LSTM_Classifier(EMBEDDING_DIM, HIDDEN_DIM, 2)
159 | model = model.cuda()
160 | loss_function = torch.nn.CrossEntropyLoss()
161 | loss_function_eval_sum = torch.nn.CrossEntropyLoss(reduction='sum')
162 | optimizer = optim.Adam(model.parameters(), lr=LR)
163 | # l2 = torch.nn.BCELoss()
164 |
165 |
166 | dataset_train = LandmarkList(root='/datasets/move_closer/Data_Landmark/', fileList='/datasets/move_closer/TrainList.txt')
167 | dataloader_train = data.DataLoader(dataset_train, batch_size=128, shuffle=True, num_workers=0, collate_fn=pad_collate)
168 |
169 | dataset_test = LandmarkList(root='/datasets/move_closer/Data_Landmark/', fileList='/datasets/move_closer/TestList.txt')
170 | dataloader_test = data.DataLoader(dataset_test, batch_size=64, shuffle=False, num_workers=1, collate_fn=pad_collate)
171 |
172 | best_test_acc = 0.
173 | for epoch in range(MAX_EPOCH):
174 | model.train()
175 | n_iter = 0
176 | for batch, labels, lengths in dataloader_train:
177 | model.zero_grad()
178 |         out = model(batch.cuda(), lengths) # we could do a classification for every output (probably better)
179 | # import pdb; pdb.set_trace()
180 | loss = loss_function(out, torch.LongTensor(labels).cuda())
181 | # loss = l2(nn.Sigmoid()(out), labels)
182 | loss.backward()
183 | optimizer.step()
184 | n_iter += 1
185 | train_acc, train_loss = compute_binary_accuracy(model, dataloader_train, loss_function_eval_sum)
186 | test_acc, test_loss = compute_binary_accuracy(model, dataloader_test, loss_function_eval_sum)
187 | print('Epoch{},train_acc,{:.2f}%,train_loss,{:.8f},valid_acc,{:.2f}%,valid_loss,{:.8f}'.format(epoch, train_acc, train_loss, test_acc, test_loss))
188 | if test_acc > best_test_acc:
189 | best_test_acc = test_acc
190 | if SAVE_BEST_MODEL:
191 | torch.save(model.state_dict(), 'models/' + rnn +
192 | '_L' + str(N_LAYERS_RNN) + '.pt')
193 | print('best epoch {}, train_acc {}, test_acc {}'.format(epoch, train_acc, test_acc))
194 |
195 |
196 |
197 |
198 |
199 |
200 |
201 |
202 |
203 |
204 | # class LSTM_Classifier(nn.Module):
205 | #
206 | # def __init__(self, embedding_dim, hidden_dim, target_size=1):
207 | # super(LSTM_Classifier, self).__init__()
208 | # self.hidden_dim = hidden_dim
209 | #
210 | # # The LSTM takes word embeddings as inputs, and outputs hidden states
211 | # # with dimensionality hidden_dim.
212 | # self.lstm = nn.LSTM(embedding_dim, hidden_dim)
213 | #
214 | # # The linear layer that maps from hidden state space to tag space
215 | # self.lc = nn.Linear(hidden_dim, target_size)
216 | #
217 | # def forward(self, landmarks, lengths):
218 | # # pack_padded_sequence so that padded items in the sequence won't be shown to the LSTM
219 | # packed_input = pack_padded_sequence(landmarks, lengths, batch_first=True)
220 | # _, (ht, _) = self.lstm(packed_input)
221 | # import pdb;
222 | # pdb.set_trace()
223 | # # packed_output, (ht, ct) = self.lstm(packed_input) # ht is the final output of each batch! ht (1, 4, 272) can be found in output[:,input_sizes-1,:]
224 | # # output, input_sizes = pad_packed_sequence(packed_output, batch_first=True)
225 | # '''
226 | # (Pdb) output[:,input_sizes-1,:]
227 | # tensor([[[-0.0176, 0.1605, 0.1339, ..., -0.0914, 0.2951, -0.0065],
228 | # [-0.0225, 0.1589, 0.1340, ..., -0.0925, 0.2950, -0.0095],
229 | # [-0.0253, 0.1574, 0.1431, ..., -0.0865, 0.3022, -0.0119],
230 | # [-0.0303, 0.1515, 0.1422, ..., -0.1094, 0.2976, -0.0032]],
231 | #
232 | # [[ 0.0000, 0.0000, 0.0000, ..., 0.0000, 0.0000, 0.0000],
233 | # [-0.0165, 0.1666, 0.1344, ..., -0.0698, 0.2945, -0.0163],
234 | # [-0.0235, 0.1697, 0.1479, ..., -0.0657, 0.3001, -0.0195],
235 | # [-0.0235, 0.1734, 0.1515, ..., -0.0608, 0.3029, -0.0201]],
236 | #
237 | # [[ 0.0000, 0.0000, 0.0000, ..., 0.0000, 0.0000, 0.0000],
238 | # [ 0.0000, 0.0000, 0.0000, ..., 0.0000, 0.0000, 0.0000],
239 | # [-0.0492, 0.1666, 0.1444, ..., -0.0749, 0.2816, -0.0188],
240 | # [-0.0490, 0.1542, 0.1449, ..., -0.0865, 0.2821, -0.0205]],
241 | #
242 | # [[ 0.0000, 0.0000, 0.0000, ..., 0.0000, 0.0000, 0.0000],
243 | # [ 0.0000, 0.0000, 0.0000, ..., 0.0000, 0.0000, 0.0000],
244 | # [ 0.0000, 0.0000, 0.0000, ..., 0.0000, 0.0000, 0.0000],
245 | # [-0.0460, 0.1522, 0.1381, ..., -0.0959, 0.2843, -0.0071]]],
246 | #            device='cuda:2', grad_fn=<...>)
247 | # (Pdb) ht.shape
248 | # torch.Size([1, 4, 272])
249 | # (Pdb) ht
250 | # tensor([[[-0.0176, 0.1605, 0.1339, ..., -0.0914, 0.2951, -0.0065],
251 | # [-0.0165, 0.1666, 0.1344, ..., -0.0698, 0.2945, -0.0163],
252 | # [-0.0492, 0.1666, 0.1444, ..., -0.0749, 0.2816, -0.0188],
253 | # [-0.0460, 0.1522, 0.1381, ..., -0.0959, 0.2843, -0.0071]]],
254 | #            device='cuda:2', grad_fn=<...>)
255 | #
256 | # '''
257 | # # import pdb;
258 | # # pdb.set_trace()
259 | # logit = self.lc(ht.squeeze(0))
260 | # return logit
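261 |
262 |
263 | # Note: with CrossEntropyLoss the models here output 2 logits per clip
264 | # (target_size=2) and labels are class indices; main2.py instead trains a
265 | # single-logit model with BCEWithLogitsLoss.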
--------------------------------------------------------------------------------
/model.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.nn.functional as F
4 | from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence
5 |
6 |
7 | DROPOUT = 0.5
8 |
9 |
10 | class LSTM_Classifier(nn.Module):
11 |
12 | def __init__(self, embedding_dim, hidden_dim, target_size=1, bidirectional=False, n_layer=1):
13 | super(LSTM_Classifier, self).__init__()
14 | self.hidden_dim = hidden_dim
15 |
16 |         # The LSTM takes landmark feature vectors as inputs, and outputs hidden states
17 |         # with dimensionality hidden_dim.
18 | self.lstm = nn.LSTM(embedding_dim, hidden_dim, num_layers=n_layer, dropout=DROPOUT)
19 |
20 | # The linear layer that maps from hidden state space to tag space
21 | self.lc = nn.Linear(hidden_dim, target_size)
22 | self.dropout = nn.Dropout(DROPOUT)
23 |
24 | def forward(self, landmarks, lengths):
25 | packed_input = pack_padded_sequence(landmarks, lengths, batch_first=True)
26 | _, (ht, _) = self.lstm(packed_input)
27 | ht = self.dropout(ht[-1])
28 | logit = self.lc(ht)
29 | return logit
30 |
31 |
32 | class embed_GRU_Classifier(nn.Module):
33 |
34 | def __init__(self, embedding_dim, hidden_dim, target_size=1, bidirectional=False, n_layer=1):
35 | super(embed_GRU_Classifier, self).__init__()
36 | self.hidden_dim = hidden_dim
37 |
38 | self.embed1 = nn.Linear(embedding_dim, int(hidden_dim*2), bias=False)
39 | self.embed2 = nn.Linear(int(hidden_dim*2), hidden_dim, bias=False)
40 |         # The GRU takes landmark feature vectors as inputs, and outputs hidden states
41 |         # with dimensionality hidden_dim
42 | self.gru = nn.GRU(hidden_dim, hidden_dim, num_layers=n_layer, bidirectional=bidirectional, dropout=DROPOUT)
43 |
44 | # The linear layer that maps from hidden state space to tag space
45 | self.lc1 = nn.Linear(hidden_dim,int(hidden_dim/2))
46 | self.lc2 = nn.Linear(int(hidden_dim/2), target_size)
47 | self.dropout = nn.Dropout(DROPOUT)
48 |
49 | # super(embed_GRU_Classifier, self).__init__()
50 | # self.hidden_dim = hidden_dim
51 | #
52 | # self.embed1 = nn.Linear(embedding_dim, int(embedding_dim/2), bias=False)
53 | # self.embed2 = nn.Linear(int(embedding_dim/2), int(embedding_dim/4), bias=False)
54 | # # The LSTM takes word embeddings as inputs, and outputs hidden states
55 | # # with dimensionality hidden_dim
56 | # self.gru = nn.GRU(int(embedding_dim/4), int(embedding_dim/4), num_layers=n_layer, bidirectional=bidirectional, dropout=DROPOUT)
57 | #
58 | # # The linear layer that maps from hidden state space to tag space
59 | # self.lc1 = nn.Linear(int(embedding_dim/4),int(embedding_dim/8))
60 | # self.lc2 = nn.Linear(int(embedding_dim/8), target_size)
61 | # self.dropout = nn.Dropout(DROPOUT)
62 |
63 | def forward(self, landmarks, lengths):
64 | # import pdb; pdb.set_trace()
65 |         landmarks = torch.tanh(self.embed2(torch.tanh(self.embed1(landmarks))))
66 | packed_input = pack_padded_sequence(landmarks, lengths, batch_first=True)
67 | _, ht = self.gru(packed_input)
68 | # import pdb; pdb.set_trace()
69 | ht = self.dropout(ht[-1])
70 |         logit = self.lc2(torch.tanh(self.lc1(ht)))
71 | return logit
72 |
73 |
74 | class GRU_Classifier(nn.Module):
75 |
76 | def __init__(self, embedding_dim, hidden_dim, target_size=1, bidirectional=False, n_layer=1):
77 | super(GRU_Classifier, self).__init__()
78 | self.hidden_dim = hidden_dim
79 | self.grad_clipping = 10.
80 |         # The GRU takes landmark feature vectors as inputs, and outputs hidden states
81 |         # with dimensionality hidden_dim
82 | self.gru = nn.GRU(embedding_dim, hidden_dim, num_layers=n_layer, bidirectional=bidirectional, dropout=DROPOUT)
83 |
84 | # The linear layer that maps from hidden state space to tag space
85 | self.lc1 = nn.Linear(hidden_dim,target_size)
86 | # self.lc1 = nn.Linear(hidden_dim,EMBEDDING_DIM)
87 | # self.lc2 = nn.Linear(EMBEDDING_DIM, target_size)
88 | self.dropout = nn.Dropout(DROPOUT)
89 |
90 | def forward(self, landmarks, lengths):
91 | packed_input = pack_padded_sequence(landmarks, lengths, batch_first=True)
92 | _, ht = self.gru(packed_input)
93 | # import pdb; pdb.set_trace()
94 | if ht.requires_grad:
95 | ht.register_hook(lambda x: x.clamp(min=-self.grad_clipping, max=self.grad_clipping))
96 | ht = self.dropout(ht[-1])
97 |         logit = self.lc1(ht) # probably a 1x1 conv is needed to do the linear transform
98 | # logit = self.lc2(F.relu(self.lc1(ht)))
99 | return logit
100 |
101 |
102 | class biGRU_Classifier(nn.Module):
103 |
104 | def __init__(self, embedding_dim, hidden_dim, target_size=1, bidirectional=True, n_layer=1):
105 | super(biGRU_Classifier, self).__init__()
106 | self.hidden_dim = hidden_dim
107 | self.grad_clipping = 10.
108 |         # The GRU takes landmark feature vectors as inputs, and outputs hidden states
109 |         # with dimensionality hidden_dim.
110 | self.gru = nn.GRU(embedding_dim, hidden_dim, num_layers=n_layer, bidirectional=bidirectional, dropout=DROPOUT)
111 |
112 | # The linear layer that maps from hidden state space to tag space
113 | self.lc1 = nn.Linear(hidden_dim*2, hidden_dim)
114 | self.lc2 = nn.Linear(hidden_dim, target_size)
115 | self.dropout = nn.Dropout(DROPOUT)
116 |
117 | def forward(self, landmarks, lengths):
118 | packed_input = pack_padded_sequence(landmarks, lengths, batch_first=True)
119 | _, ht = self.gru(packed_input)
120 | if ht.requires_grad:
121 | ht.register_hook(lambda x: x.clamp(min=-self.grad_clipping, max=self.grad_clipping))
122 | ht = self.dropout(torch.cat((ht[-2,:,:], ht[-1,:,:]), dim=1))
123 | logit = self.lc2(F.relu(self.lc1(ht)))
124 | return logit
125 |
126 |
127 | class Framewise_GRU_Classifier(nn.Module):
128 |
129 | def __init__(self, embedding_dim, hidden_dim, target_size=1, bidirectional=False, n_layer=1):
130 | super(Framewise_GRU_Classifier, self).__init__()
131 | self.hidden_dim = hidden_dim
132 | self.gru = nn.GRU(embedding_dim, hidden_dim, num_layers=n_layer, bidirectional=bidirectional, dropout=DROPOUT)
133 |
134 | # The linear layer that maps from hidden state space to tag space
135 | # self.lc1 = nn.Linear(hidden_dim, target_size)
136 | self.lc1 = nn.Linear(hidden_dim, embedding_dim)
137 | self.lc2 = nn.Linear(embedding_dim, target_size)
138 | self.dropout = nn.Dropout(DROPOUT)
139 |
140 | def forward(self, landmarks, lengths):
141 | packed_input = pack_padded_sequence(landmarks, lengths, batch_first=True)
142 | packed_output, _ = self.gru(packed_input)
143 | output, _ = pad_packed_sequence(packed_output, batch_first=True)
144 | output = output.contiguous()
145 | output = output.view(-1, self.hidden_dim)
146 | output = self.dropout(output)
147 |         logit = self.lc1(output) # probably a 1x1 conv is needed to do the linear transform
148 | logit = self.lc2(self.dropout(F.relu(logit)))
149 | return logit.view(len(lengths), -1, 1)
150 |
151 |
152 | class sumGRU(nn.Module):
153 |
154 | def __init__(self, embedding_dim, hidden_dim, target_size=1, bidirectional=False, n_layer=1):
155 | super(sumGRU, self).__init__()
156 | self.hidden_dim = hidden_dim
157 | self.gru = nn.GRU(embedding_dim, hidden_dim, num_layers=n_layer, bidirectional=bidirectional, dropout=DROPOUT)
158 |
159 | # The linear layer that maps from hidden state space to tag space
160 | self.lc1 = nn.Linear(hidden_dim, embedding_dim)
161 | self.lc2 = nn.Linear(embedding_dim, target_size)
162 | self.dropout = nn.Dropout(DROPOUT)
163 |
164 | def forward(self, landmarks, lengths):
165 | packed_input = pack_padded_sequence(landmarks, lengths, batch_first=True)
166 | packed_output, _ = self.gru(packed_input)
167 | output, _ = pad_packed_sequence(packed_output, batch_first=True)
168 | # import pdb; pdb.set_trace()
169 | output = self.dropout(output.sum(1))
170 | # logit = self.lc1(output) # probably a 1x1 conv is need to do linear transform
171 | logit = self.lc2(F.relu(self.lc1(output)))
172 | return logit
173 |
174 |
175 |
176 | class cnn_2d(nn.Module):  # despite the name, this stacks 1-D convolutions along the sequence axis
177 |
178 | def __init__(self, embedding_dim, hidden_dim, target_size=1, bidirectional=False):
179 | super(cnn_2d, self).__init__()
180 | self.hidden_dim = hidden_dim
181 | self.n_layers = 2 # 2, 4, 6 ,8
182 | if self.n_layers >= 2:
183 | self.conv1 = nn.Conv1d(in_channels=embedding_dim, out_channels=hidden_dim, kernel_size=3, padding=1)
184 | self.conv2 = nn.Conv1d(in_channels=hidden_dim, out_channels=hidden_dim, kernel_size=3, padding=1)
185 | self.bn1 = nn.BatchNorm1d(num_features=self.hidden_dim)
186 | self.bn2 = nn.BatchNorm1d(num_features=self.hidden_dim)
187 | self.p1 = nn.MaxPool1d(kernel_size=2)
188 | if self.n_layers >= 4:
189 | self.conv3 = nn.Conv1d(in_channels=hidden_dim, out_channels=hidden_dim, kernel_size=3, padding=1)
190 | self.conv4 = nn.Conv1d(in_channels=hidden_dim, out_channels=hidden_dim, kernel_size=3, padding=1)
191 | self.bn3 = nn.BatchNorm1d(num_features=self.hidden_dim)
192 | self.bn4 = nn.BatchNorm1d(num_features=self.hidden_dim)
193 | self.p2 = nn.MaxPool1d(kernel_size=2)
194 | if self.n_layers >= 6:
195 | self.conv5 = nn.Conv1d(in_channels=hidden_dim, out_channels=hidden_dim, kernel_size=3, padding=1)
196 | self.conv6 = nn.Conv1d(in_channels=hidden_dim, out_channels=hidden_dim, kernel_size=3, padding=1)
197 | self.bn5 = nn.BatchNorm1d(num_features=self.hidden_dim)
198 | self.bn6 = nn.BatchNorm1d(num_features=self.hidden_dim)
199 | self.p3 = nn.MaxPool1d(kernel_size=2)
200 | if self.n_layers == 8:
201 | self.conv7 = nn.Conv1d(in_channels=hidden_dim, out_channels=hidden_dim, kernel_size=3, padding=1)
202 | self.conv8 = nn.Conv1d(in_channels=hidden_dim, out_channels=hidden_dim, kernel_size=3, padding=1)
203 | self.bn7 = nn.BatchNorm1d(num_features=self.hidden_dim)
204 | self.bn8 = nn.BatchNorm1d(num_features=self.hidden_dim)
205 |
206 | self.glbAvgPool = nn.AdaptiveAvgPool1d(1)
207 |
208 | self.dropout = nn.Dropout(DROPOUT)
209 | # The linear layer that maps from hidden state space to tag space
210 | self.lc1 = nn.Linear(hidden_dim, int(hidden_dim*2))
211 | self.lc2 = nn.Linear(int(hidden_dim*2), target_size)
212 |
213 | def forward(self, landmarks, lengths):
214 | landmarks = landmarks.permute(0, 2, 1) # (b, seq, dim) --> (b, dim, seq)
215 | # Convolve on Seq for each dim to get (b, dim, seq)
216 | if self.n_layers == 8:
217 | landmarks = F.relu(self.bn8(self.conv8(F.relu(self.bn7(self.conv7(self.p3(F.relu(self.bn6(self.conv6(F.relu(self.bn5(self.conv5(self.p2(F.relu(self.bn4(self.conv4(F.relu(self.bn3(self.conv3(self.p1(F.relu(self.bn2(self.conv2(F.relu(self.bn1(self.conv1(landmarks)))))))))))))))))))))))))))
218 | elif self.n_layers == 6:
219 | landmarks = self.p3(F.relu(self.bn6(self.conv6(F.relu(self.bn5(self.conv5(self.p2(F.relu(self.bn4(self.conv4(F.relu(self.bn3(self.conv3(self.p1(F.relu(self.bn2(self.conv2(F.relu(self.bn1(self.conv1(landmarks)))))))))))))))))))))
220 | elif self.n_layers == 4:
221 | landmarks = self.p2(F.relu(self.bn4(self.conv4(F.relu(self.bn3(self.conv3(self.p1(F.relu(self.bn2(self.conv2(F.relu(self.bn1(self.conv1(landmarks))))))))))))))
222 | elif self.n_layers == 2:
223 | landmarks = self.p1(F.relu(self.bn2(self.conv2(F.relu(self.bn1(self.conv1(landmarks)))))))
224 | else:
225 |             print('n_layers must be 2, 4, 6, or 8')
226 | # Permute back: (b, dim, d_seq) --> (b, seq, dim)
227 | landmarks = landmarks.permute(0, 2, 1)
228 | # flat it to feed into fc: (b x seq, dim)
229 | landmarks = landmarks.contiguous()
230 | batch_size, seq_len, dim_feature = landmarks.shape
231 | landmarks = landmarks.view(-1, dim_feature)
232 |         landmarks = torch.tanh(self.lc1(self.dropout(landmarks)))  # (b x seq, 2*hidden_dim)
233 | landmarks = self.lc2(self.dropout(landmarks))
234 | # unflat back to (b, seq, 1)
235 | landmarks = landmarks.view(batch_size, seq_len, 1)
236 |
237 | logit_list = []
238 | if self.n_layers == 8 or self.n_layers == 6:
239 | for i, landmark in enumerate(landmarks):
240 | logit_list.append(self.glbAvgPool(landmark[:int(lengths[i]/8)].unsqueeze(0).permute(0, 2, 1)).squeeze(-1))
241 | if self.n_layers == 4:
242 | for i, landmark in enumerate(landmarks):
243 | logit_list.append(self.glbAvgPool(landmark[:int(lengths[i]/4)].unsqueeze(0).permute(0, 2, 1)).squeeze(-1))
244 | if self.n_layers == 2:
245 | for i, landmark in enumerate(landmarks):
246 | logit_list.append(self.glbAvgPool(landmark[:int(lengths[i]/2)].unsqueeze(0).permute(0, 2, 1)).squeeze(-1))
247 |
248 | return torch.cat(logit_list)
249 |
250 |
251 | class cnn_Classifier(nn.Module):  # 2-D conv variant; see the NOTE in forward() about the expected input shape
252 |
253 | def __init__(self, embedding_dim, hidden_dim, target_size=1, bidirectional=False):
254 | super(cnn_Classifier, self).__init__()
255 | self.hidden_dim = hidden_dim # can change to smaller ones 64 . 32. 16
256 | self.n_layers = 2 # 2, 4, 6 ,8
257 | self.use_bn = False
258 | if self.n_layers >= 2:
259 | self.conv1 = nn.Conv2d(in_channels=1, out_channels=self.hidden_dim, kernel_size=3, padding=1)
260 | self.conv2 = nn.Conv2d(in_channels=self.hidden_dim, out_channels=self.hidden_dim, kernel_size=3, padding=1)
261 | if self.use_bn:
262 | self.bn1 = nn.BatchNorm2d(num_features=self.hidden_dim)
263 | self.bn2 = nn.BatchNorm2d(num_features=self.hidden_dim)
264 | self.p1 = nn.MaxPool2d(kernel_size=2)
265 | if self.n_layers >= 4:
266 | self.conv3 = nn.Conv2d(in_channels=self.hidden_dim, out_channels=self.hidden_dim, kernel_size=3, padding=1)
267 | self.conv4 = nn.Conv2d(in_channels=self.hidden_dim, out_channels=self.hidden_dim, kernel_size=3, padding=1)
268 | if self.use_bn:
269 | self.bn3 = nn.BatchNorm2d(num_features=self.hidden_dim)
270 | self.bn4 = nn.BatchNorm2d(num_features=self.hidden_dim)
271 | self.p2 = nn.MaxPool2d(kernel_size=2)
272 | if self.n_layers >= 6:
273 | self.conv5 = nn.Conv2d(in_channels=self.hidden_dim, out_channels=self.hidden_dim, kernel_size=3, padding=1)
274 | self.conv6 = nn.Conv2d(in_channels=self.hidden_dim, out_channels=self.hidden_dim, kernel_size=3, padding=1)
275 | if self.use_bn:
276 | self.bn5 = nn.BatchNorm2d(num_features=self.hidden_dim)
277 | self.bn6 = nn.BatchNorm2d(num_features=self.hidden_dim)
278 | self.p3 = nn.MaxPool2d(kernel_size=2)
279 | if self.n_layers == 8:
280 | self.conv7 = nn.Conv2d(in_channels=self.hidden_dim, out_channels=self.hidden_dim, kernel_size=3, padding=1)
281 | self.conv8 = nn.Conv2d(in_channels=self.hidden_dim, out_channels=self.hidden_dim, kernel_size=3, padding=1)
282 | if self.use_bn:
283 | self.bn7 = nn.BatchNorm2d(num_features=self.hidden_dim)
284 |                 self.bn8 = nn.BatchNorm2d(num_features=self.hidden_dim)
285 |         self.glbAvgPool = nn.AdaptiveAvgPool1d(1)  # global average pool used in forward(); missing from the original __init__
286 | self.dropout = nn.Dropout(DROPOUT)
287 | # The linear layer that maps from hidden state space to tag space
288 | self.lc1 = nn.Linear(hidden_dim, int(hidden_dim*2))
289 | self.lc2 = nn.Linear(int(hidden_dim*2), target_size)
290 |
291 | def forward(self, landmarks, lengths):
292 |         landmarks = landmarks.permute(0, 2, 1) # (b, seq, dim) --> (b, dim, seq)
293 |         # Convolve on seq for each dim; NOTE: Conv2d with in_channels=1 expects 4-D input (e.g. landmarks.unsqueeze(1) -> (b, 1, dim, seq)), so this 3-D tensor will not match as written
294 | if self.use_bn:
295 | if self.n_layers == 8:
296 | landmarks = F.relu(self.bn8(self.conv8(F.relu(self.bn7(self.conv7(self.p3(F.relu(self.bn6(self.conv6(F.relu(self.bn5(self.conv5(self.p2(F.relu(self.bn4(self.conv4(F.relu(self.bn3(self.conv3(self.p1(F.relu(self.bn2(self.conv2(F.relu(self.bn1(self.conv1(landmarks)))))))))))))))))))))))))))
297 | elif self.n_layers == 6:
298 | landmarks = self.p3(F.relu(self.bn6(self.conv6(F.relu(self.bn5(self.conv5(self.p2(F.relu(self.bn4(self.conv4(F.relu(self.bn3(self.conv3(self.p1(F.relu(self.bn2(self.conv2(F.relu(self.bn1(self.conv1(landmarks)))))))))))))))))))))
299 | elif self.n_layers == 4:
300 | landmarks = self.p2(F.relu(self.bn4(self.conv4(F.relu(self.bn3(self.conv3(self.p1(F.relu(self.bn2(self.conv2(F.relu(self.bn1(self.conv1(landmarks))))))))))))))
301 | elif self.n_layers == 2:
302 | landmarks = self.p1(F.relu(self.bn2(self.conv2(F.relu(self.bn1(self.conv1(landmarks)))))))
303 | else:
304 |                 print('n_layers must be 2, 4, 6, or 8')
305 | else:
306 | if self.n_layers == 8:
307 | landmarks = F.relu(self.conv8(F.relu(self.conv7(self.p3(F.relu(self.conv6(F.relu(self.conv5(self.p2(F.relu(self.conv4(F.relu(self.conv3(self.p1(F.relu(self.conv2(F.relu(self.conv1(landmarks)))))))))))))))))))
308 | elif self.n_layers == 6:
309 | landmarks = self.p3(F.relu(self.conv6(F.relu(self.conv5(self.p2(F.relu(self.conv4(F.relu(self.conv3(self.p1(F.relu(self.conv2(F.relu(self.conv1(landmarks)))))))))))))))
310 | elif self.n_layers == 4:
311 | landmarks = self.p2(F.relu(self.conv4(F.relu(self.conv3(self.p1(F.relu(self.conv2(F.relu(self.conv1(landmarks))))))))))
312 | elif self.n_layers == 2:
313 | landmarks = self.p1(F.relu(self.conv2(F.relu(self.conv1(landmarks)))))
314 | else:
315 |                 print('n_layers must be 2, 4, 6, or 8')
316 | # Permute back: (b, dim, d_seq) --> (b, seq, dim)
317 | landmarks = landmarks.permute(0, 2, 1)
318 | # flat it to feed into fc: (b x seq, dim)
319 | landmarks = landmarks.contiguous()
320 | batch_size, seq_len, dim_feature = landmarks.shape
321 | landmarks = landmarks.view(-1, dim_feature)
322 |         landmarks = torch.tanh(self.lc1(self.dropout(landmarks)))  # (b x seq, 2*hidden_dim)
323 | landmarks = self.lc2(self.dropout(landmarks))
324 | # unflat back to (b, seq, 1)
325 | landmarks = landmarks.view(batch_size, seq_len, 1)
326 |
327 | logit_list = []
328 | if self.n_layers == 8 or self.n_layers == 6:
329 | for i, landmark in enumerate(landmarks):
330 | logit_list.append(self.glbAvgPool(landmark[:int(lengths[i]/8)].unsqueeze(0).permute(0, 2, 1)).squeeze(-1))
331 | if self.n_layers == 4:
332 | for i, landmark in enumerate(landmarks):
333 | logit_list.append(self.glbAvgPool(landmark[:int(lengths[i]/4)].unsqueeze(0).permute(0, 2, 1)).squeeze(-1))
334 | if self.n_layers == 2:
335 | for i, landmark in enumerate(landmarks):
336 | logit_list.append(self.glbAvgPool(landmark[:int(lengths[i]/2)].unsqueeze(0).permute(0, 2, 1)).squeeze(-1))
337 |
338 | return torch.cat(logit_list)
339 |
340 |
341 | class crnn_Classifier(nn.Module):
342 |
343 | def __init__(self, embedding_dim, hidden_dim, target_size=1, bidirectional=False, n_layer=1):
344 | super(crnn_Classifier, self).__init__()
345 | self.hidden_dim = hidden_dim
346 | self.n_layers = 4 # 2, 4, 6 ,8
347 | if self.n_layers >= 2:
348 | self.conv1 = nn.Conv1d(in_channels=embedding_dim, out_channels=hidden_dim, kernel_size=3, padding=1)
349 | self.conv2 = nn.Conv1d(in_channels=hidden_dim, out_channels=hidden_dim, kernel_size=3, padding=1)
350 | self.bn1 = nn.BatchNorm1d(num_features=self.hidden_dim)
351 | self.bn2 = nn.BatchNorm1d(num_features=self.hidden_dim)
352 | self.p1 = nn.MaxPool1d(kernel_size=2)
353 | self.scale_pool = 2
354 | if self.n_layers >= 4:
355 | self.conv3 = nn.Conv1d(in_channels=hidden_dim, out_channels=hidden_dim, kernel_size=3, padding=1)
356 | self.conv4 = nn.Conv1d(in_channels=hidden_dim, out_channels=hidden_dim, kernel_size=3, padding=1)
357 | self.bn3 = nn.BatchNorm1d(num_features=self.hidden_dim)
358 | self.bn4 = nn.BatchNorm1d(num_features=self.hidden_dim)
359 | self.p2 = nn.MaxPool1d(kernel_size=2)
360 | self.scale_pool = 4
361 | if self.n_layers >= 6:
362 | self.conv5 = nn.Conv1d(in_channels=hidden_dim, out_channels=hidden_dim, kernel_size=3, padding=1)
363 | self.conv6 = nn.Conv1d(in_channels=hidden_dim, out_channels=hidden_dim, kernel_size=3, padding=1)
364 | self.bn5 = nn.BatchNorm1d(num_features=self.hidden_dim)
365 | self.bn6 = nn.BatchNorm1d(num_features=self.hidden_dim)
366 | self.p3 = nn.MaxPool1d(kernel_size=2)
367 | self.scale_pool = 8
368 | if self.n_layers == 8:
369 | self.conv7 = nn.Conv1d(in_channels=hidden_dim, out_channels=hidden_dim, kernel_size=3, padding=1)
370 | self.conv8 = nn.Conv1d(in_channels=hidden_dim, out_channels=hidden_dim, kernel_size=3, padding=1)
371 | self.bn7 = nn.BatchNorm1d(num_features=self.hidden_dim)
372 | self.bn8 = nn.BatchNorm1d(num_features=self.hidden_dim)
373 | self.scale_pool = 8
374 |
375 | self.dropout = nn.Dropout(DROPOUT)
376 | self.gru = nn.GRU(hidden_dim, hidden_dim, num_layers=n_layer, bidirectional=bidirectional, dropout=DROPOUT)
377 | self.grad_clipping = 10.
378 | # The linear layer that maps from hidden state space to tag space
379 | self.lc1 = nn.Linear(hidden_dim, embedding_dim)
380 | self.lc2 = nn.Linear(embedding_dim, target_size)
381 |
382 | def forward(self, landmarks, lengths):
383 | landmarks = landmarks.permute(0, 2, 1) # (b, seq, dim) --> (b, dim, seq)
384 | # Convolve on Seq for each dim to get (b, dim, seq)
385 | if self.n_layers == 8:
386 | landmarks = F.relu(self.bn8(self.conv8(F.relu(self.bn7(self.conv7(self.p3(F.relu(self.bn6(self.conv6(F.relu(self.bn5(self.conv5(self.p2(F.relu(self.bn4(self.conv4(F.relu(self.bn3(self.conv3(self.p1(F.relu(self.bn2(self.conv2(F.relu(self.bn1(self.conv1(landmarks)))))))))))))))))))))))))))
387 | elif self.n_layers == 6:
388 | landmarks = self.p3(F.relu(self.bn6(self.conv6(F.relu(self.bn5(self.conv5(self.p2(F.relu(self.bn4(self.conv4(F.relu(self.bn3(self.conv3(self.p1(F.relu(self.bn2(self.conv2(F.relu(self.bn1(self.conv1(landmarks)))))))))))))))))))))
389 | elif self.n_layers == 4:
390 | landmarks = self.p2(F.relu(self.bn4(self.conv4(F.relu(self.bn3(self.conv3(self.p1(F.relu(self.bn2(self.conv2(F.relu(self.bn1(self.conv1(landmarks))))))))))))))
391 | elif self.n_layers == 2:
392 | landmarks = self.p1(F.relu(self.bn2(self.conv2(F.relu(self.bn1(self.conv1(landmarks)))))))
393 | else:
394 |             print('n_layers must be 2, 4, 6, or 8')
395 |
396 | # Permute back: (b, dim, d_seq) --> (b, seq, dim) with shorter seq
397 | landmarks = landmarks.permute(0, 2, 1)
398 | # Feed into GRU
399 | # import pdb; pdb.set_trace()
400 | # packed_input = pack_padded_sequence(self.dropout(landmarks), torch.IntTensor(lengths)/self.scale_pool, batch_first=True)
401 | packed_input = pack_padded_sequence(self.dropout(landmarks), tuple(int(x/self.scale_pool) for x in lengths), batch_first=True)
402 | _, ht = self.gru(packed_input)
403 | if ht.requires_grad:
404 | ht.register_hook(lambda x: x.clamp(min=-self.grad_clipping, max=self.grad_clipping))
405 | ht = self.dropout(ht[-1])
406 | logit = F.relu(self.lc1(ht))
407 | logit = self.lc2(self.dropout(logit))
408 | return logit
409 |
410 | # to be implemented
411 | class FrameCRNN(nn.Module):
412 |
413 | def __init__(self, embedding_dim, hidden_dim, target_size=1, bidirectional=False, n_layer=1):
414 | super(FrameCRNN, self).__init__()
415 | self.hidden_dim = hidden_dim
416 | self.n_layers = 2 # 2, 4, 6 ,8
417 | if self.n_layers >= 2:
418 | self.conv1 = nn.Conv1d(in_channels=embedding_dim, out_channels=hidden_dim, kernel_size=3, padding=1)
419 | self.conv2 = nn.Conv1d(in_channels=hidden_dim, out_channels=hidden_dim, kernel_size=3, padding=1)
420 | self.bn1 = nn.BatchNorm1d(num_features=self.hidden_dim)
421 | self.bn2 = nn.BatchNorm1d(num_features=self.hidden_dim)
422 | self.p1 = nn.MaxPool1d(kernel_size=2)
423 | self.scale_pool = 2
424 | if self.n_layers >= 4:
425 | self.conv3 = nn.Conv1d(in_channels=hidden_dim, out_channels=hidden_dim, kernel_size=3, padding=1)
426 | self.conv4 = nn.Conv1d(in_channels=hidden_dim, out_channels=hidden_dim, kernel_size=3, padding=1)
427 | self.bn3 = nn.BatchNorm1d(num_features=self.hidden_dim)
428 | self.bn4 = nn.BatchNorm1d(num_features=self.hidden_dim)
429 | self.p2 = nn.MaxPool1d(kernel_size=2)
430 | self.scale_pool = 4
431 | if self.n_layers >= 6:
432 | self.conv5 = nn.Conv1d(in_channels=hidden_dim, out_channels=hidden_dim, kernel_size=3, padding=1)
433 | self.conv6 = nn.Conv1d(in_channels=hidden_dim, out_channels=hidden_dim, kernel_size=3, padding=1)
434 | self.bn5 = nn.BatchNorm1d(num_features=self.hidden_dim)
435 | self.bn6 = nn.BatchNorm1d(num_features=self.hidden_dim)
436 | self.p3 = nn.MaxPool1d(kernel_size=2)
437 | self.scale_pool = 8
438 | if self.n_layers == 8:
439 | self.conv7 = nn.Conv1d(in_channels=hidden_dim, out_channels=hidden_dim, kernel_size=3, padding=1)
440 | self.conv8 = nn.Conv1d(in_channels=hidden_dim, out_channels=hidden_dim, kernel_size=3, padding=1)
441 | self.bn7 = nn.BatchNorm1d(num_features=self.hidden_dim)
442 | self.bn8 = nn.BatchNorm1d(num_features=self.hidden_dim)
443 | self.scale_pool = 8
444 |
445 | self.dropout = nn.Dropout(DROPOUT)
446 | self.gru = nn.GRU(hidden_dim, hidden_dim, num_layers=n_layer, bidirectional=bidirectional, dropout=DROPOUT)
447 |
448 | # The linear layer that maps from hidden state space to tag space
449 | self.lc1 = nn.Linear(hidden_dim, embedding_dim)
450 | self.lc2 = nn.Linear(embedding_dim, target_size)
451 |
452 | def forward(self, landmarks, lengths):
453 | landmarks = landmarks.permute(0, 2, 1) # (b, seq, dim) --> (b, dim, seq)
454 | # Convolve on Seq for each dim to get (b, dim, seq)
455 | if self.n_layers == 8:
456 | landmarks = F.relu(self.bn8(self.conv8(F.relu(self.bn7(self.conv7(self.p3(F.relu(self.bn6(self.conv6(F.relu(self.bn5(self.conv5(self.p2(F.relu(self.bn4(self.conv4(F.relu(self.bn3(self.conv3(self.p1(F.relu(self.bn2(self.conv2(F.relu(self.bn1(self.conv1(landmarks)))))))))))))))))))))))))))
457 | elif self.n_layers == 6:
458 | landmarks = self.p3(F.relu(self.bn6(self.conv6(F.relu(self.bn5(self.conv5(self.p2(F.relu(self.bn4(self.conv4(F.relu(self.bn3(self.conv3(self.p1(F.relu(self.bn2(self.conv2(F.relu(self.bn1(self.conv1(landmarks)))))))))))))))))))))
459 | elif self.n_layers == 4:
460 | landmarks = self.p2(F.relu(self.bn4(self.conv4(F.relu(self.bn3(self.conv3(self.p1(F.relu(self.bn2(self.conv2(F.relu(self.bn1(self.conv1(landmarks))))))))))))))
461 | elif self.n_layers == 2:
462 | landmarks = self.p1(F.relu(self.bn2(self.conv2(F.relu(self.bn1(self.conv1(landmarks)))))))
463 | else:
464 |             print('n_layers must be 2, 4, 6, or 8')
465 |
466 | # Permute back: (b, dim, d_seq) --> (b, seq, dim) with shorter seq
467 | landmarks = landmarks.permute(0, 2, 1)
468 | # Feed into GRU
469 |         packed_input = pack_padded_sequence(self.dropout(landmarks), tuple(int(x / self.scale_pool) for x in lengths), batch_first=True)  # integer division of lengths, as in crnn_Classifier
470 | _, ht = self.gru(packed_input)
471 | ht = self.dropout(ht[-1])
472 | logit = F.relu(self.lc1(ht))
473 | logit = self.lc2(self.dropout(logit))
474 | return logit
475 |
476 |
477 |
478 |
479 |
480 | # class LSTM_Classifier(nn.Module):
481 | #
482 | # def __init__(self, embedding_dim, hidden_dim, target_size=1):
483 | # super(LSTM_Classifier, self).__init__()
484 | # self.hidden_dim = hidden_dim
485 | #
486 | # # The LSTM takes word embeddings as inputs, and outputs hidden states
487 | # # with dimensionality hidden_dim.
488 | # self.lstm = nn.LSTM(embedding_dim, hidden_dim)
489 | #
490 | # # The linear layer that maps from hidden state space to tag space
491 | # self.lc = nn.Linear(hidden_dim, target_size)
492 | #
493 | # def forward(self, landmarks, lengths):
494 | # # pack_padded_sequence so that padded items in the sequence won't be shown to the LSTM
495 | # packed_input = pack_padded_sequence(landmarks, lengths, batch_first=True)
496 | # _, (ht, _) = self.lstm(packed_input)
497 | # import pdb;
498 | # pdb.set_trace()
499 | # # packed_output, (ht, ct) = self.lstm(packed_input) # ht is the final output of each batch! ht (1, 4, 272) can be found in output[:,input_sizes-1,:]
500 | # # output, input_sizes = pad_packed_sequence(packed_output, batch_first=True)
501 | # '''
502 | # (Pdb) output[:,input_sizes-1,:]
503 | # tensor([[[-0.0176, 0.1605, 0.1339, ..., -0.0914, 0.2951, -0.0065],
504 | # [-0.0225, 0.1589, 0.1340, ..., -0.0925, 0.2950, -0.0095],
505 | # [-0.0253, 0.1574, 0.1431, ..., -0.0865, 0.3022, -0.0119],
506 | # [-0.0303, 0.1515, 0.1422, ..., -0.1094, 0.2976, -0.0032]],
507 | #
508 | # [[ 0.0000, 0.0000, 0.0000, ..., 0.0000, 0.0000, 0.0000],
509 | # [-0.0165, 0.1666, 0.1344, ..., -0.0698, 0.2945, -0.0163],
510 | # [-0.0235, 0.1697, 0.1479, ..., -0.0657, 0.3001, -0.0195],
511 | # [-0.0235, 0.1734, 0.1515, ..., -0.0608, 0.3029, -0.0201]],
512 | #
513 | # [[ 0.0000, 0.0000, 0.0000, ..., 0.0000, 0.0000, 0.0000],
514 | # [ 0.0000, 0.0000, 0.0000, ..., 0.0000, 0.0000, 0.0000],
515 | # [-0.0492, 0.1666, 0.1444, ..., -0.0749, 0.2816, -0.0188],
516 | # [-0.0490, 0.1542, 0.1449, ..., -0.0865, 0.2821, -0.0205]],
517 | #
518 | # [[ 0.0000, 0.0000, 0.0000, ..., 0.0000, 0.0000, 0.0000],
519 | # [ 0.0000, 0.0000, 0.0000, ..., 0.0000, 0.0000, 0.0000],
520 | # [ 0.0000, 0.0000, 0.0000, ..., 0.0000, 0.0000, 0.0000],
521 | # [-0.0460, 0.1522, 0.1381, ..., -0.0959, 0.2843, -0.0071]]],
522 | #            device='cuda:2', grad_fn=<...>)
523 | # (Pdb) ht.shape
524 | # torch.Size([1, 4, 272])
525 | # (Pdb) ht
526 | # tensor([[[-0.0176, 0.1605, 0.1339, ..., -0.0914, 0.2951, -0.0065],
527 | # [-0.0165, 0.1666, 0.1344, ..., -0.0698, 0.2945, -0.0163],
528 | # [-0.0492, 0.1666, 0.1444, ..., -0.0749, 0.2816, -0.0188],
529 | # [-0.0460, 0.1522, 0.1381, ..., -0.0959, 0.2843, -0.0071]]],
530 | #            device='cuda:2', grad_fn=<...>)
531 | #
532 | # '''
533 | # # import pdb;
534 | # # pdb.set_trace()
535 | # logit = self.lc(ht.squeeze(0))
536 | # return logit
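537 |
538 |
539 | # Shape contract shared by the classifiers above: `landmarks` is a padded
540 | # (batch, max_seq_len, embedding_dim) tensor whose per-sample lengths are
541 | # sorted in descending order (pack_padded_sequence requires this unless
542 | # enforce_sorted=False); forward() returns (batch, target_size) logits,
543 | # except the framewise/CNN variants, which return per-frame or concatenated
544 | # per-sample outputs as noted in their code.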
--------------------------------------------------------------------------------
/plot_log.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import sys
3 | import os
4 | import matplotlib
5 | matplotlib.use('Agg')
6 | import matplotlib.pyplot as plt
7 |
8 | def main(argv):
9 |
10 | parser = argparse.ArgumentParser()
11 | parser.add_argument(
12 | "log_file",
13 | help = "path to log file"
14 | )
15 | args = parser.parse_args()
16 |
17 | if os.path.isdir(args.log_file):
18 | for root, _, files in os.walk(args.log_file):
19 | if len(files) > 0:
20 | for file in files:
21 |                     if file.endswith('.txt'):
22 | print(file)
23 |                         f = open(os.path.join(root, file))
24 | lines = [line.rstrip("\n") for line in f.readlines()]
25 | epochs = []
26 | train_acc, valid_acc = [], []
27 | train_loss, valid_loss = [], []
28 |
29 | for line in lines:
30 | try:
31 | line_list = line.split(',')
32 | if line_list[0][:5] == 'Epoch':
33 | a = int(line_list[0][5:])
34 | b = float(line_list[2][:-1])
35 | c = float(line_list[4])
36 | d = float(line_list[6][:-1])
37 | e = float(line_list[8])
38 | epochs.append(a)
39 | train_acc.append(b)
40 | train_loss.append(c)
41 | valid_acc.append(d)
42 | valid_loss.append(e)
43 |                             except (IndexError, ValueError):
44 |                                 # skip malformed lines instead of aborting the whole plot
45 |                                 print('missing a few epochs')
46 | fig = plt.figure(figsize=(14, 10))
47 | ax1 = fig.add_subplot(2, 1, 1)
48 | ax1.plot(epochs, train_acc, 'r', label='train_acc')
49 | ax1.plot(epochs, valid_acc, 'b', label='valid_acc')
50 | ax1.grid()
51 | ax1.title.set_text('Accuracy')
52 | ax1.set_xlabel('epochs')
53 | ax1.set_ylabel('accuracy %')
54 | ax1.legend()
55 | ax2 = fig.add_subplot(2, 1, 2)
56 | ax2.plot(epochs, train_loss, 'r', label='train_loss')
57 | ax2.plot(epochs, valid_loss, 'b', label='valid_loss')
58 | ax2.grid()
59 | ax2.title.set_text('Loss')
60 | ax2.set_xlabel('epochs')
61 | ax2.set_ylabel('loss')
62 | ax2.legend()
63 | # plt.show()
64 | plt.savefig(root + '/' + file[:-3] + 'png')
65 | plt.close()
66 | else:
67 | f = open(args.log_file)
68 | lines = [line.rstrip("\n") for line in f.readlines()]
69 | epochs = []
70 | train_acc, valid_acc = [], []
71 | train_loss, valid_loss = [], []
72 |
73 | for line in lines:
74 |             line_list = line.split(',')
75 |             if line_list[0][:5] == 'Epoch':
76 |                 # line format: Epoch{N},train_acc,{v}%,train_loss,{v},valid_acc,{v}%,valid_loss,{v}
77 | epochs.append(int(line_list[0][5:]))
78 | train_acc.append(float(line_list[2][:-1]))
79 | train_loss.append(float(line_list[4]))
80 | valid_acc.append(float(line_list[6][:-1]))
81 | valid_loss.append(float(line_list[8]))
82 | # import pdb;
83 | # pdb.set_trace()
84 | fig = plt.figure(figsize=(14, 10))
85 | # import pdb;
86 | # pdb.set_trace()
87 | ax1 = fig.add_subplot(2, 1, 1)
88 | ax1.plot(epochs, train_acc, 'r', label='train_acc')
89 | ax1.plot(epochs, valid_acc, 'b', label='valid_acc')
90 | ax1.grid()
91 | ax1.title.set_text('Accuracy')
92 | ax1.set_xlabel('epochs')
93 | ax1.set_ylabel('accuracy %')
94 | ax1.legend()
95 | ax2 = fig.add_subplot(2, 1, 2)
96 | ax2.plot(epochs, train_loss, 'r', label='train_loss')
97 | ax2.plot(epochs, valid_loss, 'b', label='valid_loss')
98 | ax2.grid()
99 | ax2.title.set_text('Loss')
100 | ax2.set_xlabel('epochs')
101 | ax2.set_ylabel('loss')
102 | ax2.legend()
103 | # plt.show()
104 | plt.savefig(args.log_file[:-3]+'png')
105 |
106 | if __name__ == "__main__":
107 | main(sys.argv)
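108 |
109 | # Expected log-line format (as printed by the training scripts), e.g.:
110 | #   Epoch3,train_acc,91.25%,train_loss,0.01234567,valid_acc,88.10%,valid_loss,0.02345678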
--------------------------------------------------------------------------------
/test.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.nn.functional as F
4 | import torch.optim as optim
5 | from dataset import LandmarkList, LandmarkListTest
6 | from torch.utils import data
7 | from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence
8 | import argparse
9 |
10 | from model import *
11 |
12 | # rnn = 'frameGRU'
13 | # rnn = 'sumGRU'
14 | # rnn = 'crnn'
15 | # rnn = 'cnn'
16 | # rnn = 'GRU'
17 | # rnn = 'framewise_GRU'
18 | # rnn = 'embedGRU'
19 | rnn = 'biGRU'
20 | # rnn = 'LSTM'
21 | EMBEDDING_DIM = int(68 * 67 / 2)  # = C(68, 2) = 2278; presumably one feature per pair of the 68 landmarks
22 | HIDDEN_DIM = 128
23 | N_LAYERS_RNN = 3
24 | LR = 1e-4
25 | DEVICES = 0
26 | torch.cuda.set_device(DEVICES)
27 |
28 |
29 | def compute_binary_accuracy(model, data_loader, th_list):
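  | # evaluate at every sigmoid threshold in th_list; returns per-threshold
  | # accuracy (%), FP/FN counts, and the misclassified file names
  | # (the loaders below use batch_size=1, so each tensor comparison covers one clip)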
30 | len_th_list = len(th_list)
31 | correct_pred, num_examples, FP, FN = [0.]*len_th_list, 0, [0]*len_th_list, [0]*len_th_list
32 | FP_list = []
33 | FN_list = []
34 | for _ in range(len_th_list):
35 | FP_list.append([])
36 | FN_list.append([])
37 | model.eval()
38 | with torch.no_grad():
39 | if rnn == 'frameGRU':
40 | for batch, labels, lengths, f_names in data_loader:
41 | logits = model(batch.cuda(), lengths)
42 | out = torch.sigmoid(logits)
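  | # frameGRU emits one logit per frame; average the per-frame probabilities
  | # over each clip's unpadded length to get one clip-level score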
43 | new_out_list = []
44 | for i in range(len(lengths)):
45 | new_out_list.append(out[i][:lengths[i]].mean(0, keepdim=True))
47 | out = torch.cat(new_out_list, 0)
48 | num_examples += len(lengths)
49 | for i, th in enumerate(th_list):
50 | predicted_labels = (out > th).long()
51 | if predicted_labels.squeeze(1).cpu().long() == torch.LongTensor(labels):
52 | correct_pred[i] += 1
53 | elif labels == 0:
54 | # print('FP: ', FP)
55 | FP[i] += 1
56 | FP_list[i].append(f_names[0] + '_' + str(labels.item()) + '_' + str(
57 | out.squeeze(1).cpu().item()))
58 | else:
59 | # print('FN: ', FN)
60 | FN[i] += 1
61 | FN_list[i].append(f_names[0] + '_' + str(labels.item()) + '_' + str(
62 | out.squeeze(1).cpu().item()))
63 | return [n_correct/num_examples * 100 for n_correct in correct_pred], FP, FN, FP_list, FN_list
64 | else:
65 | for batch, labels, lengths, f_names in data_loader:
67 | logits = model(batch.cuda(), lengths)
68 | num_examples += len(lengths)
69 | for i, th in enumerate(th_list):
70 | predicted_labels = (torch.sigmoid(logits) > th).long()
71 | if predicted_labels.squeeze(1).cpu().long() == torch.LongTensor(labels):
72 | correct_pred[i] += 1
73 | elif labels == 0:
74 | FP[i] += 1
75 | FP_list[i].append(f_names[0]+'_'+str(labels.item())+'_'+str(torch.sigmoid(logits).squeeze(1).cpu().item()))
76 | else:
77 | FN[i] += 1
78 | FN_list[i].append(f_names[0]+'_'+str(labels.item())+'_'+str(torch.sigmoid(logits).squeeze(1).cpu().item()))
79 | return [n_correct/num_examples * 100 for n_correct in correct_pred], FP, FN, FP_list, FN_list
80 |
81 |
82 |
83 | if rnn == 'frameGRU':
84 | model = Framewise_GRU_Classifier(EMBEDDING_DIM, HIDDEN_DIM, 1, n_layer=N_LAYERS_RNN)
85 | if rnn == 'sumGRU':
86 | model = sumGRU(EMBEDDING_DIM, HIDDEN_DIM, 1, n_layer=N_LAYERS_RNN)
87 | if rnn == 'embedGRU':
88 | model = embed_GRU_Classifier(EMBEDDING_DIM, HIDDEN_DIM, 1, n_layer=N_LAYERS_RNN)
89 | model.load_state_dict(torch.load("models/" + str(rnn) + "_L" + str(N_LAYERS_RNN) + ".pt"))
90 | if rnn == 'GRU':
91 | model = GRU_Classifier(EMBEDDING_DIM, HIDDEN_DIM, 1, n_layer=N_LAYERS_RNN)
92 | model.load_state_dict(torch.load("models/" + str(rnn) + "_L" + str(N_LAYERS_RNN) + ".pt"))
93 | if rnn == 'biGRU':
94 | model = biGRU_Classifier(EMBEDDING_DIM, HIDDEN_DIM, 1, n_layer=N_LAYERS_RNN)
95 | model.load_state_dict(torch.load("models/" + str(rnn) + "_L" + str(N_LAYERS_RNN) + ".pt"))
96 | if rnn == 'LSTM':
97 | model = LSTM_Classifier(EMBEDDING_DIM, HIDDEN_DIM, 1, n_layer=N_LAYERS_RNN)
98 | if rnn == 'cnn':
99 | model = cnn_Classifier(EMBEDDING_DIM, HIDDEN_DIM, 1)
100 | if rnn == 'crnn':
101 | model = crnn_Classifier(EMBEDDING_DIM, HIDDEN_DIM, 1, n_layer=N_LAYERS_RNN)
102 | # model.load_state_dict(torch.load("models/"+str(rnn)+".pt"))
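  | # note: only the embedGRU/GRU/biGRU branches above restore trained weights;
  | # the other variants run from random initialization unless a checkpoint is loaded here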
103 | model = model.cuda()
104 |
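  | # the loss functions and optimizer below are never used in this script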
105 | loss_function = torch.nn.BCEWithLogitsLoss()
106 | loss_function_eval_sum = torch.nn.BCEWithLogitsLoss(reduction='sum')
107 | optimizer = optim.Adam(model.parameters(), lr=LR)
108 |
109 | dataset_train = LandmarkListTest(root='/datasets/move_closer/Data_Distortion/', fileList='/datasets/move_closer/TrainList.txt')
110 | dataloader_train = data.DataLoader(dataset_train, batch_size=1, shuffle=False, num_workers=0)
111 |
112 | dataset_test = LandmarkListTest(root='/datasets/move_closer/Data_Distortion/', fileList='/datasets/move_closer/TestList.txt')
113 | dataloader_test = data.DataLoader(dataset_test, batch_size=1, shuffle=False, num_workers=0)
114 |
115 | # thresholds = [x * 0.01 for x in range(30, 71)]
116 | thresholds = [0.5]
117 |
118 | train_acc, train_fp, train_fn, train_fp_list, train_fn_list = compute_binary_accuracy(model, dataloader_train, thresholds)
119 | test_acc, test_fp, test_fn, test_fp_list, test_fn_list = compute_binary_accuracy(model, dataloader_test, thresholds)
120 |
121 | for i in range(0, len(thresholds)):
122 | print('\n\n-----------------Eval for threshold of {:.2f}-------------------\n\n'.format(thresholds[i]))
123 | print('train_acc,{:.2f}%,train_fp,{},train_fn,{}\nvalid_acc,{:.2f}%,valid_fp,{},valid_fn,{}\n'
124 | .format(train_acc[i], train_fp[i], train_fn[i], test_acc[i], test_fp[i], test_fn[i]))
125 | print('Train FP')
126 | for n in train_fp_list[i]:
127 | print(n)
128 | print('\nTrain FN')
129 | for n in train_fn_list[i]:
130 | print(n)
131 |
132 | print('\n\n\nTest FP')
133 | for n in test_fp_list[i]:
134 | print(n)
135 | print('\nTest FN')
136 | for n in test_fn_list[i]:
137 | print(n)
138 |
139 |
--------------------------------------------------------------------------------
/train.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.nn.functional as F
4 | import torch.optim as optim
5 | from dataset import LandmarkList
6 | from torch.utils import data
7 | from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence
8 | import argparse
9 |
10 | from model import *
11 |
12 | # rnn = 'frameGRU'
13 | # to be implemented - rnn = 'frameCRNN'
14 | rnn = 'sumGRU'
15 | # rnn = 'crnn'
16 | # rnn = 'cnn'
17 | # rnn = 'GRU'
18 | # rnn = 'embedGRU'
19 | # rnn = 'biGRU'
20 | # rnn = 'LSTM'
21 | EMBEDDING_DIM = int(68 * 67 / 2)  # = C(68, 2) = 2278; presumably one feature per pair of the 68 landmarks
22 | HIDDEN_DIM = 128
23 | N_LAYERS_RNN = 1
24 | MAX_EPOCH = 30000
25 | LR = 1e-4
26 | DEVICES = 3
27 | SAVE_BEST_MODEL = True
28 | torch.cuda.set_device(DEVICES)
29 |
30 |
31 | def compute_binary_accuracy(model, data_loader, loss_function):
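  | # returns (accuracy in %, BCE loss summed over the whole loader); the caller
  | # passes the reduction='sum' loss so totals are comparable across epochs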
32 | correct_pred, num_examples, total_loss = 0, 0, 0.
33 | model.eval()
34 | with torch.no_grad():
35 | if rnn == 'frameGRU' or rnn == 'frameCRNN':
36 | for batch, labels, lengths in data_loader:
37 | logits = model(batch.cuda(), lengths)
38 | out = torch.sigmoid(logits)
39 | # if rnn == 'frameGRU':
40 | # new_out_list = []
41 | # new_labels_list = []
42 | # for i in range(len(lengths)):
43 | # new_out_list.append(out[i][:lengths[i]].sum())
44 | # out = torch.cat(new_out_list, 0)
45 | new_labels_list = []
46 | new_logits_list = []
47 | new_out_list = []
48 | for i in range(len(lengths)):
49 | new_labels_list += [labels[i]] * lengths[i]
50 | new_logits_list.append(out[i][:lengths[i]])
51 | new_out_list.append(out[i][:lengths[i]].mean(0, keepdim=True))
53 | logits_framewise = torch.cat(new_logits_list, 0)
54 | labels_framewise = new_labels_list
55 | out = torch.cat(new_out_list, 0)
56 | total_loss += loss_function(logits_framewise, torch.FloatTensor(labels_framewise).unsqueeze(1).cuda()).item()
57 | predicted_labels = (out > 0.5).long()
58 | num_examples += len(lengths)
59 | correct_pred += (predicted_labels.squeeze(1).cpu().long() == torch.LongTensor(labels)).sum()
60 | return correct_pred.float().item()/num_examples * 100, total_loss
61 | else:
62 | for batch, labels, lengths in data_loader:
63 | logits = model(batch.cuda(), lengths)
64 | total_loss += loss_function(logits, torch.FloatTensor(labels).unsqueeze(1).cuda()).item()
65 | predicted_labels = (torch.sigmoid(logits) > 0.5).long()
66 | num_examples += len(lengths)
67 | correct_pred += (predicted_labels.squeeze(1).cpu().long() == torch.LongTensor(labels)).sum()
68 | return correct_pred.float().item()/num_examples * 100, total_loss
69 |
70 |
71 | def pad_collate(batch):
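  | # each dataset item is (landmarks, target, length); sort by descending length
  | # (as pack_padded_sequence expects) and zero-pad everything to the longest clip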
72 | batch.sort(key=lambda x: x[2], reverse=True)
73 | lms, tgs, lens = zip(*batch)
74 |     new_lms = torch.zeros((len(lms), lms[0].shape[0], lms[0].shape[1]))  # batch x max_seq_len x EMBEDDING_DIM
75 | new_lms[0] = lms[0]
76 | for i in range(1, len(lms)):
77 |         new_lms[i] = torch.cat((lms[i], torch.zeros(lens[0] - lens[i], EMBEDDING_DIM)), 0)
78 | return new_lms, tgs, lens
79 |
80 | if rnn == 'frameGRU':
81 | model = Framewise_GRU_Classifier(EMBEDDING_DIM, HIDDEN_DIM, 1, n_layer=N_LAYERS_RNN)
82 | if rnn == 'frameCRNN':
83 | model = FrameCRNN(EMBEDDING_DIM, HIDDEN_DIM, 1, n_layer=N_LAYERS_RNN)
84 | if rnn == 'sumGRU':
85 | model = sumGRU(EMBEDDING_DIM, HIDDEN_DIM, 1, n_layer=N_LAYERS_RNN)
86 | if rnn == 'embedGRU':
87 | model = embed_GRU_Classifier(EMBEDDING_DIM, HIDDEN_DIM, 1, n_layer=N_LAYERS_RNN)
88 | if rnn == 'GRU':
89 | model = GRU_Classifier(EMBEDDING_DIM, HIDDEN_DIM, 1, n_layer=N_LAYERS_RNN)
90 | if rnn == 'biGRU':
91 | model = biGRU_Classifier(EMBEDDING_DIM, HIDDEN_DIM, 1, n_layer=N_LAYERS_RNN)
92 | if rnn == 'LSTM':
93 | model = LSTM_Classifier(EMBEDDING_DIM, HIDDEN_DIM, 1, n_layer=N_LAYERS_RNN)
94 | if rnn == 'cnn':
95 | model = cnn_Classifier(EMBEDDING_DIM, HIDDEN_DIM, 1)
96 | if rnn == 'crnn':
97 | model = crnn_Classifier(EMBEDDING_DIM, HIDDEN_DIM, 1, n_layer=N_LAYERS_RNN)
98 | model = model.cuda()
99 |
100 | loss_function = torch.nn.BCEWithLogitsLoss()
101 | loss_function_eval_sum = torch.nn.BCEWithLogitsLoss(reduction='sum')
102 | optimizer = optim.Adam(model.parameters(), lr=LR)
103 |
104 | dataset_train = LandmarkList(root='/datasets/move_closer/Data_Distortion/', fileList='/datasets/move_closer/TrainList.txt')
105 | dataloader_train = data.DataLoader(dataset_train, batch_size=128, shuffle=True, num_workers=0, collate_fn=pad_collate)
106 | # if rnn == 'frameGRU':
107 | # dataloader_train = data.DataLoader(dataset_train, batch_size=8, shuffle=True, num_workers=2,
108 | # collate_fn=pad_collate)
109 |
110 | dataset_test = LandmarkList(root='/datasets/move_closer/Data_Distortion/', fileList='/datasets/move_closer/TestList.txt')
111 | dataloader_test = data.DataLoader(dataset_test, batch_size=64, shuffle=False, num_workers=0, collate_fn=pad_collate)
112 |
113 | best_test_acc = 0.
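  | # per epoch: one optimization pass over the training set, then a full
  | # evaluation of both splits; checkpoint whenever test accuracy improves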
114 | for epoch in range(MAX_EPOCH):
115 | model.train()
116 | n_iter = 0
117 | for batch, labels, lengths in dataloader_train:
118 | model.zero_grad()
119 |         out = model(batch.cuda(), lengths)  # we could also classify every time step's output (probably better)
120 | if rnn == 'frameGRU':
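  | # expand each clip's label to all of its valid frames so the BCE loss is framewise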
121 | new_labels_list = []
122 | new_out_list = []
123 | for i in range(len(lengths)):
124 | new_labels_list += [labels[i]] * lengths[i]
125 | new_out_list.append(out[i][:lengths[i]])
126 | out = torch.cat(new_out_list, 0)
127 | labels = new_labels_list
128 | loss = loss_function(out, torch.FloatTensor(labels).unsqueeze(1).cuda())
129 | loss.backward()
130 | optimizer.step()
131 | n_iter += 1
132 | train_acc, train_loss = compute_binary_accuracy(model, dataloader_train, loss_function_eval_sum)
133 | test_acc, test_loss = compute_binary_accuracy(model, dataloader_test, loss_function_eval_sum)
134 | print('Epoch{},train_acc,{:.2f}%,train_loss,{:.8f},valid_acc,{:.2f}%,valid_loss,{:.8f}'.format(epoch, train_acc, train_loss, test_acc, test_loss))
135 | if test_acc > best_test_acc:
136 | best_test_acc = test_acc
137 | if SAVE_BEST_MODEL:
138 | torch.save(model.state_dict(), 'models/' + rnn +
139 | '_L' + str(N_LAYERS_RNN) + '.pt')
140 | print('best epoch {}, train_acc {}, test_acc {}'.format(epoch, train_acc, test_acc))
141 |
142 |
143 |
144 |
145 |
146 |
147 |
148 |
149 | # class LSTM_Classifier(nn.Module):
150 | #
151 | # def __init__(self, embedding_dim, hidden_dim, target_size=1):
152 | # super(LSTM_Classifier, self).__init__()
153 | # self.hidden_dim = hidden_dim
154 | #
155 | # # The LSTM takes word embeddings as inputs, and outputs hidden states
156 | # # with dimensionality hidden_dim.
157 | # self.lstm = nn.LSTM(embedding_dim, hidden_dim)
158 | #
159 | # # The linear layer that maps from hidden state space to tag space
160 | # self.lc = nn.Linear(hidden_dim, target_size)
161 | #
162 | # def forward(self, landmarks, lengths):
163 | # # pack_padded_sequence so that padded items in the sequence won't be shown to the LSTM
164 | # packed_input = pack_padded_sequence(landmarks, lengths, batch_first=True)
165 | # _, (ht, _) = self.lstm(packed_input)
166 | # import pdb;
167 | # pdb.set_trace()
168 | # # packed_output, (ht, ct) = self.lstm(packed_input) # ht is the final output of each batch! ht (1, 4, 272) can be found in output[:,input_sizes-1,:]
169 | # # output, input_sizes = pad_packed_sequence(packed_output, batch_first=True)
170 | # '''
171 | # (Pdb) output[:,input_sizes-1,:]
172 | # tensor([[[-0.0176, 0.1605, 0.1339, ..., -0.0914, 0.2951, -0.0065],
173 | # [-0.0225, 0.1589, 0.1340, ..., -0.0925, 0.2950, -0.0095],
174 | # [-0.0253, 0.1574, 0.1431, ..., -0.0865, 0.3022, -0.0119],
175 | # [-0.0303, 0.1515, 0.1422, ..., -0.1094, 0.2976, -0.0032]],
176 | #
177 | # [[ 0.0000, 0.0000, 0.0000, ..., 0.0000, 0.0000, 0.0000],
178 | # [-0.0165, 0.1666, 0.1344, ..., -0.0698, 0.2945, -0.0163],
179 | # [-0.0235, 0.1697, 0.1479, ..., -0.0657, 0.3001, -0.0195],
180 | # [-0.0235, 0.1734, 0.1515, ..., -0.0608, 0.3029, -0.0201]],
181 | #
182 | # [[ 0.0000, 0.0000, 0.0000, ..., 0.0000, 0.0000, 0.0000],
183 | # [ 0.0000, 0.0000, 0.0000, ..., 0.0000, 0.0000, 0.0000],
184 | # [-0.0492, 0.1666, 0.1444, ..., -0.0749, 0.2816, -0.0188],
185 | # [-0.0490, 0.1542, 0.1449, ..., -0.0865, 0.2821, -0.0205]],
186 | #
187 | # [[ 0.0000, 0.0000, 0.0000, ..., 0.0000, 0.0000, 0.0000],
188 | # [ 0.0000, 0.0000, 0.0000, ..., 0.0000, 0.0000, 0.0000],
189 | # [ 0.0000, 0.0000, 0.0000, ..., 0.0000, 0.0000, 0.0000],
190 | # [-0.0460, 0.1522, 0.1381, ..., -0.0959, 0.2843, -0.0071]]],
191 | # device='cuda:2', grad_fn=)
192 | # (Pdb) ht.shape
193 | # torch.Size([1, 4, 272])
194 | # (Pdb) ht
195 | # tensor([[[-0.0176, 0.1605, 0.1339, ..., -0.0914, 0.2951, -0.0065],
196 | # [-0.0165, 0.1666, 0.1344, ..., -0.0698, 0.2945, -0.0163],
197 | # [-0.0492, 0.1666, 0.1444, ..., -0.0749, 0.2816, -0.0188],
198 | # [-0.0460, 0.1522, 0.1381, ..., -0.0959, 0.2843, -0.0071]]],
199 | # device='cuda:2', grad_fn=)
200 | #
201 | # '''
202 | # # import pdb;
203 | # # pdb.set_trace()
204 | # logit = self.lc(ht.squeeze(0))
205 | # return logit
--------------------------------------------------------------------------------
/train_2dcnn.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.nn.functional as F
4 | import torch.optim as optim
5 | from dataset import LandmarkList
6 | from torch.utils import data
7 | from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence
8 | import argparse
9 |
10 | from model import *
11 |
12 |
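  | # this script mirrors train.py; the differences are the 2-D CNN model choice
  | # below, N_LAYERS_RNN = 3, and the shorter MAX_EPOCH = 1000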
13 | rnn = '2dcnn'
14 | # rnn = 'frameGRU'
15 | # to be implemented - rnn = 'frameCRNN'
16 | # rnn = 'sumGRU'
17 | # rnn = 'crnn'
18 | # rnn = 'cnn'
19 | # rnn = 'GRU'
20 | # rnn = 'embedGRU'
21 | # rnn = 'biGRU'
22 | # rnn = 'LSTM'
23 | EMBEDDING_DIM = int(68 * 67 / 2)  # = C(68, 2) = 2278; presumably one feature per pair of the 68 landmarks
24 | HIDDEN_DIM = 128
25 | N_LAYERS_RNN = 3
26 | MAX_EPOCH = 1000
27 | LR = 1e-4
28 | DEVICES = 2
29 | SAVE_BEST_MODEL = True
30 | torch.cuda.set_device(DEVICES)
31 |
32 |
33 | def compute_binary_accuracy(model, data_loader, loss_function):
34 | correct_pred, num_examples, total_loss = 0, 0, 0.
35 | model.eval()
36 | with torch.no_grad():
37 | if rnn == 'frameGRU' or rnn == 'frameCRNN':
38 | for batch, labels, lengths in data_loader:
39 | logits = model(batch.cuda(), lengths)
40 | out = torch.sigmoid(logits)
41 | # if rnn == 'frameGRU':
42 | # new_out_list = []
43 | # new_labels_list = []
44 | # for i in range(len(lengths)):
45 | # new_out_list.append(out[i][:lengths[i]].sum())
46 | # out = torch.cat(new_out_list, 0)
47 | new_labels_list = []
48 | new_logits_list = []
49 | new_out_list = []
50 | for i in range(len(lengths)):
51 | new_labels_list += [labels[i]] * lengths[i]
52 | new_logits_list.append(out[i][:lengths[i]])
53 | new_out_list.append(out[i][:lengths[i]].mean(0, keepdim=True))
55 | logits_framewise = torch.cat(new_logits_list, 0)
56 | labels_framewise = new_labels_list
57 | out = torch.cat(new_out_list, 0)
58 | total_loss += loss_function(logits_framewise, torch.FloatTensor(labels_framewise).unsqueeze(1).cuda()).item()
59 | predicted_labels = (out > 0.5).long()
60 | num_examples += len(lengths)
61 | correct_pred += (predicted_labels.squeeze(1).cpu().long() == torch.LongTensor(labels)).sum()
62 | return correct_pred.float().item()/num_examples * 100, total_loss
63 | else:
64 | for batch, labels, lengths in data_loader:
65 | logits = model(batch.cuda(), lengths)
66 | total_loss += loss_function(logits, torch.FloatTensor(labels).unsqueeze(1).cuda()).item()
67 | predicted_labels = (torch.sigmoid(logits) > 0.5).long()
68 | num_examples += len(lengths)
69 | correct_pred += (predicted_labels.squeeze(1).cpu().long() == torch.LongTensor(labels)).sum()
70 | return correct_pred.float().item()/num_examples * 100, total_loss
71 |
72 |
73 | def pad_collate(batch):
74 | batch.sort(key=lambda x: x[2], reverse=True)
75 | lms, tgs, lens = zip(*batch)
76 |     new_lms = torch.zeros((len(lms), lms[0].shape[0], lms[0].shape[1]))  # batch x max_seq_len x EMBEDDING_DIM
77 | new_lms[0] = lms[0]
78 | for i in range(1, len(lms)):
79 |         new_lms[i] = torch.cat((lms[i], torch.zeros(lens[0] - lens[i], EMBEDDING_DIM)), 0)
80 | return new_lms, tgs, lens
81 |
82 | if rnn == '2dcnn':
83 | model = cnn_2d(EMBEDDING_DIM, HIDDEN_DIM, 1, n_layer=N_LAYERS_RNN)
84 | if rnn == 'frameGRU':
85 | model = Framewise_GRU_Classifier(EMBEDDING_DIM, HIDDEN_DIM, 1, n_layer=N_LAYERS_RNN)
86 | if rnn == 'frameCRNN':
87 | model = FrameCRNN(EMBEDDING_DIM, HIDDEN_DIM, 1, n_layer=N_LAYERS_RNN)
88 | if rnn == 'sumGRU':
89 | model = sumGRU(EMBEDDING_DIM, HIDDEN_DIM, 1, n_layer=N_LAYERS_RNN)
90 | if rnn == 'embedGRU':
91 | model = embed_GRU_Classifier(EMBEDDING_DIM, HIDDEN_DIM, 1, n_layer=N_LAYERS_RNN)
92 | if rnn == 'GRU':
93 | model = GRU_Classifier(EMBEDDING_DIM, HIDDEN_DIM, 1, n_layer=N_LAYERS_RNN)
94 | if rnn == 'biGRU':
95 | model = biGRU_Classifier(EMBEDDING_DIM, HIDDEN_DIM, 1, n_layer=N_LAYERS_RNN)
96 | if rnn == 'LSTM':
97 | model = LSTM_Classifier(EMBEDDING_DIM, HIDDEN_DIM, 1, n_layer=N_LAYERS_RNN)
98 | if rnn == 'cnn':
99 | model = cnn_Classifier(EMBEDDING_DIM, HIDDEN_DIM, 1)
100 | if rnn == 'crnn':
101 | model = crnn_Classifier(EMBEDDING_DIM, HIDDEN_DIM, 1, n_layer=N_LAYERS_RNN)
102 | model = model.cuda()
103 |
104 | loss_function = torch.nn.BCEWithLogitsLoss()
105 | loss_function_eval_sum = torch.nn.BCEWithLogitsLoss(reduction='sum')
106 | optimizer = optim.Adam(model.parameters(), lr=LR)
107 |
108 | dataset_train = LandmarkList(root='/datasets/move_closer/Data_Distortion/', fileList='/datasets/move_closer/TrainList.txt')
109 | dataloader_train = data.DataLoader(dataset_train, batch_size=128, shuffle=True, num_workers=0, collate_fn=pad_collate)
110 | # if rnn == 'frameGRU':
111 | # dataloader_train = data.DataLoader(dataset_train, batch_size=8, shuffle=True, num_workers=2,
112 | # collate_fn=pad_collate)
113 |
114 | dataset_test = LandmarkList(root='/datasets/move_closer/Data_Distortion/', fileList='/datasets/move_closer/TestList.txt')
115 | dataloader_test = data.DataLoader(dataset_test, batch_size=64, shuffle=False, num_workers=0, collate_fn=pad_collate)
116 |
117 | best_test_acc = 0.
118 | for epoch in range(MAX_EPOCH):
119 | model.train()
120 | n_iter = 0
121 | for batch, labels, lengths in dataloader_train:
122 | model.zero_grad()
123 |         out = model(batch.cuda(), lengths)  # we could also classify every time step's output (probably better)
124 | if rnn == 'frameGRU':
125 | new_labels_list = []
126 | new_out_list = []
127 | for i in range(len(lengths)):
128 | new_labels_list += [labels[i]] * lengths[i]
129 | new_out_list.append(out[i][:lengths[i]])
130 | out = torch.cat(new_out_list, 0)
131 | labels = new_labels_list
132 | loss = loss_function(out, torch.FloatTensor(labels).unsqueeze(1).cuda())
133 | loss.backward()
134 | optimizer.step()
135 | n_iter += 1
136 | train_acc, train_loss = compute_binary_accuracy(model, dataloader_train, loss_function_eval_sum)
137 | test_acc, test_loss = compute_binary_accuracy(model, dataloader_test, loss_function_eval_sum)
138 | print('Epoch{},train_acc,{:.2f}%,train_loss,{:.8f},valid_acc,{:.2f}%,valid_loss,{:.8f}'.format(epoch, train_acc, train_loss, test_acc, test_loss))
139 | if test_acc > best_test_acc:
140 | best_test_acc = test_acc
141 | if SAVE_BEST_MODEL:
142 | torch.save(model.state_dict(), 'models/' + rnn +
143 | '_L' + str(N_LAYERS_RNN) + '.pt')
144 | print('best epoch {}, train_acc {}, test_acc {}'.format(epoch, train_acc, test_acc))
145 |
146 |
147 |
148 |
149 |
150 |
151 |
152 |
--------------------------------------------------------------------------------