├── .gitignore
├── .idea
│   ├── .gitignore
│   ├── inspectionProfiles
│   │   └── profiles_settings.xml
│   ├── misc.xml
│   ├── modules.xml
│   ├── sequential_model_pytorch_rnn_1dcnn.iml
│   └── vcs.xml
├── README.md
├── dataset.py
├── dataset2.py
├── dataset_list.py
├── main2.py
├── main_softmax.py
├── model.py
├── plot_log.py
├── test.py
├── train.py
└── train_2dcnn.py

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | outputs*/
2 | eval/
3 | models/
4 | __pycache__/
--------------------------------------------------------------------------------
/.idea/.gitignore:
--------------------------------------------------------------------------------
1 | # Default ignored files
2 | /shelf/
3 | /workspace.xml
4 | # Editor-based HTTP Client requests
5 | /httpRequests/
6 | # Datasource local storage ignored files
7 | /dataSources/
8 | /dataSources.local.xml
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Sequential Model PyTorch
2 |
3 | This repo provides a library supporting:
4 | - RNN, LSTM, GRU, bi-GRU, 1d-CNN, RCNN, etc.
5 | - Adaptable to variable-length input sequences.
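For example, a padded batch of variable-length landmark sequences can be scored with the `GRU_Classifier` from `model.py`. This is a minimal sketch: the 136-dim landmark features follow the convention used in this repo, while the sizes and the random tensors are placeholders for real data.

```python
import torch
from model import GRU_Classifier

# Two landmark tracks of different lengths; 68 points x 2 coords = 136 features per frame.
seqs = [torch.randn(50, 136), torch.randn(30, 136)]
lengths = [s.shape[0] for s in seqs]  # sorted longest-first, as pack_padded_sequence expects
padded = torch.nn.utils.rnn.pad_sequence(seqs, batch_first=True)  # (batch, max_seq, 136)

model = GRU_Classifier(embedding_dim=136, hidden_dim=272, target_size=1, n_layer=2)
model.eval()
with torch.no_grad():
    logits = model(padded, lengths)  # (batch, 1)
    probs = torch.sigmoid(logits)    # per-sequence probabilities
```

The `pad_collate` functions in the training scripts perform the same sort-by-length and zero-padding for whole datasets.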
6 |
7 | ## Quick start
8 |
9 | Choose the type of neural network in `train.py`.
10 |
11 | To train:
12 |
13 | ```bash
14 | python train.py
15 | ```
16 |
17 | To test:
18 |
19 | ```bash
20 | python test.py
21 | ```
--------------------------------------------------------------------------------
/dataset.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 |
5 | import torch.utils.data as data
6 |
7 | import os
8 | import os.path
9 | import re
10 | import torch
11 | import pickle
12 |
13 |
14 | def default_loader(path):
15 |     with open(path, 'rb') as fp:
16 |         lm_list = pickle.load(fp)
17 |     return lm_list
18 |
19 |
20 | def default_list_reader(fileList):
21 |     lmList = []
22 |     with open(fileList, 'r') as file:
23 |         for line in file.readlines():
24 |             lmPath = line.strip()[:-2].strip()  # each list line is "<pickle path> <label>"
25 |             label = line.strip()[-1]
26 |
27 |             lmList.append((lmPath, int(label)))
28 |     return lmList
29 |
30 |
31 | class LandmarkList(data.Dataset):
32 |     def __init__(self, root, fileList, transform=None, list_reader=default_list_reader, loader=default_loader):
33 |         self.root = root
34 |         self.lmList = list_reader(fileList)
35 |         self.transform = transform
36 |         self.loader = loader
37 |
38 |     def __getitem__(self, index):
39 |         lmPath, target = self.lmList[index]
40 |         lm = self.loader(os.path.join(self.root, lmPath))
41 |         if self.transform is not None:
42 |             lm = self.transform(lm)
43 |         return lm, target, lm.shape[0]
44 |
45 |     def __len__(self):
46 |         return len(self.lmList)
47 |
48 |
49 | class LandmarkListTest(data.Dataset):
50 |     def __init__(self, root, fileList, transform=None, list_reader=default_list_reader, loader=default_loader):
51 |         self.root = root
52 |         self.lmList = list_reader(fileList)
53 |         self.transform = transform
54 |         self.loader = loader
55 |
56 |     def __getitem__(self, index):
57 |         lmPath, target = self.lmList[index]
58 |         lm = self.loader(os.path.join(self.root, lmPath))
59 |         if self.transform is not None:
60 |             lm = self.transform(lm)
61 |         return lm, target, lm.shape[0], lmPath
62 |
63 |     def __len__(self):
64 |         return len(self.lmList)
--------------------------------------------------------------------------------
/dataset2.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 |
5 | import torch.utils.data as data
6 |
7 | import os
8 | import os.path
9 | import re
10 | import torch
11 | import pickle
12 |
13 |
14 | def default_loader(path):
15 |     with open(path, 'rb') as fp:
16 |         lm_list = pickle.load(fp)
17 |     return lm_list
18 |
19 |
20 | def default_list_reader(fileList):
21 |     lmList = []
22 |     with open(fileList, 'r') as file:
23 |         for line in file.readlines():
24 |             lmPath = line.strip()[:-2].strip()
25 |             label = line.strip()[-1]
26 |
27 |             lmList.append((lmPath, int(label)))
28 |     return lmList
29 |
30 |
31 | class LandmarkList(data.Dataset):
32 |     def __init__(self, root, fileList, transform=None, list_reader=default_list_reader, loader=default_loader):
33 |         self.root = root
34 |         self.lmList = list_reader(fileList)
35 |         self.transform = transform
36 |         self.loader = loader
37 |
38 |     def __getitem__(self, index):
39 |         lmPath, target = self.lmList[index]
40 |         lm = self.loader(os.path.join(self.root, lmPath))
41 |         if self.transform is not None:
42 |             lm = self.transform(lm)
43 | 
return lm, target, lm.shape[0] 44 | 45 | def __len__(self): 46 | return len(self.lmList) -------------------------------------------------------------------------------- /dataset_list.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import torch.utils.data as data 6 | 7 | import os 8 | import os.path 9 | import re 10 | import torch 11 | import pickle 12 | 13 | 14 | def default_loader(path): 15 | with open(path, 'rb') as fp: 16 | lm_list = pickle.load(fp) 17 | fp.close() 18 | return lm_list 19 | 20 | def default_list_reader(fileList): 21 | lmList = [] 22 | with open(fileList, 'r') as file: 23 | for line in file.readlines(): 24 | lmPath=line.strip()[:-2].strip() 25 | label=line.strip()[-1] 26 | 27 | lmList.append((lmPath, int(label))) 28 | return lmList 29 | 30 | 31 | class LandmarkList(data.Dataset): 32 | def __init__(self, root, fileList, transform=None, list_reader=default_list_reader, loader=default_loader): 33 | self.root = root 34 | self.lmList = list_reader(fileList) 35 | self.transform = transform 36 | self.loader = loader 37 | 38 | def __getitem__(self, index): 39 | lmPath, target = self.lmList[index] 40 | lm = self.loader(os.path.join(self.root, lmPath)) 41 | if self.transform is not None: 42 | lm = self.transform(lm) 43 | return lm, target, len(lm) 44 | 45 | def __len__(self): 46 | return len(self.lmList) -------------------------------------------------------------------------------- /main2.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | import torch.optim as optim 5 | from dataset2 import LandmarkList 6 | from torch.utils import data 7 | from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence 8 | import argparse 9 | 10 | # parser.add_argument('--root_path', type=str, default='/home/guosheng/Liveness/Code/FaceFlashing/Data/', 11 | # metavar='H', 12 | # help='Dir Head') 13 | # parser.add_argument('--trainFile', type=str, default='TrainList_4sources_13082019.txt', metavar='TRF', help='training file name') 14 | 15 | 16 | EMBEDDING_DIM = 68*2 17 | HIDDEN_DIM = 68*4 18 | MAX_EPOCH = 10 19 | DEVICES = 2 20 | torch.cuda.set_device(DEVICES) 21 | 22 | 23 | def pad_collate(batch): 24 | batch.sort(key=lambda x: x[2], reverse=True) 25 | lms, tgs, lens = zip(*batch) 26 | new_lms = torch.zeros((len(lms), lms[0].shape[0], lms[0].shape[1])) # batch x seq x feature(136) 27 | new_lms[0] = lms[0] 28 | for i in range(1, len(lms)): 29 | # import pdb; 30 | # pdb.set_trace() 31 | new_lms[i] = torch.cat((lms[i], torch.zeros((lens[0] - lens[i]),136)), 0) 32 | return new_lms, tgs, lens 33 | 34 | 35 | class LSTM_Classifier(nn.Module): 36 | 37 | def __init__(self, embedding_dim, hidden_dim, target_size=1): 38 | super(LSTM_Classifier, self).__init__() 39 | self.hidden_dim = hidden_dim 40 | 41 | # The LSTM takes word embeddings as inputs, and outputs hidden states 42 | # with dimensionality hidden_dim. 
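# Here the "embeddings" are 136-dim landmark frames (68 points x 2 coords);
# forward() packs each padded batch, so the zero rows appended by pad_collate
# are never unrolled through the LSTM.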
43 | self.lstm = nn.LSTM(embedding_dim, hidden_dim, num_layers=2) 44 | 45 | # The linear layer that maps from hidden state space to tag space 46 | self.lc = nn.Linear(hidden_dim, target_size) 47 | 48 | def forward(self, landmarks, lengths): 49 | # pack_padded_sequence so that padded items in the sequence won't be shown to the LSTM 50 | packed_input = pack_padded_sequence(landmarks, lengths, batch_first=True) 51 | _, (ht, _) = self.lstm(packed_input) 52 | # import pdb; 53 | # pdb.set_trace() 54 | # packed_output, (ht, ct) = self.lstm(packed_input) # ht is the final output of each batch! ht (1, 4, 272) can be found in output[:,input_sizes-1,:] 55 | # output, input_sizes = pad_packed_sequence(packed_output, batch_first=True) 56 | ''' 57 | (Pdb) output[:,input_sizes-1,:] 58 | tensor([[[-0.0176, 0.1605, 0.1339, ..., -0.0914, 0.2951, -0.0065], 59 | [-0.0225, 0.1589, 0.1340, ..., -0.0925, 0.2950, -0.0095], 60 | [-0.0253, 0.1574, 0.1431, ..., -0.0865, 0.3022, -0.0119], 61 | [-0.0303, 0.1515, 0.1422, ..., -0.1094, 0.2976, -0.0032]], 62 | 63 | [[ 0.0000, 0.0000, 0.0000, ..., 0.0000, 0.0000, 0.0000], 64 | [-0.0165, 0.1666, 0.1344, ..., -0.0698, 0.2945, -0.0163], 65 | [-0.0235, 0.1697, 0.1479, ..., -0.0657, 0.3001, -0.0195], 66 | [-0.0235, 0.1734, 0.1515, ..., -0.0608, 0.3029, -0.0201]], 67 | 68 | [[ 0.0000, 0.0000, 0.0000, ..., 0.0000, 0.0000, 0.0000], 69 | [ 0.0000, 0.0000, 0.0000, ..., 0.0000, 0.0000, 0.0000], 70 | [-0.0492, 0.1666, 0.1444, ..., -0.0749, 0.2816, -0.0188], 71 | [-0.0490, 0.1542, 0.1449, ..., -0.0865, 0.2821, -0.0205]], 72 | 73 | [[ 0.0000, 0.0000, 0.0000, ..., 0.0000, 0.0000, 0.0000], 74 | [ 0.0000, 0.0000, 0.0000, ..., 0.0000, 0.0000, 0.0000], 75 | [ 0.0000, 0.0000, 0.0000, ..., 0.0000, 0.0000, 0.0000], 76 | [-0.0460, 0.1522, 0.1381, ..., -0.0959, 0.2843, -0.0071]]], 77 | device='cuda:2', grad_fn=) 78 | (Pdb) ht.shape 79 | torch.Size([1, 4, 272]) 80 | (Pdb) ht 81 | tensor([[[-0.0176, 0.1605, 0.1339, ..., -0.0914, 0.2951, -0.0065], 82 | [-0.0165, 0.1666, 0.1344, ..., -0.0698, 0.2945, -0.0163], 83 | [-0.0492, 0.1666, 0.1444, ..., -0.0749, 0.2816, -0.0188], 84 | [-0.0460, 0.1522, 0.1381, ..., -0.0959, 0.2843, -0.0071]]], 85 | device='cuda:2', grad_fn=) 86 | 87 | ''' 88 | # import pdb; 89 | # pdb.set_trace() 90 | logit = self.lc(ht[-1]) 91 | return logit 92 | 93 | 94 | # inp = [torch.randn(1, 68*2) for _ in range(5)] 95 | # print(inp) 96 | # inp = torch.cat(inp) 97 | # print(inp) 98 | # model = LSTM_Classifier(EMBEDDING_DIM, HIDDEN_DIM, 1) 99 | # out = model(inp) 100 | # print(out) 101 | 102 | 103 | model = LSTM_Classifier(EMBEDDING_DIM, HIDDEN_DIM, 1) 104 | model = model.cuda() 105 | loss_function = torch.nn.BCEWithLogitsLoss() 106 | optimizer = optim.Adam(model.parameters(), lr=1e-4) 107 | # l2 = torch.nn.BCELoss() 108 | 109 | 110 | # for i in range(2): 111 | # model.zero_grad() 112 | # inp = [torch.randn(1, 68*2) for _ in range(5)] 113 | # inp = torch.cat(inp) 114 | # out = model(inp)[-1] # we could do a classifcation for every output (probably better) 115 | # print(out) 116 | # loss = loss_function(out,torch.Tensor(1)) 117 | # # loss = l2(nn.Sigmoid()(out), torch.Tensor(1)) 118 | # print(loss) 119 | # loss.backward() 120 | # optimizer.step() 121 | 122 | 123 | dataset_train = LandmarkList(root='/datasets/move_closer/Data_Landmark/', fileList='/datasets/move_closer/TrainList.txt') 124 | dataloader_train = data.DataLoader(dataset_train, batch_size=128, shuffle=True, num_workers=4, collate_fn=pad_collate) 125 | 126 | for i in range(MAX_EPOCH): 127 | for batch, labels, lengths in 
dataloader_train:
128 |         model.zero_grad()
129 |         out = model(batch.cuda(), lengths)  # we could do a classification for every output (probably better)
130 |         # import pdb;
131 |         # pdb.set_trace()
132 |         loss = loss_function(out, torch.FloatTensor(labels).unsqueeze(1).cuda())
133 |         # loss = l2(nn.Sigmoid()(out), labels)
134 |         print(loss.item())
135 |         loss.backward()
136 |         optimizer.step()
137 |
138 |
139 |
140 |
141 |
142 |
143 |
144 | # Demo to prove batched and single-sample runs give the same outputs, i.e. that the padding is not fed through the RNN continuously!
145 | # for batch, labels, lengths in dataloader_train:
146 | #     model.zero_grad()
147 | #     out = model(batch.cuda(), lengths)  # we could do a classification for every output (probably better)
148 | #     import pdb;
149 | #     pdb.set_trace()
150 | #
151 | # dataloader_train = data.DataLoader(dataset_train, batch_size=1, shuffle=False, num_workers=0, collate_fn=pad_collate)
152 | # for batch, labels, lengths in dataloader_train:
153 | #     model.zero_grad()
154 | #     out = model(batch.cuda(), lengths)  # we could do a classification for every output (probably better)
155 | #     import pdb;
156 | #     pdb.set_trace()
157 |
158 |
159 |
160 |
--------------------------------------------------------------------------------
/main_softmax.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.nn.functional as F
4 | import torch.optim as optim
5 | from dataset import LandmarkList
6 | from torch.utils import data
7 | from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence
8 | import argparse
9 |
10 |
11 | # rnn = 'GRU'
12 | # rnn = 'embedGRU'
13 | rnn = 'biGRU'
14 | # rnn = 'LSTM'
15 | EMBEDDING_DIM = 68*2
16 | HIDDEN_DIM = 68*2*2
17 | N_LAYERS_RNN = 3
18 | DROPOUT = 0.5
19 | MAX_EPOCH = 1000
20 | LR = 1e-4
21 | DEVICES = 1
22 | torch.cuda.set_device(DEVICES)
23 | SAVE_BEST_MODEL = True
24 |
25 |
26 | def compute_binary_accuracy(model, data_loader, loss_function):
27 |     correct_pred, num_examples, total_loss = 0, 0, 0.
28 |     model.eval()
29 |     with torch.no_grad():
30 |         for batch, labels, lengths in data_loader:
31 |             # import pdb;
32 |             # pdb.set_trace()
33 |             logits = model(batch.cuda(), lengths)
34 |             total_loss += loss_function(logits, torch.LongTensor(labels).cuda()).item()
35 |             # predicted_labels = (torch.sigmoid(logits) > 0.5).long()
36 |             _, predicted_labels = torch.max(logits, 1)
37 |             num_examples += len(lengths)
38 |             correct_pred += (predicted_labels.cpu().long() == torch.LongTensor(labels)).sum()
39 |     return correct_pred.float().item()/num_examples * 100, total_loss
40 |
41 |
42 | def pad_collate(batch):
43 |     batch.sort(key=lambda x: x[2], reverse=True)
44 |     lms, tgs, lens = zip(*batch)
45 |     new_lms = torch.zeros((len(lms), lms[0].shape[0], lms[0].shape[1]))  # batch x seq x feature(136)
46 |     new_lms[0] = lms[0]
47 |     for i in range(1, len(lms)):
48 |         new_lms[i] = torch.cat((lms[i], torch.zeros((lens[0] - lens[i]), 136)), 0)
49 |     return new_lms, tgs, lens
50 |
51 |
52 | class LSTM_Classifier(nn.Module):
53 |
54 |     def __init__(self, embedding_dim, hidden_dim, target_size=1, bidirectional=False):
55 |         super(LSTM_Classifier, self).__init__()
56 |         self.hidden_dim = hidden_dim
57 |
58 |         # The LSTM takes landmark feature vectors as inputs, and outputs hidden states
59 |         # with dimensionality hidden_dim.
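# Note: nn.LSTM applies this dropout between stacked layers only, so it is
# active here because N_LAYERS_RNN > 1 (PyTorch warns when num_layers == 1).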
60 | self.lstm = nn.LSTM(embedding_dim, hidden_dim, num_layers=N_LAYERS_RNN, dropout=DROPOUT) 61 | 62 | # The linear layer that maps from hidden state space to tag space 63 | self.lc = nn.Linear(hidden_dim, target_size) 64 | self.dropout = nn.Dropout(DROPOUT) 65 | 66 | def forward(self, landmarks, lengths): 67 | packed_input = pack_padded_sequence(landmarks, lengths, batch_first=True) 68 | _, (ht, _) = self.lstm(packed_input) 69 | ht = self.dropout(ht[-1]) 70 | logit = self.lc(ht) 71 | return logit 72 | 73 | 74 | class embed_GRU_Classifier(nn.Module): 75 | 76 | def __init__(self, embedding_dim, hidden_dim, target_size=1, bidirectional=False): 77 | super(embed_GRU_Classifier, self).__init__() 78 | self.hidden_dim = hidden_dim 79 | 80 | self.embed = nn.Linear(EMBEDDING_DIM, EMBEDDING_DIM, bias=False) 81 | # The LSTM takes word embeddings as inputs, and outputs hidden states 82 | # with dimensionality hidden_dim 83 | self.gru = nn.GRU(embedding_dim, hidden_dim, num_layers=N_LAYERS_RNN, bidirectional=bidirectional, dropout=DROPOUT) 84 | 85 | # The linear layer that maps from hidden state space to tag space 86 | self.lc1 = nn.Linear(hidden_dim,EMBEDDING_DIM) 87 | self.lc2 = nn.Linear(EMBEDDING_DIM, target_size) 88 | self.dropout = nn.Dropout(DROPOUT) 89 | 90 | def forward(self, landmarks, lengths): 91 | # import pdb; pdb.set_trace() 92 | landmarks = self.embed(landmarks) 93 | packed_input = pack_padded_sequence(landmarks, lengths, batch_first=True) 94 | _, ht = self.gru(packed_input) 95 | # import pdb; pdb.set_trace() 96 | ht = self.dropout(ht[-1]) 97 | logit = self.lc2(F.tanh(self.lc1(ht))) 98 | return logit 99 | 100 | 101 | class GRU_Classifier(nn.Module): 102 | 103 | def __init__(self, embedding_dim, hidden_dim, target_size=1, bidirectional=False): 104 | super(GRU_Classifier, self).__init__() 105 | self.hidden_dim = hidden_dim 106 | 107 | # The LSTM takes word embeddings as inputs, and outputs hidden states 108 | # with dimensionality hidden_dim 109 | self.gru = nn.GRU(embedding_dim, hidden_dim, num_layers=N_LAYERS_RNN, bidirectional=bidirectional, dropout=DROPOUT) 110 | 111 | # The linear layer that maps from hidden state space to tag space 112 | self.lc1 = nn.Linear(hidden_dim,target_size) 113 | # self.lc1 = nn.Linear(hidden_dim,EMBEDDING_DIM) 114 | # self.lc2 = nn.Linear(EMBEDDING_DIM, target_size) 115 | self.dropout = nn.Dropout(DROPOUT) 116 | 117 | def forward(self, landmarks, lengths): 118 | packed_input = pack_padded_sequence(landmarks, lengths, batch_first=True) 119 | _, ht = self.gru(packed_input) 120 | # import pdb; pdb.set_trace() 121 | ht = self.dropout(ht[-1]) 122 | logit = self.lc1(ht) 123 | # logit = self.lc2(F.relu(self.lc1(ht))) 124 | return logit 125 | 126 | 127 | class biGRU_Classifier(nn.Module): 128 | 129 | def __init__(self, embedding_dim, hidden_dim, target_size=1, bidirectional=True): 130 | super(biGRU_Classifier, self).__init__() 131 | self.hidden_dim = hidden_dim 132 | 133 | # The LSTM takes word embeddings as inputs, and outputs hidden states 134 | # with dimensionality hidden_dim. 
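# With bidirectional=True, ht stacks 2 * num_layers direction-specific states;
# forward() concatenates the top layer's forward state ht[-2] and backward
# state ht[-1] into a single 2*hidden_dim feature for the classifier head.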
135 | self.gru = nn.GRU(embedding_dim, hidden_dim, num_layers=N_LAYERS_RNN, bidirectional=bidirectional, dropout=DROPOUT) 136 | 137 | # The linear layer that maps from hidden state space to tag space 138 | self.lc1 = nn.Linear(hidden_dim*2, hidden_dim) 139 | self.lc2 = nn.Linear(hidden_dim, target_size) 140 | self.dropout = nn.Dropout(DROPOUT) 141 | 142 | def forward(self, landmarks, lengths): 143 | packed_input = pack_padded_sequence(landmarks, lengths, batch_first=True) 144 | _, ht = self.gru(packed_input) 145 | # import pdb; pdb.set_trace() 146 | ht = self.dropout(torch.cat((ht[-2,:,:], ht[-1,:,:]), dim=1)) 147 | logit = self.lc2(F.relu(self.lc1(ht))) 148 | return logit 149 | 150 | 151 | if rnn == 'embedGRU': 152 | model = embed_GRU_Classifier(EMBEDDING_DIM, HIDDEN_DIM, 2) 153 | if rnn == 'GRU': 154 | model = GRU_Classifier(EMBEDDING_DIM, HIDDEN_DIM, 2) 155 | if rnn == 'biGRU': 156 | model = biGRU_Classifier(EMBEDDING_DIM, HIDDEN_DIM, 2) 157 | if rnn == 'LSTM': 158 | model = LSTM_Classifier(EMBEDDING_DIM, HIDDEN_DIM, 2) 159 | model = model.cuda() 160 | loss_function = torch.nn.CrossEntropyLoss() 161 | loss_function_eval_sum = torch.nn.CrossEntropyLoss(reduction='sum') 162 | optimizer = optim.Adam(model.parameters(), lr=LR) 163 | # l2 = torch.nn.BCELoss() 164 | 165 | 166 | dataset_train = LandmarkList(root='/datasets/move_closer/Data_Landmark/', fileList='/datasets/move_closer/TrainList.txt') 167 | dataloader_train = data.DataLoader(dataset_train, batch_size=128, shuffle=True, num_workers=0, collate_fn=pad_collate) 168 | 169 | dataset_test = LandmarkList(root='/datasets/move_closer/Data_Landmark/', fileList='/datasets/move_closer/TestList.txt') 170 | dataloader_test = data.DataLoader(dataset_test, batch_size=64, shuffle=False, num_workers=1, collate_fn=pad_collate) 171 | 172 | best_test_acc = 0. 173 | for epoch in range(MAX_EPOCH): 174 | model.train() 175 | n_iter = 0 176 | for batch, labels, lengths in dataloader_train: 177 | model.zero_grad() 178 | out = model(batch.cuda(), lengths) # we could do a classifcation for every output (probably better) 179 | # import pdb; pdb.set_trace() 180 | loss = loss_function(out, torch.LongTensor(labels).cuda()) 181 | # loss = l2(nn.Sigmoid()(out), labels) 182 | loss.backward() 183 | optimizer.step() 184 | n_iter += 1 185 | train_acc, train_loss = compute_binary_accuracy(model, dataloader_train, loss_function_eval_sum) 186 | test_acc, test_loss = compute_binary_accuracy(model, dataloader_test, loss_function_eval_sum) 187 | print('Epoch{},train_acc,{:.2f}%,train_loss,{:.8f},valid_acc,{:.2f}%,valid_loss,{:.8f}'.format(epoch, train_acc, train_loss, test_acc, test_loss)) 188 | if test_acc > best_test_acc: 189 | best_test_acc = test_acc 190 | if SAVE_BEST_MODEL: 191 | torch.save(model.state_dict(), 'models/' + rnn + 192 | '_L' + str(N_LAYERS_RNN) + '.pt') 193 | print('best epoch {}, train_acc {}, test_acc {}'.format(epoch, train_acc, test_acc)) 194 | 195 | 196 | 197 | 198 | 199 | 200 | 201 | 202 | 203 | 204 | # class LSTM_Classifier(nn.Module): 205 | # 206 | # def __init__(self, embedding_dim, hidden_dim, target_size=1): 207 | # super(LSTM_Classifier, self).__init__() 208 | # self.hidden_dim = hidden_dim 209 | # 210 | # # The LSTM takes word embeddings as inputs, and outputs hidden states 211 | # # with dimensionality hidden_dim. 
212 | # self.lstm = nn.LSTM(embedding_dim, hidden_dim) 213 | # 214 | # # The linear layer that maps from hidden state space to tag space 215 | # self.lc = nn.Linear(hidden_dim, target_size) 216 | # 217 | # def forward(self, landmarks, lengths): 218 | # # pack_padded_sequence so that padded items in the sequence won't be shown to the LSTM 219 | # packed_input = pack_padded_sequence(landmarks, lengths, batch_first=True) 220 | # _, (ht, _) = self.lstm(packed_input) 221 | # import pdb; 222 | # pdb.set_trace() 223 | # # packed_output, (ht, ct) = self.lstm(packed_input) # ht is the final output of each batch! ht (1, 4, 272) can be found in output[:,input_sizes-1,:] 224 | # # output, input_sizes = pad_packed_sequence(packed_output, batch_first=True) 225 | # ''' 226 | # (Pdb) output[:,input_sizes-1,:] 227 | # tensor([[[-0.0176, 0.1605, 0.1339, ..., -0.0914, 0.2951, -0.0065], 228 | # [-0.0225, 0.1589, 0.1340, ..., -0.0925, 0.2950, -0.0095], 229 | # [-0.0253, 0.1574, 0.1431, ..., -0.0865, 0.3022, -0.0119], 230 | # [-0.0303, 0.1515, 0.1422, ..., -0.1094, 0.2976, -0.0032]], 231 | # 232 | # [[ 0.0000, 0.0000, 0.0000, ..., 0.0000, 0.0000, 0.0000], 233 | # [-0.0165, 0.1666, 0.1344, ..., -0.0698, 0.2945, -0.0163], 234 | # [-0.0235, 0.1697, 0.1479, ..., -0.0657, 0.3001, -0.0195], 235 | # [-0.0235, 0.1734, 0.1515, ..., -0.0608, 0.3029, -0.0201]], 236 | # 237 | # [[ 0.0000, 0.0000, 0.0000, ..., 0.0000, 0.0000, 0.0000], 238 | # [ 0.0000, 0.0000, 0.0000, ..., 0.0000, 0.0000, 0.0000], 239 | # [-0.0492, 0.1666, 0.1444, ..., -0.0749, 0.2816, -0.0188], 240 | # [-0.0490, 0.1542, 0.1449, ..., -0.0865, 0.2821, -0.0205]], 241 | # 242 | # [[ 0.0000, 0.0000, 0.0000, ..., 0.0000, 0.0000, 0.0000], 243 | # [ 0.0000, 0.0000, 0.0000, ..., 0.0000, 0.0000, 0.0000], 244 | # [ 0.0000, 0.0000, 0.0000, ..., 0.0000, 0.0000, 0.0000], 245 | # [-0.0460, 0.1522, 0.1381, ..., -0.0959, 0.2843, -0.0071]]], 246 | # device='cuda:2', grad_fn=) 247 | # (Pdb) ht.shape 248 | # torch.Size([1, 4, 272]) 249 | # (Pdb) ht 250 | # tensor([[[-0.0176, 0.1605, 0.1339, ..., -0.0914, 0.2951, -0.0065], 251 | # [-0.0165, 0.1666, 0.1344, ..., -0.0698, 0.2945, -0.0163], 252 | # [-0.0492, 0.1666, 0.1444, ..., -0.0749, 0.2816, -0.0188], 253 | # [-0.0460, 0.1522, 0.1381, ..., -0.0959, 0.2843, -0.0071]]], 254 | # device='cuda:2', grad_fn=) 255 | # 256 | # ''' 257 | # # import pdb; 258 | # # pdb.set_trace() 259 | # logit = self.lc(ht.squeeze(0)) 260 | # return logit -------------------------------------------------------------------------------- /model.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence 5 | 6 | 7 | DROPOUT = 0.5 8 | 9 | 10 | class LSTM_Classifier(nn.Module): 11 | 12 | def __init__(self, embedding_dim, hidden_dim, target_size=1, bidirectional=False, n_layer=1): 13 | super(LSTM_Classifier, self).__init__() 14 | self.hidden_dim = hidden_dim 15 | 16 | # The LSTM takes word embeddings as inputs, and outputs hidden states 17 | # with dimensionality hidden_dim. 
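# ht returned by nn.LSTM has shape (num_layers, batch, hidden_dim); because
# the input is packed, ht[-1] (used in forward) holds the top layer's state at
# each sequence's true last frame rather than at the padded length.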
18 | self.lstm = nn.LSTM(embedding_dim, hidden_dim, num_layers=n_layer, dropout=DROPOUT) 19 | 20 | # The linear layer that maps from hidden state space to tag space 21 | self.lc = nn.Linear(hidden_dim, target_size) 22 | self.dropout = nn.Dropout(DROPOUT) 23 | 24 | def forward(self, landmarks, lengths): 25 | packed_input = pack_padded_sequence(landmarks, lengths, batch_first=True) 26 | _, (ht, _) = self.lstm(packed_input) 27 | ht = self.dropout(ht[-1]) 28 | logit = self.lc(ht) 29 | return logit 30 | 31 | 32 | class embed_GRU_Classifier(nn.Module): 33 | 34 | def __init__(self, embedding_dim, hidden_dim, target_size=1, bidirectional=False, n_layer=1): 35 | super(embed_GRU_Classifier, self).__init__() 36 | self.hidden_dim = hidden_dim 37 | 38 | self.embed1 = nn.Linear(embedding_dim, int(hidden_dim*2), bias=False) 39 | self.embed2 = nn.Linear(int(hidden_dim*2), hidden_dim, bias=False) 40 | # The LSTM takes word embeddings as inputs, and outputs hidden states 41 | # with dimensionality hidden_dim 42 | self.gru = nn.GRU(hidden_dim, hidden_dim, num_layers=n_layer, bidirectional=bidirectional, dropout=DROPOUT) 43 | 44 | # The linear layer that maps from hidden state space to tag space 45 | self.lc1 = nn.Linear(hidden_dim,int(hidden_dim/2)) 46 | self.lc2 = nn.Linear(int(hidden_dim/2), target_size) 47 | self.dropout = nn.Dropout(DROPOUT) 48 | 49 | # super(embed_GRU_Classifier, self).__init__() 50 | # self.hidden_dim = hidden_dim 51 | # 52 | # self.embed1 = nn.Linear(embedding_dim, int(embedding_dim/2), bias=False) 53 | # self.embed2 = nn.Linear(int(embedding_dim/2), int(embedding_dim/4), bias=False) 54 | # # The LSTM takes word embeddings as inputs, and outputs hidden states 55 | # # with dimensionality hidden_dim 56 | # self.gru = nn.GRU(int(embedding_dim/4), int(embedding_dim/4), num_layers=n_layer, bidirectional=bidirectional, dropout=DROPOUT) 57 | # 58 | # # The linear layer that maps from hidden state space to tag space 59 | # self.lc1 = nn.Linear(int(embedding_dim/4),int(embedding_dim/8)) 60 | # self.lc2 = nn.Linear(int(embedding_dim/8), target_size) 61 | # self.dropout = nn.Dropout(DROPOUT) 62 | 63 | def forward(self, landmarks, lengths): 64 | # import pdb; pdb.set_trace() 65 | landmarks = F.tanh(self.embed2(F.tanh(self.embed1(landmarks)))) 66 | packed_input = pack_padded_sequence(landmarks, lengths, batch_first=True) 67 | _, ht = self.gru(packed_input) 68 | # import pdb; pdb.set_trace() 69 | ht = self.dropout(ht[-1]) 70 | logit = self.lc2(F.tanh(self.lc1(ht))) 71 | return logit 72 | 73 | 74 | class GRU_Classifier(nn.Module): 75 | 76 | def __init__(self, embedding_dim, hidden_dim, target_size=1, bidirectional=False, n_layer=1): 77 | super(GRU_Classifier, self).__init__() 78 | self.hidden_dim = hidden_dim 79 | self.grad_clipping = 10. 
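# Clamp threshold used in forward(): gradients flowing back through the final
# hidden state are clipped element-wise to [-grad_clipping, grad_clipping] via
# register_hook, a per-tensor alternative to clip_grad_norm_.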
80 | # The LSTM takes word embeddings as inputs, and outputs hidden states 81 | # with dimensionality hidden_dim 82 | self.gru = nn.GRU(embedding_dim, hidden_dim, num_layers=n_layer, bidirectional=bidirectional, dropout=DROPOUT) 83 | 84 | # The linear layer that maps from hidden state space to tag space 85 | self.lc1 = nn.Linear(hidden_dim,target_size) 86 | # self.lc1 = nn.Linear(hidden_dim,EMBEDDING_DIM) 87 | # self.lc2 = nn.Linear(EMBEDDING_DIM, target_size) 88 | self.dropout = nn.Dropout(DROPOUT) 89 | 90 | def forward(self, landmarks, lengths): 91 | packed_input = pack_padded_sequence(landmarks, lengths, batch_first=True) 92 | _, ht = self.gru(packed_input) 93 | # import pdb; pdb.set_trace() 94 | if ht.requires_grad: 95 | ht.register_hook(lambda x: x.clamp(min=-self.grad_clipping, max=self.grad_clipping)) 96 | ht = self.dropout(ht[-1]) 97 | logit = self.lc1(ht) # probably a 1x1 conv is need to do linear transform 98 | # logit = self.lc2(F.relu(self.lc1(ht))) 99 | return logit 100 | 101 | 102 | class biGRU_Classifier(nn.Module): 103 | 104 | def __init__(self, embedding_dim, hidden_dim, target_size=1, bidirectional=True, n_layer=1): 105 | super(biGRU_Classifier, self).__init__() 106 | self.hidden_dim = hidden_dim 107 | self.grad_clipping = 10. 108 | # The LSTM takes word embeddings as inputs, and outputs hidden states 109 | # with dimensionality hidden_dim. 110 | self.gru = nn.GRU(embedding_dim, hidden_dim, num_layers=n_layer, bidirectional=bidirectional, dropout=DROPOUT) 111 | 112 | # The linear layer that maps from hidden state space to tag space 113 | self.lc1 = nn.Linear(hidden_dim*2, hidden_dim) 114 | self.lc2 = nn.Linear(hidden_dim, target_size) 115 | self.dropout = nn.Dropout(DROPOUT) 116 | 117 | def forward(self, landmarks, lengths): 118 | packed_input = pack_padded_sequence(landmarks, lengths, batch_first=True) 119 | _, ht = self.gru(packed_input) 120 | if ht.requires_grad: 121 | ht.register_hook(lambda x: x.clamp(min=-self.grad_clipping, max=self.grad_clipping)) 122 | ht = self.dropout(torch.cat((ht[-2,:,:], ht[-1,:,:]), dim=1)) 123 | logit = self.lc2(F.relu(self.lc1(ht))) 124 | return logit 125 | 126 | 127 | class Framewise_GRU_Classifier(nn.Module): 128 | 129 | def __init__(self, embedding_dim, hidden_dim, target_size=1, bidirectional=False, n_layer=1): 130 | super(Framewise_GRU_Classifier, self).__init__() 131 | self.hidden_dim = hidden_dim 132 | self.gru = nn.GRU(embedding_dim, hidden_dim, num_layers=n_layer, bidirectional=bidirectional, dropout=DROPOUT) 133 | 134 | # The linear layer that maps from hidden state space to tag space 135 | # self.lc1 = nn.Linear(hidden_dim, target_size) 136 | self.lc1 = nn.Linear(hidden_dim, embedding_dim) 137 | self.lc2 = nn.Linear(embedding_dim, target_size) 138 | self.dropout = nn.Dropout(DROPOUT) 139 | 140 | def forward(self, landmarks, lengths): 141 | packed_input = pack_padded_sequence(landmarks, lengths, batch_first=True) 142 | packed_output, _ = self.gru(packed_input) 143 | output, _ = pad_packed_sequence(packed_output, batch_first=True) 144 | output = output.contiguous() 145 | output = output.view(-1, self.hidden_dim) 146 | output = self.dropout(output) 147 | logit = self.lc1(output) # probably a 1x1 conv is need to do linear transform 148 | logit = self.lc2(self.dropout(F.relu(logit))) 149 | return logit.view(len(lengths), -1, 1) 150 | 151 | 152 | class sumGRU(nn.Module): 153 | 154 | def __init__(self, embedding_dim, hidden_dim, target_size=1, bidirectional=False, n_layer=1): 155 | super(sumGRU, self).__init__() 156 | self.hidden_dim 
= hidden_dim 157 | self.gru = nn.GRU(embedding_dim, hidden_dim, num_layers=n_layer, bidirectional=bidirectional, dropout=DROPOUT) 158 | 159 | # The linear layer that maps from hidden state space to tag space 160 | self.lc1 = nn.Linear(hidden_dim, embedding_dim) 161 | self.lc2 = nn.Linear(embedding_dim, target_size) 162 | self.dropout = nn.Dropout(DROPOUT) 163 | 164 | def forward(self, landmarks, lengths): 165 | packed_input = pack_padded_sequence(landmarks, lengths, batch_first=True) 166 | packed_output, _ = self.gru(packed_input) 167 | output, _ = pad_packed_sequence(packed_output, batch_first=True) 168 | # import pdb; pdb.set_trace() 169 | output = self.dropout(output.sum(1)) 170 | # logit = self.lc1(output) # probably a 1x1 conv is need to do linear transform 171 | logit = self.lc2(F.relu(self.lc1(output))) 172 | return logit 173 | 174 | 175 | 176 | class cnn_2d(nn.Module): 177 | 178 | def __init__(self, embedding_dim, hidden_dim, target_size=1, bidirectional=False): 179 | super(cnn_2d, self).__init__() 180 | self.hidden_dim = hidden_dim 181 | self.n_layers = 2 # 2, 4, 6 ,8 182 | if self.n_layers >= 2: 183 | self.conv1 = nn.Conv1d(in_channels=embedding_dim, out_channels=hidden_dim, kernel_size=3, padding=1) 184 | self.conv2 = nn.Conv1d(in_channels=hidden_dim, out_channels=hidden_dim, kernel_size=3, padding=1) 185 | self.bn1 = nn.BatchNorm1d(num_features=self.hidden_dim) 186 | self.bn2 = nn.BatchNorm1d(num_features=self.hidden_dim) 187 | self.p1 = nn.MaxPool1d(kernel_size=2) 188 | if self.n_layers >= 4: 189 | self.conv3 = nn.Conv1d(in_channels=hidden_dim, out_channels=hidden_dim, kernel_size=3, padding=1) 190 | self.conv4 = nn.Conv1d(in_channels=hidden_dim, out_channels=hidden_dim, kernel_size=3, padding=1) 191 | self.bn3 = nn.BatchNorm1d(num_features=self.hidden_dim) 192 | self.bn4 = nn.BatchNorm1d(num_features=self.hidden_dim) 193 | self.p2 = nn.MaxPool1d(kernel_size=2) 194 | if self.n_layers >= 6: 195 | self.conv5 = nn.Conv1d(in_channels=hidden_dim, out_channels=hidden_dim, kernel_size=3, padding=1) 196 | self.conv6 = nn.Conv1d(in_channels=hidden_dim, out_channels=hidden_dim, kernel_size=3, padding=1) 197 | self.bn5 = nn.BatchNorm1d(num_features=self.hidden_dim) 198 | self.bn6 = nn.BatchNorm1d(num_features=self.hidden_dim) 199 | self.p3 = nn.MaxPool1d(kernel_size=2) 200 | if self.n_layers == 8: 201 | self.conv7 = nn.Conv1d(in_channels=hidden_dim, out_channels=hidden_dim, kernel_size=3, padding=1) 202 | self.conv8 = nn.Conv1d(in_channels=hidden_dim, out_channels=hidden_dim, kernel_size=3, padding=1) 203 | self.bn7 = nn.BatchNorm1d(num_features=self.hidden_dim) 204 | self.bn8 = nn.BatchNorm1d(num_features=self.hidden_dim) 205 | 206 | self.glbAvgPool = nn.AdaptiveAvgPool1d(1) 207 | 208 | self.dropout = nn.Dropout(DROPOUT) 209 | # The linear layer that maps from hidden state space to tag space 210 | self.lc1 = nn.Linear(hidden_dim, int(hidden_dim*2)) 211 | self.lc2 = nn.Linear(int(hidden_dim*2), target_size) 212 | 213 | def forward(self, landmarks, lengths): 214 | landmarks = landmarks.permute(0, 2, 1) # (b, seq, dim) --> (b, dim, seq) 215 | # Convolve on Seq for each dim to get (b, dim, seq) 216 | if self.n_layers == 8: 217 | landmarks = F.relu(self.bn8(self.conv8(F.relu(self.bn7(self.conv7(self.p3(F.relu(self.bn6(self.conv6(F.relu(self.bn5(self.conv5(self.p2(F.relu(self.bn4(self.conv4(F.relu(self.bn3(self.conv3(self.p1(F.relu(self.bn2(self.conv2(F.relu(self.bn1(self.conv1(landmarks))))))))))))))))))))))))))) 218 | elif self.n_layers == 6: 219 | landmarks = 
self.p3(F.relu(self.bn6(self.conv6(F.relu(self.bn5(self.conv5(self.p2(F.relu(self.bn4(self.conv4(F.relu(self.bn3(self.conv3(self.p1(F.relu(self.bn2(self.conv2(F.relu(self.bn1(self.conv1(landmarks))))))))))))))))))))) 220 | elif self.n_layers == 4: 221 | landmarks = self.p2(F.relu(self.bn4(self.conv4(F.relu(self.bn3(self.conv3(self.p1(F.relu(self.bn2(self.conv2(F.relu(self.bn1(self.conv1(landmarks)))))))))))))) 222 | elif self.n_layers == 2: 223 | landmarks = self.p1(F.relu(self.bn2(self.conv2(F.relu(self.bn1(self.conv1(landmarks))))))) 224 | else: 225 | print('Not specify n_layers') 226 | # Permute back: (b, dim, d_seq) --> (b, seq, dim) 227 | landmarks = landmarks.permute(0, 2, 1) 228 | # flat it to feed into fc: (b x seq, dim) 229 | landmarks = landmarks.contiguous() 230 | batch_size, seq_len, dim_feature = landmarks.shape 231 | landmarks = landmarks.view(-1, dim_feature) 232 | landmarks = F.tanh(self.lc1(self.dropout(landmarks))) # (b x seq, 1) 233 | landmarks = self.lc2(self.dropout(landmarks)) 234 | # unflat back to (b, seq, 1) 235 | landmarks = landmarks.view(batch_size, seq_len, 1) 236 | 237 | logit_list = [] 238 | if self.n_layers == 8 or self.n_layers == 6: 239 | for i, landmark in enumerate(landmarks): 240 | logit_list.append(self.glbAvgPool(landmark[:int(lengths[i]/8)].unsqueeze(0).permute(0, 2, 1)).squeeze(-1)) 241 | if self.n_layers == 4: 242 | for i, landmark in enumerate(landmarks): 243 | logit_list.append(self.glbAvgPool(landmark[:int(lengths[i]/4)].unsqueeze(0).permute(0, 2, 1)).squeeze(-1)) 244 | if self.n_layers == 2: 245 | for i, landmark in enumerate(landmarks): 246 | logit_list.append(self.glbAvgPool(landmark[:int(lengths[i]/2)].unsqueeze(0).permute(0, 2, 1)).squeeze(-1)) 247 | 248 | return torch.cat(logit_list) 249 | 250 | 251 | class cnn_Classifier(nn.Module): 252 | 253 | def __init__(self, embedding_dim, hidden_dim, target_size=1, bidirectional=False): 254 | super(cnn_Classifier, self).__init__() 255 | self.hidden_dim = hidden_dim # can change to smaller ones 64 . 32. 
16
256 |         self.n_layers = 2  # 2, 4, 6, 8
257 |         self.use_bn = False
258 |         if self.n_layers >= 2:
259 |             self.conv1 = nn.Conv1d(in_channels=embedding_dim, out_channels=self.hidden_dim, kernel_size=3, padding=1)  # the landmark feature dimension is the input channel count, matching the (b, dim, seq) layout in forward()
260 |             self.conv2 = nn.Conv1d(in_channels=self.hidden_dim, out_channels=self.hidden_dim, kernel_size=3, padding=1)
261 |             if self.use_bn:
262 |                 self.bn1 = nn.BatchNorm1d(num_features=self.hidden_dim)
263 |                 self.bn2 = nn.BatchNorm1d(num_features=self.hidden_dim)
264 |             self.p1 = nn.MaxPool1d(kernel_size=2)
265 |         if self.n_layers >= 4:
266 |             self.conv3 = nn.Conv1d(in_channels=self.hidden_dim, out_channels=self.hidden_dim, kernel_size=3, padding=1)
267 |             self.conv4 = nn.Conv1d(in_channels=self.hidden_dim, out_channels=self.hidden_dim, kernel_size=3, padding=1)
268 |             if self.use_bn:
269 |                 self.bn3 = nn.BatchNorm1d(num_features=self.hidden_dim)
270 |                 self.bn4 = nn.BatchNorm1d(num_features=self.hidden_dim)
271 |             self.p2 = nn.MaxPool1d(kernel_size=2)
272 |         if self.n_layers >= 6:
273 |             self.conv5 = nn.Conv1d(in_channels=self.hidden_dim, out_channels=self.hidden_dim, kernel_size=3, padding=1)
274 |             self.conv6 = nn.Conv1d(in_channels=self.hidden_dim, out_channels=self.hidden_dim, kernel_size=3, padding=1)
275 |             if self.use_bn:
276 |                 self.bn5 = nn.BatchNorm1d(num_features=self.hidden_dim)
277 |                 self.bn6 = nn.BatchNorm1d(num_features=self.hidden_dim)
278 |             self.p3 = nn.MaxPool1d(kernel_size=2)
279 |         if self.n_layers == 8:
280 |             self.conv7 = nn.Conv1d(in_channels=self.hidden_dim, out_channels=self.hidden_dim, kernel_size=3, padding=1)
281 |             self.conv8 = nn.Conv1d(in_channels=self.hidden_dim, out_channels=self.hidden_dim, kernel_size=3, padding=1)
282 |             if self.use_bn:
283 |                 self.bn7 = nn.BatchNorm1d(num_features=self.hidden_dim)
284 |                 self.bn8 = nn.BatchNorm1d(num_features=self.hidden_dim)
285 |         self.glbAvgPool = nn.AdaptiveAvgPool1d(1)  # pools the per-frame logits over the (downsampled) time axis in forward()
286 |         self.dropout = nn.Dropout(DROPOUT)
287 |         # The linear layer that maps from hidden state space to tag space
288 |         self.lc1 = nn.Linear(hidden_dim, int(hidden_dim*2))
289 |         self.lc2 = nn.Linear(int(hidden_dim*2), target_size)
290 |
291 |     def forward(self, landmarks, lengths):
292 |         landmarks = landmarks.permute(0, 2, 1)  # (b, seq, dim) --> (b, dim, seq)
293 |         # Convolve on Seq for each dim to get (b, dim, seq)
294 |         if self.use_bn:
295 |             if self.n_layers == 8:
296 |                 landmarks = F.relu(self.bn8(self.conv8(F.relu(self.bn7(self.conv7(self.p3(F.relu(self.bn6(self.conv6(F.relu(self.bn5(self.conv5(self.p2(F.relu(self.bn4(self.conv4(F.relu(self.bn3(self.conv3(self.p1(F.relu(self.bn2(self.conv2(F.relu(self.bn1(self.conv1(landmarks)))))))))))))))))))))))))))
297 |             elif self.n_layers == 6:
298 |                 landmarks = self.p3(F.relu(self.bn6(self.conv6(F.relu(self.bn5(self.conv5(self.p2(F.relu(self.bn4(self.conv4(F.relu(self.bn3(self.conv3(self.p1(F.relu(self.bn2(self.conv2(F.relu(self.bn1(self.conv1(landmarks)))))))))))))))))))))
299 |             elif self.n_layers == 4:
300 |                 landmarks = self.p2(F.relu(self.bn4(self.conv4(F.relu(self.bn3(self.conv3(self.p1(F.relu(self.bn2(self.conv2(F.relu(self.bn1(self.conv1(landmarks))))))))))))))
301 |             elif self.n_layers == 2:
302 |                 landmarks = self.p1(F.relu(self.bn2(self.conv2(F.relu(self.bn1(self.conv1(landmarks)))))))
303 |             else:
304 |                 print('Not specify n_layers')
305 |         else:
306 |             if self.n_layers == 8:
307 |                 landmarks = F.relu(self.conv8(F.relu(self.conv7(self.p3(F.relu(self.conv6(F.relu(self.conv5(self.p2(F.relu(self.conv4(F.relu(self.conv3(self.p1(F.relu(self.conv2(F.relu(self.conv1(landmarks)))))))))))))))))))
308 |             elif self.n_layers == 6:
309 |                 landmarks = 
self.p3(F.relu(self.conv6(F.relu(self.conv5(self.p2(F.relu(self.conv4(F.relu(self.conv3(self.p1(F.relu(self.conv2(F.relu(self.conv1(landmarks))))))))))))))) 310 | elif self.n_layers == 4: 311 | landmarks = self.p2(F.relu(self.conv4(F.relu(self.conv3(self.p1(F.relu(self.conv2(F.relu(self.conv1(landmarks)))))))))) 312 | elif self.n_layers == 2: 313 | landmarks = self.p1(F.relu(self.conv2(F.relu(self.conv1(landmarks))))) 314 | else: 315 | print('Not specify n_layers') 316 | # Permute back: (b, dim, d_seq) --> (b, seq, dim) 317 | landmarks = landmarks.permute(0, 2, 1) 318 | # flat it to feed into fc: (b x seq, dim) 319 | landmarks = landmarks.contiguous() 320 | batch_size, seq_len, dim_feature = landmarks.shape 321 | landmarks = landmarks.view(-1, dim_feature) 322 | landmarks = F.tanh(self.lc1(self.dropout(landmarks))) # (b x seq, 1) 323 | landmarks = self.lc2(self.dropout(landmarks)) 324 | # unflat back to (b, seq, 1) 325 | landmarks = landmarks.view(batch_size, seq_len, 1) 326 | 327 | logit_list = [] 328 | if self.n_layers == 8 or self.n_layers == 6: 329 | for i, landmark in enumerate(landmarks): 330 | logit_list.append(self.glbAvgPool(landmark[:int(lengths[i]/8)].unsqueeze(0).permute(0, 2, 1)).squeeze(-1)) 331 | if self.n_layers == 4: 332 | for i, landmark in enumerate(landmarks): 333 | logit_list.append(self.glbAvgPool(landmark[:int(lengths[i]/4)].unsqueeze(0).permute(0, 2, 1)).squeeze(-1)) 334 | if self.n_layers == 2: 335 | for i, landmark in enumerate(landmarks): 336 | logit_list.append(self.glbAvgPool(landmark[:int(lengths[i]/2)].unsqueeze(0).permute(0, 2, 1)).squeeze(-1)) 337 | 338 | return torch.cat(logit_list) 339 | 340 | 341 | class crnn_Classifier(nn.Module): 342 | 343 | def __init__(self, embedding_dim, hidden_dim, target_size=1, bidirectional=False, n_layer=1): 344 | super(crnn_Classifier, self).__init__() 345 | self.hidden_dim = hidden_dim 346 | self.n_layers = 4 # 2, 4, 6 ,8 347 | if self.n_layers >= 2: 348 | self.conv1 = nn.Conv1d(in_channels=embedding_dim, out_channels=hidden_dim, kernel_size=3, padding=1) 349 | self.conv2 = nn.Conv1d(in_channels=hidden_dim, out_channels=hidden_dim, kernel_size=3, padding=1) 350 | self.bn1 = nn.BatchNorm1d(num_features=self.hidden_dim) 351 | self.bn2 = nn.BatchNorm1d(num_features=self.hidden_dim) 352 | self.p1 = nn.MaxPool1d(kernel_size=2) 353 | self.scale_pool = 2 354 | if self.n_layers >= 4: 355 | self.conv3 = nn.Conv1d(in_channels=hidden_dim, out_channels=hidden_dim, kernel_size=3, padding=1) 356 | self.conv4 = nn.Conv1d(in_channels=hidden_dim, out_channels=hidden_dim, kernel_size=3, padding=1) 357 | self.bn3 = nn.BatchNorm1d(num_features=self.hidden_dim) 358 | self.bn4 = nn.BatchNorm1d(num_features=self.hidden_dim) 359 | self.p2 = nn.MaxPool1d(kernel_size=2) 360 | self.scale_pool = 4 361 | if self.n_layers >= 6: 362 | self.conv5 = nn.Conv1d(in_channels=hidden_dim, out_channels=hidden_dim, kernel_size=3, padding=1) 363 | self.conv6 = nn.Conv1d(in_channels=hidden_dim, out_channels=hidden_dim, kernel_size=3, padding=1) 364 | self.bn5 = nn.BatchNorm1d(num_features=self.hidden_dim) 365 | self.bn6 = nn.BatchNorm1d(num_features=self.hidden_dim) 366 | self.p3 = nn.MaxPool1d(kernel_size=2) 367 | self.scale_pool = 8 368 | if self.n_layers == 8: 369 | self.conv7 = nn.Conv1d(in_channels=hidden_dim, out_channels=hidden_dim, kernel_size=3, padding=1) 370 | self.conv8 = nn.Conv1d(in_channels=hidden_dim, out_channels=hidden_dim, kernel_size=3, padding=1) 371 | self.bn7 = nn.BatchNorm1d(num_features=self.hidden_dim) 372 | self.bn8 = 
nn.BatchNorm1d(num_features=self.hidden_dim) 373 | self.scale_pool = 8 374 | 375 | self.dropout = nn.Dropout(DROPOUT) 376 | self.gru = nn.GRU(hidden_dim, hidden_dim, num_layers=n_layer, bidirectional=bidirectional, dropout=DROPOUT) 377 | self.grad_clipping = 10. 378 | # The linear layer that maps from hidden state space to tag space 379 | self.lc1 = nn.Linear(hidden_dim, embedding_dim) 380 | self.lc2 = nn.Linear(embedding_dim, target_size) 381 | 382 | def forward(self, landmarks, lengths): 383 | landmarks = landmarks.permute(0, 2, 1) # (b, seq, dim) --> (b, dim, seq) 384 | # Convolve on Seq for each dim to get (b, dim, seq) 385 | if self.n_layers == 8: 386 | landmarks = F.relu(self.bn8(self.conv8(F.relu(self.bn7(self.conv7(self.p3(F.relu(self.bn6(self.conv6(F.relu(self.bn5(self.conv5(self.p2(F.relu(self.bn4(self.conv4(F.relu(self.bn3(self.conv3(self.p1(F.relu(self.bn2(self.conv2(F.relu(self.bn1(self.conv1(landmarks))))))))))))))))))))))))))) 387 | elif self.n_layers == 6: 388 | landmarks = self.p3(F.relu(self.bn6(self.conv6(F.relu(self.bn5(self.conv5(self.p2(F.relu(self.bn4(self.conv4(F.relu(self.bn3(self.conv3(self.p1(F.relu(self.bn2(self.conv2(F.relu(self.bn1(self.conv1(landmarks))))))))))))))))))))) 389 | elif self.n_layers == 4: 390 | landmarks = self.p2(F.relu(self.bn4(self.conv4(F.relu(self.bn3(self.conv3(self.p1(F.relu(self.bn2(self.conv2(F.relu(self.bn1(self.conv1(landmarks)))))))))))))) 391 | elif self.n_layers == 2: 392 | landmarks = self.p1(F.relu(self.bn2(self.conv2(F.relu(self.bn1(self.conv1(landmarks))))))) 393 | else: 394 | print('Not specify n_layers') 395 | 396 | # Permute back: (b, dim, d_seq) --> (b, seq, dim) with shorter seq 397 | landmarks = landmarks.permute(0, 2, 1) 398 | # Feed into GRU 399 | # import pdb; pdb.set_trace() 400 | # packed_input = pack_padded_sequence(self.dropout(landmarks), torch.IntTensor(lengths)/self.scale_pool, batch_first=True) 401 | packed_input = pack_padded_sequence(self.dropout(landmarks), tuple(int(x/self.scale_pool) for x in lengths), batch_first=True) 402 | _, ht = self.gru(packed_input) 403 | if ht.requires_grad: 404 | ht.register_hook(lambda x: x.clamp(min=-self.grad_clipping, max=self.grad_clipping)) 405 | ht = self.dropout(ht[-1]) 406 | logit = F.relu(self.lc1(ht)) 407 | logit = self.lc2(self.dropout(logit)) 408 | return logit 409 | 410 | # to be implemented 411 | class FrameCRNN(nn.Module): 412 | 413 | def __init__(self, embedding_dim, hidden_dim, target_size=1, bidirectional=False, n_layer=1): 414 | super(FrameCRNN, self).__init__() 415 | self.hidden_dim = hidden_dim 416 | self.n_layers = 2 # 2, 4, 6 ,8 417 | if self.n_layers >= 2: 418 | self.conv1 = nn.Conv1d(in_channels=embedding_dim, out_channels=hidden_dim, kernel_size=3, padding=1) 419 | self.conv2 = nn.Conv1d(in_channels=hidden_dim, out_channels=hidden_dim, kernel_size=3, padding=1) 420 | self.bn1 = nn.BatchNorm1d(num_features=self.hidden_dim) 421 | self.bn2 = nn.BatchNorm1d(num_features=self.hidden_dim) 422 | self.p1 = nn.MaxPool1d(kernel_size=2) 423 | self.scale_pool = 2 424 | if self.n_layers >= 4: 425 | self.conv3 = nn.Conv1d(in_channels=hidden_dim, out_channels=hidden_dim, kernel_size=3, padding=1) 426 | self.conv4 = nn.Conv1d(in_channels=hidden_dim, out_channels=hidden_dim, kernel_size=3, padding=1) 427 | self.bn3 = nn.BatchNorm1d(num_features=self.hidden_dim) 428 | self.bn4 = nn.BatchNorm1d(num_features=self.hidden_dim) 429 | self.p2 = nn.MaxPool1d(kernel_size=2) 430 | self.scale_pool = 4 431 | if self.n_layers >= 6: 432 | self.conv5 = nn.Conv1d(in_channels=hidden_dim, 
out_channels=hidden_dim, kernel_size=3, padding=1) 433 | self.conv6 = nn.Conv1d(in_channels=hidden_dim, out_channels=hidden_dim, kernel_size=3, padding=1) 434 | self.bn5 = nn.BatchNorm1d(num_features=self.hidden_dim) 435 | self.bn6 = nn.BatchNorm1d(num_features=self.hidden_dim) 436 | self.p3 = nn.MaxPool1d(kernel_size=2) 437 | self.scale_pool = 8 438 | if self.n_layers == 8: 439 | self.conv7 = nn.Conv1d(in_channels=hidden_dim, out_channels=hidden_dim, kernel_size=3, padding=1) 440 | self.conv8 = nn.Conv1d(in_channels=hidden_dim, out_channels=hidden_dim, kernel_size=3, padding=1) 441 | self.bn7 = nn.BatchNorm1d(num_features=self.hidden_dim) 442 | self.bn8 = nn.BatchNorm1d(num_features=self.hidden_dim) 443 | self.scale_pool = 8 444 | 445 | self.dropout = nn.Dropout(DROPOUT) 446 | self.gru = nn.GRU(hidden_dim, hidden_dim, num_layers=n_layer, bidirectional=bidirectional, dropout=DROPOUT) 447 | 448 | # The linear layer that maps from hidden state space to tag space 449 | self.lc1 = nn.Linear(hidden_dim, embedding_dim) 450 | self.lc2 = nn.Linear(embedding_dim, target_size) 451 | 452 | def forward(self, landmarks, lengths): 453 | landmarks = landmarks.permute(0, 2, 1) # (b, seq, dim) --> (b, dim, seq) 454 | # Convolve on Seq for each dim to get (b, dim, seq) 455 | if self.n_layers == 8: 456 | landmarks = F.relu(self.bn8(self.conv8(F.relu(self.bn7(self.conv7(self.p3(F.relu(self.bn6(self.conv6(F.relu(self.bn5(self.conv5(self.p2(F.relu(self.bn4(self.conv4(F.relu(self.bn3(self.conv3(self.p1(F.relu(self.bn2(self.conv2(F.relu(self.bn1(self.conv1(landmarks))))))))))))))))))))))))))) 457 | elif self.n_layers == 6: 458 | landmarks = self.p3(F.relu(self.bn6(self.conv6(F.relu(self.bn5(self.conv5(self.p2(F.relu(self.bn4(self.conv4(F.relu(self.bn3(self.conv3(self.p1(F.relu(self.bn2(self.conv2(F.relu(self.bn1(self.conv1(landmarks))))))))))))))))))))) 459 | elif self.n_layers == 4: 460 | landmarks = self.p2(F.relu(self.bn4(self.conv4(F.relu(self.bn3(self.conv3(self.p1(F.relu(self.bn2(self.conv2(F.relu(self.bn1(self.conv1(landmarks)))))))))))))) 461 | elif self.n_layers == 2: 462 | landmarks = self.p1(F.relu(self.bn2(self.conv2(F.relu(self.bn1(self.conv1(landmarks))))))) 463 | else: 464 | print('Not specify n_layers') 465 | 466 | # Permute back: (b, dim, d_seq) --> (b, seq, dim) with shorter seq 467 | landmarks = landmarks.permute(0, 2, 1) 468 | # Feed into GRU 469 | packed_input = pack_padded_sequence(self.dropout(landmarks), torch.IntTensor(lengths)/self.scale_pool, batch_first=True) 470 | _, ht = self.gru(packed_input) 471 | ht = self.dropout(ht[-1]) 472 | logit = F.relu(self.lc1(ht)) 473 | logit = self.lc2(self.dropout(logit)) 474 | return logit 475 | 476 | 477 | 478 | 479 | 480 | # class LSTM_Classifier(nn.Module): 481 | # 482 | # def __init__(self, embedding_dim, hidden_dim, target_size=1): 483 | # super(LSTM_Classifier, self).__init__() 484 | # self.hidden_dim = hidden_dim 485 | # 486 | # # The LSTM takes word embeddings as inputs, and outputs hidden states 487 | # # with dimensionality hidden_dim. 
488 | # self.lstm = nn.LSTM(embedding_dim, hidden_dim) 489 | # 490 | # # The linear layer that maps from hidden state space to tag space 491 | # self.lc = nn.Linear(hidden_dim, target_size) 492 | # 493 | # def forward(self, landmarks, lengths): 494 | # # pack_padded_sequence so that padded items in the sequence won't be shown to the LSTM 495 | # packed_input = pack_padded_sequence(landmarks, lengths, batch_first=True) 496 | # _, (ht, _) = self.lstm(packed_input) 497 | # import pdb; 498 | # pdb.set_trace() 499 | # # packed_output, (ht, ct) = self.lstm(packed_input) # ht is the final output of each batch! ht (1, 4, 272) can be found in output[:,input_sizes-1,:] 500 | # # output, input_sizes = pad_packed_sequence(packed_output, batch_first=True) 501 | # ''' 502 | # (Pdb) output[:,input_sizes-1,:] 503 | # tensor([[[-0.0176, 0.1605, 0.1339, ..., -0.0914, 0.2951, -0.0065], 504 | # [-0.0225, 0.1589, 0.1340, ..., -0.0925, 0.2950, -0.0095], 505 | # [-0.0253, 0.1574, 0.1431, ..., -0.0865, 0.3022, -0.0119], 506 | # [-0.0303, 0.1515, 0.1422, ..., -0.1094, 0.2976, -0.0032]], 507 | # 508 | # [[ 0.0000, 0.0000, 0.0000, ..., 0.0000, 0.0000, 0.0000], 509 | # [-0.0165, 0.1666, 0.1344, ..., -0.0698, 0.2945, -0.0163], 510 | # [-0.0235, 0.1697, 0.1479, ..., -0.0657, 0.3001, -0.0195], 511 | # [-0.0235, 0.1734, 0.1515, ..., -0.0608, 0.3029, -0.0201]], 512 | # 513 | # [[ 0.0000, 0.0000, 0.0000, ..., 0.0000, 0.0000, 0.0000], 514 | # [ 0.0000, 0.0000, 0.0000, ..., 0.0000, 0.0000, 0.0000], 515 | # [-0.0492, 0.1666, 0.1444, ..., -0.0749, 0.2816, -0.0188], 516 | # [-0.0490, 0.1542, 0.1449, ..., -0.0865, 0.2821, -0.0205]], 517 | # 518 | # [[ 0.0000, 0.0000, 0.0000, ..., 0.0000, 0.0000, 0.0000], 519 | # [ 0.0000, 0.0000, 0.0000, ..., 0.0000, 0.0000, 0.0000], 520 | # [ 0.0000, 0.0000, 0.0000, ..., 0.0000, 0.0000, 0.0000], 521 | # [-0.0460, 0.1522, 0.1381, ..., -0.0959, 0.2843, -0.0071]]], 522 | # device='cuda:2', grad_fn=) 523 | # (Pdb) ht.shape 524 | # torch.Size([1, 4, 272]) 525 | # (Pdb) ht 526 | # tensor([[[-0.0176, 0.1605, 0.1339, ..., -0.0914, 0.2951, -0.0065], 527 | # [-0.0165, 0.1666, 0.1344, ..., -0.0698, 0.2945, -0.0163], 528 | # [-0.0492, 0.1666, 0.1444, ..., -0.0749, 0.2816, -0.0188], 529 | # [-0.0460, 0.1522, 0.1381, ..., -0.0959, 0.2843, -0.0071]]], 530 | # device='cuda:2', grad_fn=) 531 | # 532 | # ''' 533 | # # import pdb; 534 | # # pdb.set_trace() 535 | # logit = self.lc(ht.squeeze(0)) 536 | # return logit -------------------------------------------------------------------------------- /plot_log.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import sys 3 | import os 4 | import matplotlib 5 | matplotlib.use('Agg') 6 | import matplotlib.pyplot as plt 7 | 8 | def main(argv): 9 | 10 | parser = argparse.ArgumentParser() 11 | parser.add_argument( 12 | "log_file", 13 | help = "path to log file" 14 | ) 15 | args = parser.parse_args() 16 | 17 | if os.path.isdir(args.log_file): 18 | for root, _, files in os.walk(args.log_file): 19 | if len(files) > 0: 20 | for file in files: 21 | if file.find('txt') > 0: 22 | print(file) 23 | f = open(root+'/'+file) 24 | lines = [line.rstrip("\n") for line in f.readlines()] 25 | epochs = [] 26 | train_acc, valid_acc = [], [] 27 | train_loss, valid_loss = [], [] 28 | 29 | for line in lines: 30 | try: 31 | line_list = line.split(',') 32 | if line_list[0][:5] == 'Epoch': 33 | a = int(line_list[0][5:]) 34 | b = float(line_list[2][:-1]) 35 | c = float(line_list[4]) 36 | d = float(line_list[6][:-1]) 37 | e = 
float(line_list[8]) 38 | epochs.append(a) 39 | train_acc.append(b) 40 | train_loss.append(c) 41 | valid_acc.append(d) 42 | valid_loss.append(e) 43 | except: 44 | pass 45 | print('missing a few epoch') 46 | fig = plt.figure(figsize=(14, 10)) 47 | ax1 = fig.add_subplot(2, 1, 1) 48 | ax1.plot(epochs, train_acc, 'r', label='train_acc') 49 | ax1.plot(epochs, valid_acc, 'b', label='valid_acc') 50 | ax1.grid() 51 | ax1.title.set_text('Accuracy') 52 | ax1.set_xlabel('epochs') 53 | ax1.set_ylabel('accuracy %') 54 | ax1.legend() 55 | ax2 = fig.add_subplot(2, 1, 2) 56 | ax2.plot(epochs, train_loss, 'r', label='train_loss') 57 | ax2.plot(epochs, valid_loss, 'b', label='valid_loss') 58 | ax2.grid() 59 | ax2.title.set_text('Loss') 60 | ax2.set_xlabel('epochs') 61 | ax2.set_ylabel('loss') 62 | ax2.legend() 63 | # plt.show() 64 | plt.savefig(root + '/' + file[:-3] + 'png') 65 | plt.close() 66 | else: 67 | f = open(args.log_file) 68 | lines = [line.rstrip("\n") for line in f.readlines()] 69 | epochs = [] 70 | train_acc, valid_acc = [], [] 71 | train_loss, valid_loss = [], [] 72 | 73 | for line in lines: 74 | line_list = line.split(',') 75 | if line_list[0][:5] == 'Epoch': 76 | line_list = line.split(',') 77 | epochs.append(int(line_list[0][5:])) 78 | train_acc.append(float(line_list[2][:-1])) 79 | train_loss.append(float(line_list[4])) 80 | valid_acc.append(float(line_list[6][:-1])) 81 | valid_loss.append(float(line_list[8])) 82 | # import pdb; 83 | # pdb.set_trace() 84 | fig = plt.figure(figsize=(14, 10)) 85 | # import pdb; 86 | # pdb.set_trace() 87 | ax1 = fig.add_subplot(2, 1, 1) 88 | ax1.plot(epochs, train_acc, 'r', label='train_acc') 89 | ax1.plot(epochs, valid_acc, 'b', label='valid_acc') 90 | ax1.grid() 91 | ax1.title.set_text('Accuracy') 92 | ax1.set_xlabel('epochs') 93 | ax1.set_ylabel('accuracy %') 94 | ax1.legend() 95 | ax2 = fig.add_subplot(2, 1, 2) 96 | ax2.plot(epochs, train_loss, 'r', label='train_loss') 97 | ax2.plot(epochs, valid_loss, 'b', label='valid_loss') 98 | ax2.grid() 99 | ax2.title.set_text('Loss') 100 | ax2.set_xlabel('epochs') 101 | ax2.set_ylabel('loss') 102 | ax2.legend() 103 | # plt.show() 104 | plt.savefig(args.log_file[:-3]+'png') 105 | 106 | if __name__ == "__main__": 107 | main(sys.argv) -------------------------------------------------------------------------------- /test.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | import torch.optim as optim 5 | from dataset import LandmarkList, LandmarkListTest 6 | from torch.utils import data 7 | from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence 8 | import argparse 9 | 10 | from model import * 11 | 12 | # rnn = 'frameGRU' 13 | # rnn = 'sumGRU' 14 | # rnn = 'crnn' 15 | # rnn = 'cnn' 16 | # rnn = 'GRU' 17 | # rnn = 'framewise_GRU' 18 | # rnn = 'embedGRU' 19 | rnn = 'biGRU' 20 | # rnn = 'LSTM' 21 | EMBEDDING_DIM = int(68 * 67 /2) 22 | HIDDEN_DIM = 128 23 | N_LAYERS_RNN = 3 24 | LR = 1e-4 25 | DEVICES = 0 26 | torch.cuda.set_device(DEVICES) 27 | 28 | 29 | def compute_binary_accuracy(model, data_loader, th_list): 30 | len_th_list = len(th_list) 31 | correct_pred, num_examples, FP, FN = [0.]*len_th_list, 0, [0]*len_th_list, [0]*len_th_list 32 | FP_list = [] 33 | FN_list = [] 34 | for _ in range(len_th_list): 35 | FP_list.append([]) 36 | FN_list.append([]) 37 | model.eval() 38 | with torch.no_grad(): 39 | if rnn == 'frameGRU': 40 | for batch, labels, lengths, f_names in data_loader: 41 | logits = 
--------------------------------------------------------------------------------
/test.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.nn.functional as F
4 | import torch.optim as optim
5 | from dataset import LandmarkList, LandmarkListTest
6 | from torch.utils import data
7 | from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence
8 | import argparse
9 | 
10 | from model import *
11 | 
12 | # rnn = 'frameGRU'
13 | # rnn = 'sumGRU'
14 | # rnn = 'crnn'
15 | # rnn = 'cnn'
16 | # rnn = 'GRU'
17 | # rnn = 'framewise_GRU'
18 | # rnn = 'embedGRU'
19 | rnn = 'biGRU'
20 | # rnn = 'LSTM'
21 | EMBEDDING_DIM = int(68 * 67 / 2)
22 | HIDDEN_DIM = 128
23 | N_LAYERS_RNN = 3
24 | LR = 1e-4
25 | DEVICES = 0
26 | torch.cuda.set_device(DEVICES)
27 | 
28 | 
29 | def compute_binary_accuracy(model, data_loader, th_list):
30 |     len_th_list = len(th_list)
31 |     correct_pred, num_examples, FP, FN = [0.]*len_th_list, 0, [0]*len_th_list, [0]*len_th_list
32 |     FP_list = []
33 |     FN_list = []
34 |     for _ in range(len_th_list):
35 |         FP_list.append([])
36 |         FN_list.append([])
37 |     model.eval()
38 |     with torch.no_grad():
39 |         if rnn == 'frameGRU':
40 |             for batch, labels, lengths, f_names in data_loader:
41 |                 logits = model(batch.cuda(), lengths)
42 |                 out = torch.sigmoid(logits)
43 |                 # average the per-frame probabilities of each sequence into one score
44 |                 new_out_list = []
45 |                 for i in range(len(lengths)):
46 |                     new_out_list.append(out[i][:lengths[i]].mean(0, keepdim=True))
47 |                 out = torch.cat(new_out_list, 0)
48 |                 num_examples += len(lengths)
49 |                 for i, th in enumerate(th_list):
50 |                     predicted_labels = (out > th).long()
51 |                     # batch_size is 1, so these single-element tensor comparisons are safe
52 |                     if predicted_labels.squeeze(1).cpu().long() == torch.LongTensor(labels):
53 |                         correct_pred[i] += 1
54 |                     elif labels == 0:
55 |                         FP[i] += 1
56 |                         FP_list[i].append(f_names[0] + '_' + str(labels.item()) + '_' + str(out.squeeze(1).cpu().item()))
57 |                     else:
58 |                         FN[i] += 1
59 |                         FN_list[i].append(f_names[0] + '_' + str(labels.item()) + '_' + str(out.squeeze(1).cpu().item()))
60 |             return [n_correct/num_examples * 100 for n_correct in correct_pred], FP, FN, FP_list, FN_list
61 |         else:
62 |             for batch, labels, lengths, f_names in data_loader:
63 |                 logits = model(batch.cuda(), lengths)
64 |                 probs = torch.sigmoid(logits)  # compute once, reuse for every threshold
65 |                 num_examples += len(lengths)
66 |                 for i, th in enumerate(th_list):
67 |                     predicted_labels = (probs > th).long()
68 |                     if predicted_labels.squeeze(1).cpu().long() == torch.LongTensor(labels):
69 |                         correct_pred[i] += 1
70 |                     elif labels == 0:
71 |                         FP[i] += 1
72 |                         FP_list[i].append(f_names[0]+'_'+str(labels.item())+'_'+str(probs.squeeze(1).cpu().item()))
73 |                     else:
74 |                         FN[i] += 1
75 |                         FN_list[i].append(f_names[0]+'_'+str(labels.item())+'_'+str(probs.squeeze(1).cpu().item()))
76 |             return [n_correct/num_examples * 100 for n_correct in correct_pred], FP, FN, FP_list, FN_list
77 | 
78 | 
79 | if rnn == 'frameGRU':
80 |     model = Framewise_GRU_Classifier(EMBEDDING_DIM, HIDDEN_DIM, 1, n_layer=N_LAYERS_RNN)
81 | if rnn == 'sumGRU':
82 |     model = sumGRU(EMBEDDING_DIM, HIDDEN_DIM, 1, n_layer=N_LAYERS_RNN)
83 | if rnn == 'embedGRU':
84 |     model = embed_GRU_Classifier(EMBEDDING_DIM, HIDDEN_DIM, 1, n_layer=N_LAYERS_RNN)
85 |     model.load_state_dict(torch.load("models/" + str(rnn) + "_L" + str(N_LAYERS_RNN) + ".pt"))
86 | if rnn == 'GRU':
87 |     model = GRU_Classifier(EMBEDDING_DIM, HIDDEN_DIM, 1, n_layer=N_LAYERS_RNN)
88 |     model.load_state_dict(torch.load("models/" + str(rnn) + "_L" + str(N_LAYERS_RNN) + ".pt"))
89 | if rnn == 'biGRU':
90 |     model = biGRU_Classifier(EMBEDDING_DIM, HIDDEN_DIM, 1, n_layer=N_LAYERS_RNN)
91 |     model.load_state_dict(torch.load("models/" + str(rnn) + "_L" + str(N_LAYERS_RNN) + ".pt"))
92 | if rnn == 'LSTM':
93 |     model = LSTM_Classifier(EMBEDDING_DIM, HIDDEN_DIM, 1, n_layer=N_LAYERS_RNN)
94 | if rnn == 'cnn':
95 |     model = cnn_Classifier(EMBEDDING_DIM, HIDDEN_DIM, 1)
96 | if rnn == 'crnn':
97 |     model = crnn_Classifier(EMBEDDING_DIM, HIDDEN_DIM, 1, n_layer=N_LAYERS_RNN)
98 | # model.load_state_dict(torch.load("models/"+str(rnn)+".pt"))
99 | model = model.cuda()
100 | 
101 | loss_function = torch.nn.BCEWithLogitsLoss()
102 | loss_function_eval_sum = torch.nn.BCEWithLogitsLoss(reduction='sum')
103 | optimizer = optim.Adam(model.parameters(), lr=LR)
104 | 
105 | dataset_train = LandmarkListTest(root='/datasets/move_closer/Data_Distortion/', fileList='/datasets/move_closer/TrainList.txt')
106 | dataloader_train = data.DataLoader(dataset_train, batch_size=1, shuffle=False, num_workers=0)
107 | 
108 | dataset_test = LandmarkListTest(root='/datasets/move_closer/Data_Distortion/', fileList='/datasets/move_closer/TestList.txt')
109 | dataloader_test = data.DataLoader(dataset_test, batch_size=1, shuffle=False, num_workers=0)
110 | 
111 | # thresholds = [x * 0.01 for x in range(30, 71)]
112 | thresholds = [0.5]
113 | 
114 | train_acc, train_fp, train_fn, train_fp_list, train_fn_list = compute_binary_accuracy(model, dataloader_train, thresholds)
115 | test_acc, test_fp, test_fn, test_fp_list, test_fn_list = compute_binary_accuracy(model, dataloader_test, thresholds)
116 | 
117 | for i in range(len(thresholds)):
118 |     print('\n\n-----------------Eval for threshold of {:.2f}-------------------\n\n'.format(thresholds[i]))
119 |     print('train_acc,{:.2f}%,train_fp,{},train_fn,{}\nvalid_acc,{:.2f}%,valid_fp,{},valid_fn,{}\n'
120 |           .format(train_acc[i], train_fp[i], train_fn[i], test_acc[i], test_fp[i], test_fn[i]))
121 |     print('Train FP')
122 |     for n in train_fp_list[i]:
123 |         print(n)
124 |     print('\nTrain FN')
125 |     for n in train_fn_list[i]:
126 |         print(n)
127 | 
128 |     print('\n\n\nTest FP')
129 |     for n in test_fp_list[i]:
130 |         print(n)
131 |     print('\nTest FN')
132 |     for n in test_fn_list[i]:
133 |         print(n)
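134 | 
135 | # To trade false positives against false negatives, sweep the decision threshold
136 | # instead of the fixed 0.5, e.g. the commented list above:
137 | #     thresholds = [x * 0.01 for x in range(30, 71)]
138 | # compute_binary_accuracy accepts the whole list, so a single pass over each
139 | # loader scores every threshold at once.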
--------------------------------------------------------------------------------
/train.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.nn.functional as F
4 | import torch.optim as optim
5 | from dataset import LandmarkList
6 | from torch.utils import data
7 | from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence
8 | import argparse
9 | 
10 | from model import *
11 | 
12 | # rnn = 'frameGRU'
13 | # to be implemented - rnn = 'frameCRNN'
14 | rnn = 'sumGRU'
15 | # rnn = 'crnn'
16 | # rnn = 'cnn'
17 | # rnn = 'GRU'
18 | # rnn = 'embedGRU'
19 | # rnn = 'biGRU'
20 | # rnn = 'LSTM'
21 | EMBEDDING_DIM = int(68 * 67 / 2)
22 | HIDDEN_DIM = 128
23 | N_LAYERS_RNN = 1
24 | MAX_EPOCH = 30000
25 | LR = 1e-4
26 | DEVICES = 3
27 | SAVE_BEST_MODEL = True
28 | torch.cuda.set_device(DEVICES)
29 | 
30 | 
31 | def compute_binary_accuracy(model, data_loader, loss_function):
32 |     correct_pred, num_examples, total_loss = 0, 0, 0.
33 |     model.eval()
34 |     with torch.no_grad():
35 |         if rnn == 'frameGRU' or rnn == 'frameCRNN':
36 |             for batch, labels, lengths in data_loader:
37 |                 logits = model(batch.cuda(), lengths)
38 |                 out = torch.sigmoid(logits)
39 |                 new_labels_list = []
40 |                 new_logits_list = []
41 |                 new_out_list = []
42 |                 for i in range(len(lengths)):
43 |                     # repeat the sequence label for every frame; keep only un-padded frames
44 |                     new_labels_list += [labels[i]] * lengths[i]
45 |                     new_logits_list.append(logits[i][:lengths[i]])  # raw logits: BCEWithLogitsLoss applies the sigmoid itself
46 |                     new_out_list.append(out[i][:lengths[i]].mean(0, keepdim=True))
47 |                 logits_framewise = torch.cat(new_logits_list, 0)
48 |                 labels_framewise = new_labels_list
49 |                 out = torch.cat(new_out_list, 0)
50 |                 total_loss += loss_function(logits_framewise, torch.FloatTensor(labels_framewise).unsqueeze(1).cuda()).item()
51 |                 predicted_labels = (out > 0.5).long()
52 |                 num_examples += len(lengths)
53 |                 correct_pred += (predicted_labels.squeeze(1).cpu().long() == torch.LongTensor(labels)).sum()
54 |             return correct_pred.float().item()/num_examples * 100, total_loss
55 |         else:
56 |             for batch, labels, lengths in data_loader:
57 |                 logits = model(batch.cuda(), lengths)
58 |                 total_loss += loss_function(logits, torch.FloatTensor(labels).unsqueeze(1).cuda()).item()
59 |                 predicted_labels = (torch.sigmoid(logits) > 0.5).long()
60 |                 num_examples += len(lengths)
61 |                 correct_pred += (predicted_labels.squeeze(1).cpu().long() == torch.LongTensor(labels)).sum()
62 |             return correct_pred.float().item()/num_examples * 100, total_loss
63 | 
64 | 
65 | def pad_collate(batch):
66 |     # sort by length (longest first), as pack_padded_sequence requires
67 |     batch.sort(key=lambda x: x[2], reverse=True)
68 |     lms, tgs, lens = zip(*batch)
69 |     new_lms = torch.zeros((len(lms), lms[0].shape[0], lms[0].shape[1]))  # batch x max_seq_len x EMBEDDING_DIM
70 |     new_lms[0] = lms[0]
71 |     for i in range(1, len(lms)):
72 |         # zero-pad each shorter sequence up to the longest length in the batch
73 |         new_lms[i] = torch.cat((lms[i], torch.zeros((lens[0] - lens[i]), EMBEDDING_DIM)), 0)
74 |     return new_lms, tgs, lens
75 | 
76 | if rnn == 'frameGRU':
77 |     model = Framewise_GRU_Classifier(EMBEDDING_DIM, HIDDEN_DIM, 1, n_layer=N_LAYERS_RNN)
78 | if rnn == 'frameCRNN':
79 |     model = FrameCRNN(EMBEDDING_DIM, HIDDEN_DIM, 1, n_layer=N_LAYERS_RNN)
80 | if rnn == 'sumGRU':
81 |     model = sumGRU(EMBEDDING_DIM, HIDDEN_DIM, 1, n_layer=N_LAYERS_RNN)
82 | if rnn == 'embedGRU':
83 |     model = embed_GRU_Classifier(EMBEDDING_DIM, HIDDEN_DIM, 1, n_layer=N_LAYERS_RNN)
84 | if rnn == 'GRU':
85 |     model = GRU_Classifier(EMBEDDING_DIM, HIDDEN_DIM, 1, n_layer=N_LAYERS_RNN)
86 | if rnn == 'biGRU':
87 |     model = biGRU_Classifier(EMBEDDING_DIM, HIDDEN_DIM, 1, n_layer=N_LAYERS_RNN)
88 | if rnn == 'LSTM':
89 |     model = LSTM_Classifier(EMBEDDING_DIM, HIDDEN_DIM, 1, n_layer=N_LAYERS_RNN)
90 | if rnn == 'cnn':
91 |     model = cnn_Classifier(EMBEDDING_DIM, HIDDEN_DIM, 1)
92 | if rnn == 'crnn':
93 |     model = crnn_Classifier(EMBEDDING_DIM, HIDDEN_DIM, 1, n_layer=N_LAYERS_RNN)
94 | model = model.cuda()
95 | 
96 | loss_function = torch.nn.BCEWithLogitsLoss()
97 | loss_function_eval_sum = torch.nn.BCEWithLogitsLoss(reduction='sum')
98 | optimizer = optim.Adam(model.parameters(), lr=LR)
99 | 
100 | dataset_train = LandmarkList(root='/datasets/move_closer/Data_Distortion/', fileList='/datasets/move_closer/TrainList.txt')
101 | dataloader_train = data.DataLoader(dataset_train, batch_size=128, shuffle=True, num_workers=0, collate_fn=pad_collate)
102 | # if rnn == 'frameGRU':
103 | #     dataloader_train = data.DataLoader(dataset_train, batch_size=8, shuffle=True, num_workers=2,
104 | #                                        collate_fn=pad_collate)
105 | 
106 | dataset_test = LandmarkList(root='/datasets/move_closer/Data_Distortion/', fileList='/datasets/move_closer/TestList.txt')
107 | dataloader_test = data.DataLoader(dataset_test, batch_size=64, shuffle=False, num_workers=0, collate_fn=pad_collate)
108 | 
109 | best_test_acc = 0.
110 | for epoch in range(MAX_EPOCH):
111 |     model.train()
112 |     n_iter = 0
113 |     for batch, labels, lengths in dataloader_train:
114 |         model.zero_grad()
115 |         out = model(batch.cuda(), lengths)  # we could also do a classification for every output (probably better)
116 |         if rnn == 'frameGRU':
117 |             # expand the sequence label to every un-padded frame
118 |             new_labels_list = []
119 |             new_out_list = []
120 |             for i in range(len(lengths)):
121 |                 new_labels_list += [labels[i]] * lengths[i]
122 |                 new_out_list.append(out[i][:lengths[i]])
123 |             out = torch.cat(new_out_list, 0)
124 |             labels = new_labels_list
125 |         loss = loss_function(out, torch.FloatTensor(labels).unsqueeze(1).cuda())
126 |         loss.backward()
127 |         optimizer.step()
128 |         n_iter += 1
129 |     train_acc, train_loss = compute_binary_accuracy(model, dataloader_train, loss_function_eval_sum)
130 |     test_acc, test_loss = compute_binary_accuracy(model, dataloader_test, loss_function_eval_sum)
131 |     print('Epoch{},train_acc,{:.2f}%,train_loss,{:.8f},valid_acc,{:.2f}%,valid_loss,{:.8f}'.format(epoch, train_acc, train_loss, test_acc, test_loss))
132 |     if test_acc > best_test_acc:
133 |         best_test_acc = test_acc
134 |         if SAVE_BEST_MODEL:
135 |             torch.save(model.state_dict(), 'models/' + rnn + '_L' + str(N_LAYERS_RNN) + '.pt')
136 |         print('best epoch {}, train_acc {}, test_acc {}'.format(epoch, train_acc, test_acc))
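137 | 
138 | # Minimal shape check for pad_collate (illustrative, with two fake sequences):
139 | #     batch = [(torch.randn(5, EMBEDDING_DIM), 1, 5), (torch.randn(3, EMBEDDING_DIM), 0, 3)]
140 | #     lms, tgs, lens = pad_collate(batch)
141 | #     assert lms.shape == (2, 5, EMBEDDING_DIM) and lens == (5, 3)
142 | # pack_padded_sequence(lms, lens, batch_first=True) inside the model then keeps the
143 | # recurrent layers from ever seeing the zero-padded tail of the shorter sequence.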
--------------------------------------------------------------------------------
/train_2dcnn.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.nn.functional as F
4 | import torch.optim as optim
5 | from dataset import LandmarkList
6 | from torch.utils import data
7 | from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence
8 | import argparse
9 | 
10 | from model import *
11 | 
12 | 
13 | rnn = '2dcnn'
14 | # rnn = 'frameGRU'
15 | # to be implemented - rnn = 'frameCRNN'
16 | # rnn = 'sumGRU'
17 | # rnn = 'crnn'
18 | # rnn = 'cnn'
19 | # rnn = 'GRU'
20 | # rnn = 'embedGRU'
21 | # rnn = 'biGRU'
22 | # rnn = 'LSTM'
23 | EMBEDDING_DIM = int(68 * 67 / 2)
24 | HIDDEN_DIM = 128
25 | N_LAYERS_RNN = 3
26 | MAX_EPOCH = 1000
27 | LR = 1e-4
28 | DEVICES = 2
29 | SAVE_BEST_MODEL = True
30 | torch.cuda.set_device(DEVICES)
31 | 
32 | 
33 | def compute_binary_accuracy(model, data_loader, loss_function):
34 |     correct_pred, num_examples, total_loss = 0, 0, 0.
35 |     model.eval()
36 |     with torch.no_grad():
37 |         if rnn == 'frameGRU' or rnn == 'frameCRNN':
38 |             for batch, labels, lengths in data_loader:
39 |                 logits = model(batch.cuda(), lengths)
40 |                 out = torch.sigmoid(logits)
41 |                 new_labels_list = []
42 |                 new_logits_list = []
43 |                 new_out_list = []
44 |                 for i in range(len(lengths)):
45 |                     # repeat the sequence label for every frame; keep only un-padded frames
46 |                     new_labels_list += [labels[i]] * lengths[i]
47 |                     new_logits_list.append(logits[i][:lengths[i]])  # raw logits: BCEWithLogitsLoss applies the sigmoid itself
48 |                     new_out_list.append(out[i][:lengths[i]].mean(0, keepdim=True))
49 |                 logits_framewise = torch.cat(new_logits_list, 0)
50 |                 labels_framewise = new_labels_list
51 |                 out = torch.cat(new_out_list, 0)
52 |                 total_loss += loss_function(logits_framewise, torch.FloatTensor(labels_framewise).unsqueeze(1).cuda()).item()
53 |                 predicted_labels = (out > 0.5).long()
54 |                 num_examples += len(lengths)
55 |                 correct_pred += (predicted_labels.squeeze(1).cpu().long() == torch.LongTensor(labels)).sum()
56 |             return correct_pred.float().item()/num_examples * 100, total_loss
57 |         else:
58 |             for batch, labels, lengths in data_loader:
59 |                 logits = model(batch.cuda(), lengths)
60 |                 total_loss += loss_function(logits, torch.FloatTensor(labels).unsqueeze(1).cuda()).item()
61 |                 predicted_labels = (torch.sigmoid(logits) > 0.5).long()
62 |                 num_examples += len(lengths)
63 |                 correct_pred += (predicted_labels.squeeze(1).cpu().long() == torch.LongTensor(labels)).sum()
64 |             return correct_pred.float().item()/num_examples * 100, total_loss
65 | 
66 | 
67 | def pad_collate(batch):
68 |     # sort by length (longest first), as pack_padded_sequence requires
69 |     batch.sort(key=lambda x: x[2], reverse=True)
70 |     lms, tgs, lens = zip(*batch)
71 |     new_lms = torch.zeros((len(lms), lms[0].shape[0], lms[0].shape[1]))  # batch x max_seq_len x EMBEDDING_DIM
72 |     new_lms[0] = lms[0]
73 |     for i in range(1, len(lms)):
74 |         new_lms[i] = torch.cat((lms[i], torch.zeros((lens[0] - lens[i]), EMBEDDING_DIM)), 0)
75 |     return new_lms, tgs, lens
76 | 
77 | if rnn == '2dcnn':
78 |     model = cnn_2d(EMBEDDING_DIM, HIDDEN_DIM, 1, n_layer=N_LAYERS_RNN)
79 | if rnn == 'frameGRU':
80 |     model = Framewise_GRU_Classifier(EMBEDDING_DIM, HIDDEN_DIM, 1, n_layer=N_LAYERS_RNN)
81 | if rnn == 'frameCRNN':
82 |     model = FrameCRNN(EMBEDDING_DIM, HIDDEN_DIM, 1, n_layer=N_LAYERS_RNN)
83 | if rnn == 'sumGRU':
84 |     model = sumGRU(EMBEDDING_DIM, HIDDEN_DIM, 1, n_layer=N_LAYERS_RNN)
85 | if rnn == 'embedGRU':
86 |     model = embed_GRU_Classifier(EMBEDDING_DIM, HIDDEN_DIM, 1, n_layer=N_LAYERS_RNN)
87 | if rnn == 'GRU':
88 |     model = GRU_Classifier(EMBEDDING_DIM, HIDDEN_DIM, 1, n_layer=N_LAYERS_RNN)
89 | if rnn == 'biGRU':
90 |     model = biGRU_Classifier(EMBEDDING_DIM, HIDDEN_DIM, 1, n_layer=N_LAYERS_RNN)
91 | if rnn == 'LSTM':
92 |     model = LSTM_Classifier(EMBEDDING_DIM, HIDDEN_DIM, 1, n_layer=N_LAYERS_RNN)
93 | if rnn == 'cnn':
94 |     model = cnn_Classifier(EMBEDDING_DIM, HIDDEN_DIM, 1)
95 | if rnn == 'crnn':
96 |     model = crnn_Classifier(EMBEDDING_DIM, HIDDEN_DIM, 1, n_layer=N_LAYERS_RNN)
97 | model = model.cuda()
98 | 
99 | loss_function = torch.nn.BCEWithLogitsLoss()
100 | loss_function_eval_sum = torch.nn.BCEWithLogitsLoss(reduction='sum')
101 | optimizer = optim.Adam(model.parameters(), lr=LR)
102 | 
103 | dataset_train = LandmarkList(root='/datasets/move_closer/Data_Distortion/', fileList='/datasets/move_closer/TrainList.txt')
104 | dataloader_train = data.DataLoader(dataset_train, batch_size=128, shuffle=True, num_workers=0, collate_fn=pad_collate)
105 | # if rnn == 'frameGRU':
106 | #     dataloader_train = data.DataLoader(dataset_train, batch_size=8, shuffle=True, num_workers=2,
107 | #                                        collate_fn=pad_collate)
108 | 
109 | dataset_test = LandmarkList(root='/datasets/move_closer/Data_Distortion/', fileList='/datasets/move_closer/TestList.txt')
110 | dataloader_test = data.DataLoader(dataset_test, batch_size=64, shuffle=False, num_workers=0, collate_fn=pad_collate)
111 | 
112 | best_test_acc = 0.
113 | for epoch in range(MAX_EPOCH):
114 |     model.train()
115 |     n_iter = 0
116 |     for batch, labels, lengths in dataloader_train:
117 |         model.zero_grad()
118 |         out = model(batch.cuda(), lengths)  # we could also do a classification for every output (probably better)
119 |         if rnn == 'frameGRU':
120 |             new_labels_list = []
121 |             new_out_list = []
122 |             for i in range(len(lengths)):
123 |                 new_labels_list += [labels[i]] * lengths[i]
124 |                 new_out_list.append(out[i][:lengths[i]])
125 |             out = torch.cat(new_out_list, 0)
126 |             labels = new_labels_list
127 |         loss = loss_function(out, torch.FloatTensor(labels).unsqueeze(1).cuda())
128 |         loss.backward()
129 |         optimizer.step()
130 |         n_iter += 1
131 |     train_acc, train_loss = compute_binary_accuracy(model, dataloader_train, loss_function_eval_sum)
132 |     test_acc, test_loss = compute_binary_accuracy(model, dataloader_test, loss_function_eval_sum)
133 |     print('Epoch{},train_acc,{:.2f}%,train_loss,{:.8f},valid_acc,{:.2f}%,valid_loss,{:.8f}'.format(epoch, train_acc, train_loss, test_acc, test_loss))
134 |     if test_acc > best_test_acc:
135 |         best_test_acc = test_acc
136 |         if SAVE_BEST_MODEL:
137 |             torch.save(model.state_dict(), 'models/' + rnn + '_L' + str(N_LAYERS_RNN) + '.pt')
138 |         print('best epoch {}, train_acc {}, test_acc {}'.format(epoch, train_acc, test_acc))
--------------------------------------------------------------------------------