├── README.md
├── adjust_weights.py
├── bilinear_densenet161.py
├── bilinear_resnet152.py
├── bilinear_resnet34.py
├── bilinear_resnet34_double.py
├── bilinear_resnet50_densenet121.py
├── bilinear_vgg16.py
├── bilinear_vgg16_double.py
├── cub_200_2011.py
├── helper.py
├── main.py
├── model_global.py
├── model_object.py
├── model_test.py
└── utils.py

-------------------------------------------------------------------------------- /README.md: --------------------------------------------------------------------------------
# FGC_CUB-200-2011

Fine-Grained Image Classification on CUB-200-2011

## Environment

We suggest using [Anaconda](https://anaconda.org/) to create a virtual environment for this program. Visit the official website or [this mirror](https://mirrors.tuna.tsinghua.edu.cn/anaconda/archive/) to download the installer (hopefully there is a GUI and a browser on your deep-learning server machine).

Create a new virtual environment:
`conda create -n pytorch python=3.6`

Activate the environment on macOS/Linux:
`source activate pytorch`

On Windows:
`activate pytorch`

### Requirements

**Note:** We suggest using `pip` instead of `conda` to install the following requirements **on Windows**. If you use conda to install something like PyTorch or NumPy, three additional packages starting with `mkl` are also downloaded to speed up computation. These `mkl` packages conflict with `conda` on Windows, and the program simply will not run.

If you're using macOS or Linux, just ignore the note and enjoy `conda`~

If you want to speed up package downloads, you can add Tsinghua's package repository for `conda`:
```
conda config --add channels https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free/
conda config --add channels https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/main/
conda config --set show_channel_urls yes
```
Visit the [Tsinghua Open Source Mirror](https://mirrors.tuna.tsinghua.edu.cn/help/anaconda/) for more information.

#### PyTorch
Visit the [official website](https://pytorch.org/) and choose the correct OS, package manager, Python version, and CUDA version to get the install command. Please install both `pytorch` and `torchvision`.

If your download speed is too slow, you can also add Tsinghua's repository specifically for installing PyTorch:

```
conda config --add channels https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/pytorch/
conda install pytorch torchvision
```

#### requests
`pip install requests` — note that the default `conda` channels may not provide this package.
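#### Sanity check
Once everything is installed, a quick check (a minimal sketch; the CUDA line only matters if you train on a GPU) confirms that the environment works:

```python
import torch
import torchvision

print(torch.__version__, torchvision.__version__)
print('CUDA available:', torch.cuda.is_available())
```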
#### Other requirements
`conda install matplotlib pillow`

## Train and evaluate globally

```
cd FGC_CUB-200-2011
source activate pytorch
python model_global.py
```

-------------------------------------------------------------------------------- /adjust_weights.py: --------------------------------------------------------------------------------
import cub_200_2011 as dataset
import helper
import torch
import utils
import time

model_glb_path = ''  # path to the trained global-model checkpoint
model_obj_path = ''  # path to the trained object-model checkpoint

predict_weights = [0.2, 0.8]
logger = utils.get_logger('weights-' + str(predict_weights[0]) + '-' + str(predict_weights[1]))
logger.info('start loading dataset')
begin_time = time.time()
train_loader_glb, valid_loader_glb = dataset.get_train_validation_data_loader(
    resize_size=224,
    batch_size=32,
    random_seed=96,
    validation_size=0.1,
    object_boxes_dict=None,
    show_sample=False
)
test_loader_glb = dataset.get_test_data_loader(
    resize_size=224,
    batch_size=32,
    object_boxes_dict=None
)

bounding_boxes = utils.get_annotated_bounding_boxes()
train_loader_obj, valid_loader_obj = dataset.get_train_validation_data_loader(
    resize_size=224,
    batch_size=32,
    random_seed=96,
    validation_size=0.1,
    object_boxes_dict=bounding_boxes,
    show_sample=False
)
test_loader_obj = dataset.get_test_data_loader(
    resize_size=224,
    batch_size=32,
    object_boxes_dict=bounding_boxes
)
logger.info('loading dataset costs %.4fs' % (time.time() - begin_time))

logger.info('loading models')

begin_time = time.time()
model_glb_name = 'resnet152'
model_glb = helper.get_model_by_name(model_glb_name, pretrained=False)
helper.replace_model_fc(model_glb_name, model_glb)
model_glb.load_state_dict(torch.load(model_glb_path))

model_obj_name = 'densenet161'
model_obj = helper.get_model_by_name(model_obj_name, pretrained=False)
helper.replace_model_fc(model_obj_name, model_obj)
model_obj.load_state_dict(torch.load(model_obj_path))
logger.info('loading models costs %.4fs' % (time.time() - begin_time))

models = [model_glb, model_obj]
validation_loaders = [valid_loader_glb, valid_loader_obj]
test_loaders = [test_loader_glb, test_loader_obj]

helper.evaluate(
    logger=logger,
    models=models,
    data_loaders=validation_loaders,
    set_name='validation set',
    predict_weights=predict_weights
)
helper.evaluate(
    logger=logger,
    models=models,
    data_loaders=test_loaders,
    set_name='test set',
    predict_weights=predict_weights
)
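helper.py is not included in this section, so the exact behaviour of helper.evaluate is not shown. A minimal, hedged sketch of the weighted soft-voting it presumably performs with predict_weights (the function name weighted_predict is hypothetical):

import torch
import torch.nn.functional as F

def weighted_predict(models, inputs, predict_weights):
    # inputs[i] is the batch prepared for models[i] (global image vs. object crop);
    # combine per-model class probabilities with fixed weights, then take argmax.
    probs = None
    for model, X, w in zip(models, inputs, predict_weights):
        model.eval()
        with torch.no_grad():
            p = w * F.softmax(model(X), dim=1)
        probs = p if probs is None else probs + p
    return probs.argmax(dim=1)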
-------------------------------------------------------------------------------- /bilinear_densenet161.py: --------------------------------------------------------------------------------
# -*- coding: utf-8 -*-

import os

import torch
import torchvision
import time
import cub_200_2011 as dataset

torch.manual_seed(0)
torch.cuda.manual_seed_all(0)

class BCNN(torch.nn.Module):

    def __init__(self):
        """Declare all needed layers."""
        torch.nn.Module.__init__(self)
        self.features = torchvision.models.densenet161(pretrained=True).features
        self.in_features = torchvision.models.densenet161().classifier.in_features  # 2208
        self.conv = torch.nn.Conv2d(self.in_features, 512, 1)
        self.bn = torch.nn.BatchNorm2d(512)
        self.relu = torch.nn.ReLU(inplace=True)
        # Linear classifier.
        self.fc = torch.nn.Linear(512**2, 200)
        # Initialize the fc layers.
        torch.nn.init.kaiming_normal_(self.fc.weight.data)
        if self.fc.bias is not None:
            torch.nn.init.constant_(self.fc.bias.data, val=0)

    def forward(self, X):
        N = X.size()[0]
        assert X.size() == (N, 3, 224, 224)
        X = self.features(X)
        assert X.size() == (N, self.in_features, 7, 7)
        X = self.conv(X)
        X = self.bn(X)
        X = self.relu(X)
        X = X.view(N, 512, 7**2)
        X = torch.bmm(X, torch.transpose(X, 1, 2)) / (7**2)  # Bilinear
        assert X.size() == (N, 512, 512)
        X = X.view(N, 512**2)
        X = torch.sqrt(X + 1e-5)
        X = torch.nn.functional.normalize(X)
        X = self.fc(X)
        assert X.size() == (N, 200)
        return X

    def freeze_layers(self):
        # Freeze all layers before the new conv/bn/fc head.
        for param in self.features.parameters():
            param.requires_grad = False

class BCNNManager(object):
    """Manager class to train the bilinear CNN.

    Attributes:
        _options: Hyperparameters.
        _path: Useful paths.
        _net: Bilinear CNN.
        _criterion: Cross-entropy loss.
        _solver: SGD with momentum.
        _scheduler: Reduces the learning rate by a factor of 0.1 on plateau.
        _train_loader: Training data.
        _test_loader: Testing data.
    """
    def __init__(self, options, path, freeze=True, pre_model_path=None):
        """Prepare the network, criterion, solver, and data.

        Args:
            options, dict: Hyperparameters.
        """
        print('Prepare the network and data.')
        self._options = options
        self._path = path
        # Network.
        self._net = torch.nn.DataParallel(BCNN()).cuda()
        #print(self._net)
        if freeze is True:
            self._net.module.freeze_layers()
        if pre_model_path is not None:
            self._net.load_state_dict(torch.load(pre_model_path))
        # Criterion.
        self._criterion = torch.nn.CrossEntropyLoss().cuda()
        # Solver.
        if freeze is True:
            self._solver = torch.optim.SGD(
                list(self._net.module.conv.parameters())
                + list(self._net.module.bn.parameters())
                + list(self._net.module.fc.parameters()),
                lr=self._options['base_lr'],
                momentum=0.9, weight_decay=self._options['weight_decay'])
        else:
            self._solver = torch.optim.SGD(
                self._net.parameters(), lr=self._options['base_lr'],
                momentum=0.9, weight_decay=self._options['weight_decay'])
        self._scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
            self._solver, mode='max', factor=0.1, patience=3, verbose=True,
            threshold=1e-4)
        self._train_loader, self._valid_loader = dataset.get_train_validation_data_loader(
            resize_size=224,
            batch_size=self._options['batch_size'],
            random_seed=96,
            validation_size=0,
            object_boxes_dict=None,
            show_sample=False,
            augment=True
        )
        self._test_loader = dataset.get_test_data_loader(
            resize_size=224,
            batch_size=32,
            object_boxes_dict=None
        )

    def train(self):
        """Train the network."""
        print('Training.')
        best_acc = 0.0
        best_epoch = None
        print('Epoch\tTrain loss\tTrain acc\tTest acc')
        for t in range(self._options['epochs']):
            epoch_loss = []
            num_correct = 0
            num_total = 0
            for i, (_, X, y) in enumerate(self._train_loader, 0):
                # Data.
                X = torch.autograd.Variable(X.cuda())
                y = torch.autograd.Variable(y.cuda(non_blocking=True))

                # Clear the existing gradients.
                self._solver.zero_grad()
                # Forward pass.
                score = self._net(X)
                loss = self._criterion(score, y)
                epoch_loss.append(loss.data.item())
                # Prediction.
                _, prediction = torch.max(score.data, 1)
                num_total += y.size(0)
                num_correct += torch.sum(prediction == y.data).float()
                # Backward pass.
                loss.backward()
                self._solver.step()
            train_acc = 100.0 * num_correct / num_total
            #valid_acc = 1.0 * self._accuracy(self._valid_loader)
            test_acc = 1.0 * self._accuracy(self._test_loader)
            self._scheduler.step(test_acc)
            if test_acc > best_acc:
                best_acc = test_acc
                best_epoch = t + 1
                print('*', end='')
                # Save model onto disk.
                torch.save(self._net.state_dict(), self._path)
            print('%d\t%4.3f\t\t%4.2f%%\t\t%4.2f%%' %
                  (t + 1, sum(epoch_loss) / len(epoch_loss), train_acc, test_acc))
            torch.cuda.empty_cache()
        print('Best at epoch %d, test accuracy %f' % (best_epoch, best_acc))

    def _accuracy(self, data_loader):
        """Compute the train/test accuracy.

        Args:
            data_loader: Train/Test DataLoader.

        Returns:
            Train/Test accuracy in percentage.
        """
        self._net.train(False)
        num_correct = 0
        num_total = 0
        for i, (_, X, y) in enumerate(data_loader, 0):
            # Data.
            X = torch.autograd.Variable(X.cuda())
            y = torch.autograd.Variable(y.cuda(non_blocking=True))

            # Prediction.
            score = self._net(X)
            _, prediction = torch.max(score.data, 1)
            num_total += y.size(0)
            num_correct += torch.sum(prediction == y.data).float()
        self._net.train(True)  # Set the model to training phase
        return 100.0 * num_correct / num_total

def fc():
    options = {
        'base_lr': 1.0,
        'batch_size': 64,
        'epochs': 55,
        'weight_decay': 1e-8,
    }
    pre_model_path = None
    path_save = 'models/densenet161_fc.pth'
    manager = BCNNManager(options, path_save, freeze=True, pre_model_path=pre_model_path)
    manager.train()

def all_layers():
    options = {
        'base_lr': 0.01,
        'batch_size': 32,
        'epochs': 30,
        'weight_decay': 1e-5,
    }
    pre_model_path = 'models/densenet161_fc.pth'
    path_save = 'models/densenet161_all.pth'
    manager = BCNNManager(options, path_save, freeze=False, pre_model_path=pre_model_path)
    manager.train()

if __name__ == '__main__':
    #dataset.use_less_data = True
    fc()
    torch.cuda.empty_cache()
    all_layers()
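Every BCNN variant in this repo shares the same pooling head: an outer product of channel features averaged over spatial positions, followed by an element-wise square root and L2 normalisation. A standalone sketch of the operation, assuming a post-ReLU (hence non-negative) 512-channel, 7x7 feature map:

import torch

N = 4
X = torch.rand(N, 512, 7, 7)                   # backbone features (post-ReLU)
X = X.view(N, 512, 7**2)                       # flatten the 7x7 grid
X = torch.bmm(X, X.transpose(1, 2)) / (7**2)   # bilinear pooling: (N, 512, 512)
X = X.view(N, 512**2)
X = torch.sqrt(X + 1e-5)                       # square-root normalisation
X = torch.nn.functional.normalize(X)           # L2 normalisation
print(X.shape)                                 # torch.Size([4, 262144])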
-------------------------------------------------------------------------------- /bilinear_resnet152.py: --------------------------------------------------------------------------------
# -*- coding: utf-8 -*-

import os

import torch
import torchvision
import time
import cub_200_2011 as dataset
import torch.nn as nn

torch.manual_seed(0)
torch.cuda.manual_seed_all(0)

class BCNN(torch.nn.Module):

    def __init__(self):
        """Declare all needed layers."""
        torch.nn.Module.__init__(self)
        resnet_model = torchvision.models.resnet152(pretrained=True)
        self.conv1 = resnet_model.conv1
        self.bn1 = resnet_model.bn1
        self.relu = resnet_model.relu
        self.maxpool = resnet_model.maxpool
        self.layer1 = resnet_model.layer1
        self.layer2 = resnet_model.layer2
        self.layer3 = resnet_model.layer3
        self.layer4 = resnet_model.layer4
        in_channels = 2048
        out_channels = 512
        self.conv2 = nn.Conv2d(in_channels, out_channels, 1)
        self.bn2 = nn.BatchNorm2d(out_channels)
        self.relu2 = nn.ReLU(inplace=True)
        # Linear classifier.
        self.fc = torch.nn.Linear(512**2, 200)
        # Initialize the fc layers.
        torch.nn.init.kaiming_normal_(self.fc.weight.data)
        if self.fc.bias is not None:
            torch.nn.init.constant_(self.fc.bias.data, val=0)

    def forward(self, X):
        N = X.size()[0]
        assert X.size() == (N, 3, 224, 224)
        x = self.conv1(X)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        X = self.layer4(x)
        assert X.size() == (N, 2048, 7, 7)
        X = self.conv2(X)
        X = self.bn2(X)
        X = self.relu2(X)
        X = X.view(N, 512, 7**2)
        X = torch.bmm(X, torch.transpose(X, 1, 2)) / (7**2)  # Bilinear
        assert X.size() == (N, 512, 512)
        X = X.view(N, 512**2)
        X = torch.sqrt(X + 1e-5)
        X = torch.nn.functional.normalize(X)
        X = self.fc(X)
        assert X.size() == (N, 200)
        return X

    def freeze_layers(self):
        # Freeze all layers before the new conv/bn/fc head.
        for param in self.conv1.parameters():
            param.requires_grad = False
        for param in self.bn1.parameters():
            param.requires_grad = False
        for param in self.layer1.parameters():
            param.requires_grad = False
        for param in self.layer2.parameters():
            param.requires_grad = False
        for param in self.layer3.parameters():
            param.requires_grad = False
        for param in self.layer4.parameters():
            param.requires_grad = False

class BCNNManager(object):
    """Manager class to train the bilinear CNN.

    Attributes:
        _options: Hyperparameters.
        _path: Useful paths.
        _net: Bilinear CNN.
        _criterion: Cross-entropy loss.
        _solver: SGD with momentum.
        _scheduler: Reduces the learning rate by a factor of 0.1 on plateau.
        _train_loader: Training data.
        _test_loader: Testing data.
    """
    def __init__(self, options, path, freeze=True, pre_model_path=None):
        """Prepare the network, criterion, solver, and data.

        Args:
            options, dict: Hyperparameters.
        """
        print('Prepare the network and data.')
        self._options = options
        self._path = path
        # Network.
        self._net = torch.nn.DataParallel(BCNN()).cuda()
        #print(self._net)
        if freeze is True:
            self._net.module.freeze_layers()
        if pre_model_path is not None:
            self._net.load_state_dict(torch.load(pre_model_path))
        # Criterion.
        self._criterion = torch.nn.CrossEntropyLoss().cuda()
        # Solver.
        if freeze is True:
            self._solver = torch.optim.SGD(
                list(self._net.module.conv2.parameters())
                + list(self._net.module.bn2.parameters())
                + list(self._net.module.fc.parameters()),
                lr=self._options['base_lr'],
                momentum=0.9, weight_decay=self._options['weight_decay'])
        else:
            self._solver = torch.optim.SGD(
                self._net.parameters(), lr=self._options['base_lr'],
                momentum=0.9, weight_decay=self._options['weight_decay'])
        self._scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
            self._solver, mode='max', factor=0.1, patience=3, verbose=True,
            threshold=1e-4)
        self._train_loader, self._valid_loader = dataset.get_train_validation_data_loader(
            resize_size=224,
            batch_size=self._options['batch_size'],
            random_seed=96,
            validation_size=0,
            object_boxes_dict=None,
            show_sample=False,
            augment=True
        )
        self._test_loader = dataset.get_test_data_loader(
            resize_size=224,
            batch_size=32,
            object_boxes_dict=None
        )

    def train(self):
        """Train the network."""
        print('Training.')
        best_acc = 0.0
        best_epoch = None
        print('Epoch\tTrain loss\tTrain acc\tTest acc')
        for t in range(self._options['epochs']):
            epoch_loss = []
            num_correct = 0
            num_total = 0
            for i, (_, X, y) in enumerate(self._train_loader, 0):
                # Data.
                X = torch.autograd.Variable(X.cuda())
                y = torch.autograd.Variable(y.cuda(non_blocking=True))

                # Clear the existing gradients.
                self._solver.zero_grad()
                # Forward pass.
                score = self._net(X)
                loss = self._criterion(score, y)
                epoch_loss.append(loss.data.item())
                # Prediction.
                _, prediction = torch.max(score.data, 1)
                num_total += y.size(0)
                num_correct += torch.sum(prediction == y.data).float()
                # Backward pass.
                loss.backward()
                self._solver.step()
            train_acc = 100.0 * num_correct / num_total
            #valid_acc = 1.0 * self._accuracy(self._valid_loader)
            test_acc = 1.0 * self._accuracy(self._test_loader)
            self._scheduler.step(test_acc)
            if test_acc > best_acc:
                best_acc = test_acc
                best_epoch = t + 1
                print('*', end='')
                # Save model onto disk.
                torch.save(self._net.state_dict(), self._path)
            print('%d\t%4.3f\t\t%4.2f%%\t\t%4.2f%%' %
                  (t + 1, sum(epoch_loss) / len(epoch_loss), train_acc, test_acc))
        print('Best at epoch %d, test accuracy %f' % (best_epoch, best_acc))

    def _accuracy(self, data_loader):
        """Compute the train/test accuracy.

        Args:
            data_loader: Train/Test DataLoader.

        Returns:
            Train/Test accuracy in percentage.
        """
        self._net.train(False)
        num_correct = 0
        num_total = 0
        for i, (_, X, y) in enumerate(data_loader, 0):
            # Data.
            X = torch.autograd.Variable(X.cuda())
            y = torch.autograd.Variable(y.cuda(non_blocking=True))

            # Prediction.
            score = self._net(X)
            _, prediction = torch.max(score.data, 1)
            num_total += y.size(0)
            num_correct += torch.sum(prediction == y.data).float()
        self._net.train(True)  # Set the model to training phase
        return 100.0 * num_correct / num_total

def fc():
    options = {
        'base_lr': 1.0,
        'batch_size': 64,
        'epochs': 50,
        'weight_decay': 1e-8,
    }
    pre_model_path = None
    path_save = 'models/resnet152_fc_224.pth'
    manager = BCNNManager(options, path_save, freeze=True, pre_model_path=pre_model_path)
    manager.train()

def all_layers():
    options = {
        'base_lr': 0.01,
        'batch_size': 32,
        'epochs': 30,
        'weight_decay': 1e-5,
    }
    pre_model_path = 'models/resnet152_fc_224.pth'
    path_save = 'models/resnet152_all_224.pth'
    manager = BCNNManager(options, path_save, freeze=False, pre_model_path=pre_model_path)
    manager.train()

if __name__ == '__main__':
    #dataset.use_less_data = True
    fc()
    torch.cuda.empty_cache()
    all_layers()
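Note that the managers save checkpoints from a torch.nn.DataParallel wrapper, so every state_dict key carries a 'module.' prefix, while adjust_weights.py loads checkpoints into bare torchvision models. A minimal sketch of stripping the prefix when loading such a checkpoint into a bare model (the checkpoint path is illustrative):

import torch

state = torch.load('models/resnet152_all_224.pth', map_location='cpu')
# Remove the 'module.' prefix added by torch.nn.DataParallel.
state = {k[len('module.'):] if k.startswith('module.') else k: v
         for k, v in state.items()}
model.load_state_dict(state)  # `model` is the bare (non-DataParallel) network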
-------------------------------------------------------------------------------- /bilinear_resnet34.py: --------------------------------------------------------------------------------
# -*- coding: utf-8 -*-

import os

import torch
import torchvision
import time
import cub_200_2011 as dataset

torch.manual_seed(0)
torch.cuda.manual_seed_all(0)

class BCNN(torch.nn.Module):

    def __init__(self):
        """Declare all needed layers."""
        torch.nn.Module.__init__(self)
        resnet_model = torchvision.models.resnet34(pretrained=True)
        self.conv1 = resnet_model.conv1
        self.bn1 = resnet_model.bn1
        self.relu = resnet_model.relu
        self.maxpool = resnet_model.maxpool
        self.layer1 = resnet_model.layer1
        self.layer2 = resnet_model.layer2
        self.layer3 = resnet_model.layer3
        self.layer4 = resnet_model.layer4
        # Linear classifier.
        self.fc = torch.nn.Linear(512**2, 200)
        # Initialize the fc layers.
        torch.nn.init.kaiming_normal_(self.fc.weight.data)
        if self.fc.bias is not None:
            torch.nn.init.constant_(self.fc.bias.data, val=0)

    def forward(self, X):
        N = X.size()[0]
        assert X.size() == (N, 3, 448, 448)
        x = self.conv1(X)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        X = self.layer4(x)
        assert X.size() == (N, 512, 14, 14)
        X = X.view(N, 512, 14**2)
        X = torch.bmm(X, torch.transpose(X, 1, 2)) / (14**2)  # Bilinear
        assert X.size() == (N, 512, 512)
        X = X.view(N, 512**2)
        X = torch.sqrt(X + 1e-5)
        X = torch.nn.functional.normalize(X)
        X = self.fc(X)
        assert X.size() == (N, 200)
        return X

    def freeze_layers(self):
        # Freeze all layers before the new fc head.
        for param in self.conv1.parameters():
            param.requires_grad = False
        for param in self.bn1.parameters():
            param.requires_grad = False
        for param in self.layer1.parameters():
            param.requires_grad = False
        for param in self.layer2.parameters():
            param.requires_grad = False
        for param in self.layer3.parameters():
            param.requires_grad = False
        for param in self.layer4.parameters():
            param.requires_grad = False

class BCNNManager(object):
    """Manager class to train the bilinear CNN.

    Attributes:
        _options: Hyperparameters.
        _path: Useful paths.
        _net: Bilinear CNN.
        _criterion: Cross-entropy loss.
        _solver: SGD with momentum.
        _scheduler: Reduces the learning rate by a factor of 0.1 on plateau.
        _train_loader: Training data.
        _test_loader: Testing data.
    """
    def __init__(self, options, path, freeze=True, pre_model_path=None):
        """Prepare the network, criterion, solver, and data.

        Args:
            options, dict: Hyperparameters.
        """
        print('Prepare the network and data.')
        self._options = options
        self._path = path
        # Network.
        self._net = torch.nn.DataParallel(BCNN()).cuda()
        #print(self._net)
        if freeze is True:
            self._net.module.freeze_layers()
        if pre_model_path is not None:
            self._net.load_state_dict(torch.load(pre_model_path))
        # Criterion.
        self._criterion = torch.nn.CrossEntropyLoss().cuda()
        # Solver.
        if freeze is True:
            self._solver = torch.optim.SGD(
                self._net.module.fc.parameters(), lr=self._options['base_lr'],
                momentum=0.9, weight_decay=self._options['weight_decay'])
        else:
            self._solver = torch.optim.SGD(
                self._net.parameters(), lr=self._options['base_lr'],
                momentum=0.9, weight_decay=self._options['weight_decay'])
        self._scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
            self._solver, mode='max', factor=0.1, patience=3, verbose=True,
            threshold=1e-4)
        self._train_loader, self._valid_loader = dataset.get_train_validation_data_loader(
            resize_size=448,
            batch_size=self._options['batch_size'],
            random_seed=96,
            validation_size=0,
            object_boxes_dict=None,
            show_sample=False,
            augment=True
        )
        self._test_loader = dataset.get_test_data_loader(
            resize_size=448,
            batch_size=32,
            object_boxes_dict=None
        )

    def train(self):
        """Train the network."""
        print('Training.')
        best_acc = 0.0
        best_epoch = None
        print('Epoch\tTrain loss\tTrain acc\tTest acc')
        for t in range(self._options['epochs']):
            epoch_loss = []
            num_correct = 0
            num_total = 0
            for i, (_, X, y) in enumerate(self._train_loader, 0):
                # Data.
                X = torch.autograd.Variable(X.cuda())
                y = torch.autograd.Variable(y.cuda(non_blocking=True))

                # Clear the existing gradients.
                self._solver.zero_grad()
                # Forward pass.
                score = self._net(X)
                loss = self._criterion(score, y)
                epoch_loss.append(loss.data.item())
                # Prediction.
                _, prediction = torch.max(score.data, 1)
                num_total += y.size(0)
                num_correct += torch.sum(prediction == y.data).float()
                # Backward pass.
                loss.backward()
                self._solver.step()
            train_acc = 100.0 * num_correct / num_total
            #valid_acc = 1.0 * self._accuracy(self._valid_loader)
            test_acc = 1.0 * self._accuracy(self._test_loader)
            self._scheduler.step(test_acc)
            if test_acc > best_acc:
                best_acc = test_acc
                best_epoch = t + 1
                print('*', end='')
                # Save model onto disk.
                torch.save(self._net.state_dict(), self._path)
            print('%d\t%4.3f\t\t%4.2f%%\t\t%4.2f%%' %
                  (t + 1, sum(epoch_loss) / len(epoch_loss), train_acc, test_acc))
        print('Best at epoch %d, test accuracy %f' % (best_epoch, best_acc))

    def _accuracy(self, data_loader):
        """Compute the train/test accuracy.

        Args:
            data_loader: Train/Test DataLoader.
        Returns:
            Train/Test accuracy in percentage.
        """
        self._net.train(False)
        num_correct = 0
        num_total = 0
        for i, (_, X, y) in enumerate(data_loader, 0):
            # Data.
            X = torch.autograd.Variable(X.cuda())
            y = torch.autograd.Variable(y.cuda(non_blocking=True))

            # Prediction.
            score = self._net(X)
            _, prediction = torch.max(score.data, 1)
            num_total += y.size(0)
            num_correct += torch.sum(prediction == y.data).float()
        self._net.train(True)  # Set the model to training phase
        return 100.0 * num_correct / num_total

def fc():
    options = {
        'base_lr': 1.0,
        'batch_size': 64,
        'epochs': 50,
        'weight_decay': 1e-8,
    }
    pre_model_path = None
    path_save = 'models/resnet34_fc_448.pth'
    manager = BCNNManager(options, path_save, freeze=True, pre_model_path=pre_model_path)
    manager.train()

def all_layers():
    options = {
        'base_lr': 0.01,
        'batch_size': 32,
        'epochs': 30,
        'weight_decay': 1e-5,
    }
    pre_model_path = 'models/resnet34_fc_448.pth'
    path_save = 'models/resnet34_all_448.pth'
    manager = BCNNManager(options, path_save, freeze=False, pre_model_path=pre_model_path)
    #manager.train()

if __name__ == '__main__':
    #fc()
    #torch.cuda.empty_cache()
    all_layers()

import numpy as np
import torch.nn as nn
import torch.nn.functional as F

class ScaledDotProductAttention(nn.Module):
    ''' Scaled Dot-Product Attention '''

    def __init__(self, temperature, attn_dropout=0.1):
        super().__init__()
        self.temperature = temperature
        self.dropout = nn.Dropout(attn_dropout)
        self.softmax = nn.Softmax(dim=2)

    def forward(self, q, k, v, mask=None):
        attn = torch.bmm(q, k.transpose(1, 2))
        attn = attn / self.temperature

        if mask is not None:
            attn = attn.masked_fill(mask, -np.inf)

        attn = self.softmax(attn)
        attn = self.dropout(attn)
        output = torch.bmm(attn, v)

        return output, attn

class MultiHeadAttention(nn.Module):
    ''' Multi-Head Attention module '''

    def __init__(self, n_head, d_model, d_k, d_v, dropout=0.1):
        super().__init__()

        self.n_head = n_head
        self.d_k = d_k
        self.d_v = d_v

        self.w_qs = nn.Linear(d_model, n_head * d_k)
        self.w_ks = nn.Linear(d_model, n_head * d_k)
        self.w_vs = nn.Linear(d_model, n_head * d_v)
        nn.init.normal_(self.w_qs.weight, mean=0, std=np.sqrt(2.0 / (d_model + d_k)))
        nn.init.normal_(self.w_ks.weight, mean=0, std=np.sqrt(2.0 / (d_model + d_k)))
        nn.init.normal_(self.w_vs.weight, mean=0, std=np.sqrt(2.0 / (d_model + d_v)))

        self.attention = ScaledDotProductAttention(temperature=np.power(d_k, 0.5))
        self.layer_norm = nn.LayerNorm(d_model)

        self.fc = nn.Linear(n_head * d_v, d_model)
        nn.init.xavier_normal_(self.fc.weight)

        self.dropout = nn.Dropout(dropout)

    def forward(self, q, k, v, mask=None):
        d_k, d_v, n_head = self.d_k, self.d_v, self.n_head

        sz_b, len_q, _ = q.size()  # (batch, seq_len, d_model)
        sz_b, len_k, _ = k.size()
        sz_b, len_v, _ = v.size()

        residual = q

        q = self.w_qs(q).view(sz_b, len_q, n_head, d_k)  # (b, lq, n_head, d_k)
        k = self.w_ks(k).view(sz_b, len_k, n_head, d_k)
        v = self.w_vs(v).view(sz_b, len_v, n_head, d_v)
        q = q.permute(2, 0, 1, 3).contiguous().view(-1, len_q, d_k)  # (n*b) x lq x dk
        k = k.permute(2, 0, 1, 3).contiguous().view(-1, len_k, d_k)  # (n*b) x lk x dk
        v = v.permute(2, 0, 1, 3).contiguous().view(-1, len_v, d_v)  # (n*b) x lv x dv

        if mask is not None:  # mask defaults to None, so guard the repeat
            mask = mask.repeat(n_head, 1, 1)  # (n*b) x .. x ..
        output, attn = self.attention(q, k, v, mask=mask)

        output = output.view(n_head, sz_b, len_q, d_v)
        output = output.permute(1, 2, 0, 3).contiguous().view(sz_b, len_q, -1)  # b x lq x (n*dv)

        output = self.dropout(self.fc(output))
        output = self.layer_norm(output + residual)

        return output, attn
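The two attention modules above are defined at the bottom of bilinear_resnet34.py but never wired into the BCNN. A minimal smoke test, assuming self-attention over the 7x7 grid treated as a 49-token sequence and d_model = n_head * d_v:

import torch

mha = MultiHeadAttention(n_head=8, d_model=512, d_k=64, d_v=64)
q = torch.randn(2, 49, 512)   # e.g. 7x7 spatial positions as a sequence
out, attn = mha(q, q, q)      # self-attention; mask defaults to None
print(out.shape, attn.shape)  # (2, 49, 512) and (16, 49, 49)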
-------------------------------------------------------------------------------- /bilinear_resnet34_double.py: --------------------------------------------------------------------------------
# -*- coding: utf-8 -*-

import os

import torch
import torchvision
import time
import cub_200_2011 as dataset

torch.manual_seed(0)
torch.cuda.manual_seed_all(0)

class BCNN(torch.nn.Module):

    def __init__(self):
        """Declare all needed layers."""
        torch.nn.Module.__init__(self)
        resnet_model = torchvision.models.resnet34(pretrained=True)

        self.conv1 = resnet_model.conv1
        self.bn1 = resnet_model.bn1
        self.relu = resnet_model.relu
        self.maxpool = resnet_model.maxpool
        self.layer1 = resnet_model.layer1
        self.layer2 = resnet_model.layer2
        self.layer3 = resnet_model.layer3
        self.layer4 = resnet_model.layer4

        resnet_model = torchvision.models.resnet34(pretrained=True)
        self._conv1 = resnet_model.conv1
        self._bn1 = resnet_model.bn1
        self._relu = resnet_model.relu
        self._maxpool = resnet_model.maxpool
        self._layer1 = resnet_model.layer1
        self._layer2 = resnet_model.layer2
        self._layer3 = resnet_model.layer3
        self._layer4 = resnet_model.layer4
        # Linear classifier.
        self.fc = torch.nn.Linear(512**2, 200)
        # Initialize the fc layers.
        torch.nn.init.kaiming_normal_(self.fc.weight.data)
        if self.fc.bias is not None:
            torch.nn.init.constant_(self.fc.bias.data, val=0)

    def forward(self, X):
        N = X.size()[0]
        assert X.size() == (N, 3, 224, 224)
        x1 = self.conv1(X)
        x1 = self.bn1(x1)
        x1 = self.relu(x1)
        x1 = self.maxpool(x1)
        x1 = self.layer1(x1)
        x1 = self.layer2(x1)
        x1 = self.layer3(x1)
        x1 = self.layer4(x1)

        x2 = self._conv1(X)
        x2 = self._bn1(x2)
        x2 = self._relu(x2)
        x2 = self._maxpool(x2)
        x2 = self._layer1(x2)
        x2 = self._layer2(x2)
        x2 = self._layer3(x2)
        x2 = self._layer4(x2)
        assert x1.size() == (N, 512, 7, 7)
        x1 = x1.view(N, 512, 7**2)
        x2 = x2.view(N, 512, 7**2)
        X = torch.bmm(x1, torch.transpose(x2, 1, 2)) / (7**2)  # Bilinear
        assert X.size() == (N, 512, 512)
        X = X.view(N, 512**2)
        X = torch.sqrt(X + 1e-5)
        X = torch.nn.functional.normalize(X)
        X = self.fc(X)
        assert X.size() == (N, 200)
        return X

    def freeze_layers(self):
        # Freeze all layers of both streams, keeping only the fc head trainable.
        for param in self.conv1.parameters():
            param.requires_grad = False
        for param in self.bn1.parameters():
            param.requires_grad = False
        for param in self.layer1.parameters():
            param.requires_grad = False
        for param in self.layer2.parameters():
            param.requires_grad = False
        for param in self.layer3.parameters():
            param.requires_grad = False
        for param in self.layer4.parameters():
            param.requires_grad = False

        for param in self._conv1.parameters():
            param.requires_grad = False
        for param in self._bn1.parameters():
            param.requires_grad = False
        for param in self._layer1.parameters():
            param.requires_grad = False
        for param in self._layer2.parameters():
            param.requires_grad = False
        for param in self._layer3.parameters():
            param.requires_grad = False
        for param in self._layer4.parameters():
            param.requires_grad = False

class BCNNManager(object):
    """Manager class to train the bilinear CNN.

    Attributes:
        _options: Hyperparameters.
        _path: Useful paths.
        _net: Bilinear CNN.
        _criterion: Cross-entropy loss.
        _solver: SGD with momentum.
        _scheduler: Reduces the learning rate by a factor of 0.1 on plateau.
        _train_loader: Training data.
        _test_loader: Testing data.
    """
    def __init__(self, options, path, freeze=True, pre_model_path=None):
        """Prepare the network, criterion, solver, and data.

        Args:
            options, dict: Hyperparameters.
        """
        print('Prepare the network and data.')
        self._options = options
        self._path = path
        # Network.
        self._net = torch.nn.DataParallel(BCNN()).cuda()
        #print(self._net)
        if freeze is True:
            self._net.module.freeze_layers()
        if pre_model_path is not None:
            self._net.load_state_dict(torch.load(pre_model_path))
        # Criterion.
        self._criterion = torch.nn.CrossEntropyLoss().cuda()
        # Solver.
        if freeze is True:
            self._solver = torch.optim.SGD(
                self._net.module.fc.parameters(), lr=self._options['base_lr'],
                momentum=0.9, weight_decay=self._options['weight_decay'])
        else:
            self._solver = torch.optim.SGD(
                self._net.parameters(), lr=self._options['base_lr'],
                momentum=0.9, weight_decay=self._options['weight_decay'])
        self._scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
            self._solver, mode='max', factor=0.1, patience=3, verbose=True,
            threshold=1e-4)
        self._train_loader, self._valid_loader = dataset.get_train_validation_data_loader(
            resize_size=224,
            batch_size=self._options['batch_size'],
            random_seed=96,
            validation_size=0,
            object_boxes_dict=None,
            show_sample=False,
            augment=True
        )
        self._test_loader = dataset.get_test_data_loader(
            resize_size=224,
            batch_size=32,
            object_boxes_dict=None
        )

    def train(self):
        """Train the network."""
        print('Training.')
        best_acc = 0.0
        best_epoch = None
        print('Epoch\tTrain loss\tTrain acc\tTest acc')
        for t in range(self._options['epochs']):
            epoch_loss = []
            num_correct = 0
            num_total = 0
            for i, (_, X, y) in enumerate(self._train_loader, 0):
                # Data.
                X = torch.autograd.Variable(X.cuda())
                y = torch.autograd.Variable(y.cuda(non_blocking=True))

                # Clear the existing gradients.
                self._solver.zero_grad()
                # Forward pass.
                score = self._net(X)
                loss = self._criterion(score, y)
                epoch_loss.append(loss.data.item())
                # Prediction.
                _, prediction = torch.max(score.data, 1)
                num_total += y.size(0)
                num_correct += torch.sum(prediction == y.data).float()
                # Backward pass.
                loss.backward()
                self._solver.step()
            train_acc = 100.0 * num_correct / num_total
            #valid_acc = 1.0 * self._accuracy(self._valid_loader)
            test_acc = 1.0 * self._accuracy(self._test_loader)
            self._scheduler.step(test_acc)
            if test_acc > best_acc:
                best_acc = test_acc
                best_epoch = t + 1
                print('*', end='')
                # Save model onto disk.
                torch.save(self._net.state_dict(), self._path)
            print('%d\t%4.3f\t\t%4.2f%%\t\t%4.2f%%' %
                  (t + 1, sum(epoch_loss) / len(epoch_loss), train_acc, test_acc))
        print('Best at epoch %d, test accuracy %f' % (best_epoch, best_acc))

    def _accuracy(self, data_loader):
        """Compute the train/test accuracy.

        Args:
            data_loader: Train/Test DataLoader.

        Returns:
            Train/Test accuracy in percentage.
        """
        self._net.train(False)
        num_correct = 0
        num_total = 0
        for i, (_, X, y) in enumerate(data_loader, 0):
            # Data.
            X = torch.autograd.Variable(X.cuda())
            y = torch.autograd.Variable(y.cuda(non_blocking=True))

            # Prediction.
            score = self._net(X)
            _, prediction = torch.max(score.data, 1)
            num_total += y.size(0)
            num_correct += torch.sum(prediction == y.data).float()
        self._net.train(True)  # Set the model to training phase
        return 100.0 * num_correct / num_total

def fc():
    options = {
        'base_lr': 1.0,
        'batch_size': 64,
        'epochs': 55,
        'weight_decay': 1e-8,
    }
    pre_model_path = None
    path_save = 'models/resnet34_fc_double_224.pth'
    manager = BCNNManager(options, path_save, freeze=True, pre_model_path=pre_model_path)
    manager.train()

def all_layers():
    options = {
        'base_lr': 0.01,
        'batch_size': 64,
        'epochs': 30,
        'weight_decay': 1e-5,
    }
    pre_model_path = 'models/resnet34_fc_double_224.pth'
    path_save = 'models/resnet34_all_double_224.pth'
    manager = BCNNManager(options, path_save, freeze=False, pre_model_path=pre_model_path)
    manager.train()

if __name__ == '__main__':
    #dataset.use_less_data = True
    fc()
    torch.cuda.empty_cache()
    all_layers()
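The ResNet-34 streams above already output 512 channels, so no reduction is needed. The next file (like bilinear_resnet152.py) instead inserts a 1x1 conv to shrink the backbone channels to 512 before pooling; a quick back-of-envelope on why, assuming 200 output classes:

# The classifier input is C**2, so fc parameters grow with C**2 * num_classes.
for C in (512, 2048):
    fc_params = C**2 * 200          # weights of the final linear layer
    print(C, '->', '%.0fM parameters' % (fc_params / 1e6))
# 512  ->  52M parameters
# 2048 -> 839M parameters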
-------------------------------------------------------------------------------- /bilinear_resnet50_densenet121.py: --------------------------------------------------------------------------------
# -*- coding: utf-8 -*-

import os

import torch
import torchvision
import time
import cub_200_2011 as dataset

torch.manual_seed(0)
torch.cuda.manual_seed_all(0)

class BCNN(torch.nn.Module):

    def __init__(self):
        """Declare all needed layers."""
        torch.nn.Module.__init__(self)
        # ResNet-50 branch.
        resnet_model = torchvision.models.resnet50(pretrained=True)
        self.conv1 = resnet_model.conv1
        self.bn1 = resnet_model.bn1
        self.relu = resnet_model.relu
        self.maxpool = resnet_model.maxpool
        self.layer1 = resnet_model.layer1
        self.layer2 = resnet_model.layer2
        self.layer3 = resnet_model.layer3
        self.layer4 = resnet_model.layer4
        in_channels = 2048
        out_channels = 512
        self.conv2 = torch.nn.Conv2d(in_channels, out_channels, 1)
        self.bn2 = torch.nn.BatchNorm2d(out_channels)
        self.relu2 = torch.nn.ReLU(inplace=True)

        # DenseNet-121 branch.
        self.features = torchvision.models.densenet121(pretrained=True).features
        self.in_features = torchvision.models.densenet121().classifier.in_features  # 1024
        self.conv = torch.nn.Conv2d(self.in_features, 512, 1)
        self.bn = torch.nn.BatchNorm2d(512)
        # self.relu from the ResNet branch is reused here; reassigning it
        # would silently replace the registered module.

        # Linear classifier.
        self.fc = torch.nn.Linear(512**2, 200)

        # Initialize the fc layers.
        torch.nn.init.kaiming_normal_(self.fc.weight.data)
        if self.fc.bias is not None:
            torch.nn.init.constant_(self.fc.bias.data, val=0)

    def forward(self, X):
        N = X.size()[0]
        assert X.size() == (N, 3, 224, 224)
        # ResNet branch.
        x1 = self.conv1(X)
        x1 = self.bn1(x1)
        x1 = self.relu(x1)
        x1 = self.maxpool(x1)
        x1 = self.layer1(x1)
        x1 = self.layer2(x1)
        x1 = self.layer3(x1)
        x1 = self.layer4(x1)
        assert x1.size() == (N, 2048, 7, 7)
        x1 = self.conv2(x1)
        x1 = self.bn2(x1)
        x1 = self.relu2(x1)
        x1 = x1.view(N, 512, 7**2)

        # DenseNet branch.
        x2 = self.features(X)
        assert x2.size() == (N, self.in_features, 7, 7)
        x2 = self.conv(x2)
        x2 = self.bn(x2)
        x2 = self.relu(x2)
        x2 = x2.view(N, 512, 7**2)

        X = torch.bmm(x1, torch.transpose(x2, 1, 2)) / (7**2)  # Bilinear
        assert X.size() == (N, 512, 512)
        X = X.view(N, 512**2)
        X = torch.sqrt(X + 1e-5)
        X = torch.nn.functional.normalize(X)
        X = self.fc(X)
        assert X.size() == (N, 200)
        return X

    def freeze_layers(self):
        # Freeze both backbones, keeping only the new conv/bn/fc layers trainable.
        for param in self.conv1.parameters():
            param.requires_grad = False
        for param in self.bn1.parameters():
            param.requires_grad = False
        for param in self.layer1.parameters():
            param.requires_grad = False
        for param in self.layer2.parameters():
            param.requires_grad = False
        for param in self.layer3.parameters():
            param.requires_grad = False
        for param in self.layer4.parameters():
            param.requires_grad = False

        for param in self.features.parameters():
            param.requires_grad = False


class BCNNManager(object):
    """Manager class to train the bilinear CNN.

    Attributes:
        _options: Hyperparameters.
        _path: Useful paths.
        _net: Bilinear CNN.
        _criterion: Cross-entropy loss.
        _solver: SGD with momentum.
        _scheduler: Reduces the learning rate by a factor of 0.1 on plateau.
        _train_loader: Training data.
        _test_loader: Testing data.
    """
    def __init__(self, options, path, freeze=True, pre_model_path=None):
        """Prepare the network, criterion, solver, and data.

        Args:
            options, dict: Hyperparameters.
        """
        print('Prepare the network and data.')
        self._options = options
        self._path = path
        # Network.
        self._net = torch.nn.DataParallel(BCNN()).cuda()
        #print(self._net)
        if freeze is True:
            self._net.module.freeze_layers()
        if pre_model_path is not None:
            self._net.load_state_dict(torch.load(pre_model_path))
        # Criterion.
        self._criterion = torch.nn.CrossEntropyLoss().cuda()
        # Solver.
        if freeze is True:
            self._solver = torch.optim.SGD(
                list(self._net.module.conv2.parameters())
                + list(self._net.module.bn2.parameters())
                + list(self._net.module.conv.parameters())
                + list(self._net.module.bn.parameters())
                + list(self._net.module.fc.parameters()),
                lr=self._options['base_lr'],
                momentum=0.9, weight_decay=self._options['weight_decay'])
        else:
            self._solver = torch.optim.SGD(
                self._net.parameters(), lr=self._options['base_lr'],
                momentum=0.9, weight_decay=self._options['weight_decay'])
        self._scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
            self._solver, mode='max', factor=0.1, patience=3, verbose=True,
            threshold=1e-4)
        self._train_loader, self._valid_loader = dataset.get_train_validation_data_loader(
            resize_size=224,
            batch_size=self._options['batch_size'],
            random_seed=96,
            validation_size=0,
            object_boxes_dict=None,
            show_sample=False,
            augment=True
        )
        self._test_loader = dataset.get_test_data_loader(
            resize_size=224,
            batch_size=32,
            object_boxes_dict=None
        )

    def train(self):
        """Train the network."""
        print('Training.')
        best_acc = 0.0
        best_epoch = None
        print('Epoch\tTrain loss\tTrain acc\tTest acc')
        for t in range(self._options['epochs']):
            epoch_loss = []
            num_correct = 0
            num_total = 0
            for i, (_, X, y) in enumerate(self._train_loader, 0):
                # Data.
                X = torch.autograd.Variable(X.cuda())
                y = torch.autograd.Variable(y.cuda(non_blocking=True))

                # Clear the existing gradients.
                self._solver.zero_grad()
                # Forward pass.
                score = self._net(X)
                loss = self._criterion(score, y)
                epoch_loss.append(loss.data.item())
                # Prediction.
                _, prediction = torch.max(score.data, 1)
                num_total += y.size(0)
                num_correct += torch.sum(prediction == y.data).float()
                # Backward pass.
                loss.backward()
                self._solver.step()
            train_acc = 100.0 * num_correct / num_total
            #valid_acc = 1.0 * self._accuracy(self._valid_loader)
            test_acc = 1.0 * self._accuracy(self._test_loader)
            self._scheduler.step(test_acc)
            if test_acc > best_acc:
                best_acc = test_acc
                best_epoch = t + 1
                print('*', end='')
                # Save model onto disk.
                torch.save(self._net.state_dict(), self._path)
            print('%d\t%4.3f\t\t%4.2f%%\t\t%4.2f%%' %
                  (t + 1, sum(epoch_loss) / len(epoch_loss), train_acc, test_acc))
        print('Best at epoch %d, test accuracy %f' % (best_epoch, best_acc))

    def _accuracy(self, data_loader):
        """Compute the train/test accuracy.

        Args:
            data_loader: Train/Test DataLoader.

        Returns:
            Train/Test accuracy in percentage.
        """
        self._net.train(False)
        num_correct = 0
        num_total = 0
        for i, (_, X, y) in enumerate(data_loader, 0):
            # Data.
            X = torch.autograd.Variable(X.cuda())
            y = torch.autograd.Variable(y.cuda(non_blocking=True))

            # Prediction.
            score = self._net(X)
            _, prediction = torch.max(score.data, 1)
            num_total += y.size(0)
            num_correct += torch.sum(prediction == y.data).float()
        self._net.train(True)  # Set the model to training phase
        return 100.0 * num_correct / num_total

def fc():
    options = {
        'base_lr': 1.0,
        'batch_size': 64,
        'epochs': 55,
        'weight_decay': 1e-8,
    }
    pre_model_path = None
    path_save = 'models/resnet_densenet_fc_224.pth'
    manager = BCNNManager(options, path_save, freeze=True, pre_model_path=pre_model_path)
    manager.train()

def all_layers():
    options = {
        'base_lr': 0.01,
        'batch_size': 16,
        'epochs': 30,
        'weight_decay': 1e-5,
    }
    pre_model_path = 'models/resnet_densenet_fc_224.pth'
    path_save = 'models/resnet_densenet_all_224.pth'
    manager = BCNNManager(options, path_save, freeze=False, pre_model_path=pre_model_path)
    manager.train()

if __name__ == '__main__':
    #dataset.use_less_data = True
    fc()
    torch.cuda.empty_cache()
    all_layers()
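The _accuracy methods above toggle train(False)/train(True) but still build autograd graphs during evaluation, which wastes GPU memory. A hedged alternative sketch of the same loop under torch.no_grad() (not what the repo currently does):

import torch

def accuracy(net, data_loader):
    """Top-1 accuracy (%) without building autograd graphs."""
    net.eval()
    num_correct, num_total = 0, 0
    with torch.no_grad():
        for _, X, y in data_loader:
            X, y = X.cuda(), y.cuda(non_blocking=True)
            prediction = net(X).argmax(dim=1)
            num_total += y.size(0)
            num_correct += (prediction == y).sum().item()
    net.train()
    return 100.0 * num_correct / num_total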
-------------------------------------------------------------------------------- /bilinear_vgg16.py: --------------------------------------------------------------------------------
# -*- coding: utf-8 -*-

import os

import torch
import torchvision
import time
import cub_200_2011 as dataset

torch.manual_seed(0)
torch.cuda.manual_seed_all(0)

class BCNN(torch.nn.Module):

    def __init__(self):
        """Declare all needed layers."""
        torch.nn.Module.__init__(self)
        # Convolution and pooling layers of VGG-16.
        self.features = torchvision.models.vgg16(pretrained=True).features
        self.features = torch.nn.Sequential(*list(self.features.children())
                                            [:-1])  # Remove pool5.
        # Linear classifier.
        self.fc = torch.nn.Linear(512**2, 200)

        # Initialize the fc layers.
        torch.nn.init.kaiming_normal_(self.fc.weight.data)
        if self.fc.bias is not None:
            torch.nn.init.constant_(self.fc.bias.data, val=0)

    def forward(self, X):
        N = X.size()[0]
        assert X.size() == (N, 3, 448, 448)
        X = self.features(X)
        assert X.size() == (N, 512, 28, 28)
        X = X.view(N, 512, 28**2)
        X = torch.bmm(X, torch.transpose(X, 1, 2)) / (28**2)  # Bilinear
        assert X.size() == (N, 512, 512)
        X = X.view(N, 512**2)
        X = torch.sqrt(X + 1e-5)
        X = torch.nn.functional.normalize(X)
        X = self.fc(X)
        assert X.size() == (N, 200)
        return X

    def freeze_layers(self):
        # Freeze all layers before the new fc head.
        for param in self.features.parameters():
            param.requires_grad = False

class BCNNManager(object):
    """Manager class to train the bilinear CNN.

    Attributes:
        _options: Hyperparameters.
        _path: Useful paths.
        _net: Bilinear CNN.
        _criterion: Cross-entropy loss.
        _solver: SGD with momentum.
        _scheduler: Reduces the learning rate by a factor of 0.1 on plateau.
        _train_loader: Training data.
        _test_loader: Testing data.
    """
    def __init__(self, options, path, freeze=True, pre_model_path=None):
        """Prepare the network, criterion, solver, and data.

        Args:
            options, dict: Hyperparameters.
        """
        print('Prepare the network and data.')
        self._options = options
        self._path = path
        # Network.
        self._net = torch.nn.DataParallel(BCNN()).cuda()
        #print(self._net)
        if freeze is True:
            self._net.module.freeze_layers()
        if pre_model_path is not None:
            self._net.load_state_dict(torch.load(pre_model_path))
        # Criterion.
        self._criterion = torch.nn.CrossEntropyLoss().cuda()
        # Solver.
        if freeze is True:
            self._solver = torch.optim.SGD(
                self._net.module.fc.parameters(), lr=self._options['base_lr'],
                momentum=0.9, weight_decay=self._options['weight_decay'])
        else:
            self._solver = torch.optim.SGD(
                self._net.parameters(), lr=self._options['base_lr'],
                momentum=0.9, weight_decay=self._options['weight_decay'])
        self._scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
            self._solver, mode='max', factor=0.1, patience=3, verbose=True,
            threshold=1e-4)
        self._train_loader, self._valid_loader = dataset.get_train_validation_data_loader(
            resize_size=448,
            batch_size=self._options['batch_size'],
            random_seed=96,
            validation_size=0,
            object_boxes_dict=None,
            show_sample=False,
            augment=True
        )
        self._test_loader = dataset.get_test_data_loader(
            resize_size=448,
            batch_size=32,
            object_boxes_dict=None
        )

    def train(self):
        """Train the network."""
        print('Training.')
        best_acc = 0.0
        best_epoch = None
        print('Epoch\tTrain loss\tTrain acc\tTest acc')
        for t in range(self._options['epochs']):
            epoch_loss = []
            num_correct = 0
            num_total = 0
            for i, (_, X, y) in enumerate(self._train_loader, 0):
                # Data.
                X = torch.autograd.Variable(X.cuda())
                y = torch.autograd.Variable(y.cuda(non_blocking=True))

                # Clear the existing gradients.
                self._solver.zero_grad()
                # Forward pass.
                score = self._net(X)
                loss = self._criterion(score, y)
                epoch_loss.append(loss.data.item())
                # Prediction.
                _, prediction = torch.max(score.data, 1)
                num_total += y.size(0)
                num_correct += torch.sum(prediction == y.data).float()
                # Backward pass.
                loss.backward()
                self._solver.step()
            train_acc = 100.0 * num_correct / num_total
            #valid_acc = 1.0 * self._accuracy(self._valid_loader)
            test_acc = 1.0 * self._accuracy(self._test_loader)
            self._scheduler.step(test_acc)
            if test_acc > best_acc:
                best_acc = test_acc
                best_epoch = t + 1
                print('*', end='')
                # Save model onto disk.
                torch.save(self._net.state_dict(), self._path)
            print('%d\t%4.3f\t\t%4.2f%%\t\t%4.2f%%' %
                  (t + 1, sum(epoch_loss) / len(epoch_loss), train_acc, test_acc))
        print('Best at epoch %d, test accuracy %f' % (best_epoch, best_acc))

    def _accuracy(self, data_loader):
        """Compute the train/test accuracy.

        Args:
            data_loader: Train/Test DataLoader.

        Returns:
            Train/Test accuracy in percentage.
        """
        self._net.train(False)
        num_correct = 0
        num_total = 0
        for i, (_, X, y) in enumerate(data_loader, 0):
            # Data.
            X = torch.autograd.Variable(X.cuda())
            y = torch.autograd.Variable(y.cuda(non_blocking=True))

            # Prediction.
            score = self._net(X)
            _, prediction = torch.max(score.data, 1)
            num_total += y.size(0)
            num_correct += torch.sum(prediction == y.data).float()
        self._net.train(True)  # Set the model to training phase
        return 100.0 * num_correct / num_total

def fc():
    options = {
        'base_lr': 1.0,
        'batch_size': 64,
        'epochs': 55,
        'weight_decay': 1e-8,
    }
    pre_model_path = None
    path_save = 'models/vgg16_fc.pth'
    manager = BCNNManager(options, path_save, freeze=True, pre_model_path=pre_model_path)
    manager.train()

def all_layers():
    options = {
        'base_lr': 0.01,
        'batch_size': 32,
        'epochs': 30,
        'weight_decay': 1e-5,
    }
    pre_model_path = 'models/vgg16_fc.pth'
    path_save = 'models/vgg16_all.pth'
    manager = BCNNManager(options, path_save, freeze=False, pre_model_path=pre_model_path)
    manager.train()

if __name__ == '__main__':
    fc()
    torch.cuda.empty_cache()
    all_layers()
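With pool5 removed, the VGG-16 feature extractor downsamples by a factor of 16, which is where the 28x28 maps at 448-pixel input (this file) and the 14x14 maps at 224-pixel input (the double variant below) come from. A quick shape check:

import torch
import torchvision

feats = torch.nn.Sequential(
    *list(torchvision.models.vgg16(pretrained=False).features.children())[:-1])
with torch.no_grad():
    print(feats(torch.zeros(1, 3, 448, 448)).shape)  # (1, 512, 28, 28)
    print(feats(torch.zeros(1, 3, 224, 224)).shape)  # (1, 512, 14, 14)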
-------------------------------------------------------------------------------- /bilinear_vgg16_double.py: --------------------------------------------------------------------------------
# -*- coding: utf-8 -*-

import os

import torch
import torchvision
import time
import cub_200_2011 as dataset

torch.manual_seed(0)
torch.cuda.manual_seed_all(0)

class BCNN(torch.nn.Module):

    def __init__(self):
        """Declare all needed layers."""
        torch.nn.Module.__init__(self)
        # Convolution and pooling layers of VGG-16, twice (two streams).
        self.features = torchvision.models.vgg16(pretrained=True).features
        self.features = torch.nn.Sequential(*list(self.features.children())
                                            [:-1])  # Remove pool5.
        self._features = torchvision.models.vgg16(pretrained=True).features
        self._features = torch.nn.Sequential(*list(self._features.children())
                                             [:-1])  # Remove pool5.
        # Linear classifier.
        self.fc = torch.nn.Linear(512**2, 200)

        # Initialize the fc layers.
        torch.nn.init.kaiming_normal_(self.fc.weight.data)
        if self.fc.bias is not None:
            torch.nn.init.constant_(self.fc.bias.data, val=0)

    def forward(self, X):
        N = X.size()[0]
        assert X.size() == (N, 3, 224, 224)
        X1 = self.features(X)
        X2 = self._features(X)
        assert X1.size() == (N, 512, 14, 14)
        X1 = X1.view(N, 512, 14**2)
        X2 = X2.view(N, 512, 14**2)
        X = torch.bmm(X1, torch.transpose(X2, 1, 2)) / (14**2)  # Bilinear
        assert X.size() == (N, 512, 512)
        X = X.view(N, 512**2)
        X = torch.sqrt(X + 1e-5)
        X = torch.nn.functional.normalize(X)
        X = self.fc(X)
        assert X.size() == (N, 200)
        return X

    def freeze_layers(self):
        # Freeze both streams, keeping only the fc head trainable.
        for param in self.features.parameters():
            param.requires_grad = False
        for param in self._features.parameters():
            param.requires_grad = False

class BCNNManager(object):
    """Manager class to train the bilinear CNN.

    Attributes:
        _options: Hyperparameters.
        _path: Useful paths.
        _net: Bilinear CNN.
        _criterion: Cross-entropy loss.
        _solver: SGD with momentum.
        _scheduler: Reduces the learning rate by a factor of 0.1 on plateau.
        _train_loader: Training data.
        _test_loader: Testing data.
    """
    def __init__(self, options, path, freeze=True, pre_model_path=None):
        """Prepare the network, criterion, solver, and data.

        Args:
            options, dict: Hyperparameters.
        """
        print('Prepare the network and data.')
        self._options = options
        self._path = path
        # Network.
        self._net = torch.nn.DataParallel(BCNN()).cuda()
        #print(self._net)
        if freeze is True:
            self._net.module.freeze_layers()
        if pre_model_path is not None:
            self._net.load_state_dict(torch.load(pre_model_path))
        # Criterion.
        self._criterion = torch.nn.CrossEntropyLoss().cuda()
        # Solver.
        if freeze is True:
            self._solver = torch.optim.SGD(
                self._net.module.fc.parameters(), lr=self._options['base_lr'],
                momentum=0.9, weight_decay=self._options['weight_decay'])
        else:
            self._solver = torch.optim.SGD(
                self._net.parameters(), lr=self._options['base_lr'],
                momentum=0.9, weight_decay=self._options['weight_decay'])
        self._scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
            self._solver, mode='max', factor=0.1, patience=3, verbose=True,
            threshold=1e-4)
        self._train_loader, self._valid_loader = dataset.get_train_validation_data_loader(
            resize_size=224,
            batch_size=self._options['batch_size'],
            random_seed=96,
            validation_size=0,
            object_boxes_dict=None,
            show_sample=False,
            augment=True
        )
        self._test_loader = dataset.get_test_data_loader(
            resize_size=224,
            batch_size=32,
            object_boxes_dict=None
        )

    def train(self):
        """Train the network."""
        print('Training.')
        best_acc = 0.0
        best_epoch = None
        print('Epoch\tTrain loss\tTrain acc\tTest acc')
        for t in range(self._options['epochs']):
            epoch_loss = []
            num_correct = 0
            num_total = 0
            for i, (_, X, y) in enumerate(self._train_loader, 0):
                # Data.
                X = torch.autograd.Variable(X.cuda())
                y = torch.autograd.Variable(y.cuda(non_blocking=True))

                # Clear the existing gradients.
                self._solver.zero_grad()
                # Forward pass.
                score = self._net(X)
                loss = self._criterion(score, y)
                epoch_loss.append(loss.data.item())
                # Prediction.
                _, prediction = torch.max(score.data, 1)
                num_total += y.size(0)
                num_correct += torch.sum(prediction == y.data).float()
                # Backward pass.
                loss.backward()
                self._solver.step()
            train_acc = 100.0 * num_correct / num_total
            #valid_acc = 1.0 * self._accuracy(self._valid_loader)
            test_acc = 1.0 * self._accuracy(self._test_loader)
            self._scheduler.step(test_acc)
            if test_acc > best_acc:
                best_acc = test_acc
                best_epoch = t + 1
                print('*', end='')
                # Save model onto disk.
                torch.save(self._net.state_dict(), self._path)
            print('%d\t%4.3f\t\t%4.2f%%\t\t%4.2f%%' %
                  (t + 1, sum(epoch_loss) / len(epoch_loss), train_acc, test_acc))
        print('Best at epoch %d, test accuracy %f' % (best_epoch, best_acc))

    def _accuracy(self, data_loader):
        """Compute the train/test accuracy.

        Args:
            data_loader: Train/Test DataLoader.

        Returns:
            Train/Test accuracy in percentage.
        """
        self._net.train(False)
        num_correct = 0
        num_total = 0
        for i, (_, X, y) in enumerate(data_loader, 0):
            # Data.
            X = torch.autograd.Variable(X.cuda())
            y = torch.autograd.Variable(y.cuda(non_blocking=True))

            # Prediction.
174 | score = self._net(X) 175 | _, prediction = torch.max(score.data, 1) 176 | num_total += y.size(0) 177 | num_correct += torch.sum(prediction == y.data).float() 178 | self._net.train(True) # Set the model to training phase 179 | return 100.0 * num_correct / num_total 180 | 181 | def fc(): 182 | options = { 183 | 'base_lr': 1.0, 184 | 'batch_size': 64, 185 | 'epochs': 55, 186 | 'weight_decay': 1e-8, 187 | } 188 | pre_model_path = None 189 | path_save='models/vgg16_fc_double.pth' 190 | manager = BCNNManager(options, path_save, freeze=True, pre_model_path=pre_model_path) 191 | manager.train() 192 | 193 | def all_layers(): 194 | options = { 195 | 'base_lr': 0.01, 196 | 'batch_size': 32, 197 | 'epochs': 30, 198 | 'weight_decay': 1e-5, 199 | } 200 | pre_model_path = 'models/vgg16_fc_double.pth' 201 | path_save='models/vgg16_all_double.pth' 202 | manager = BCNNManager(options, path_save, freeze=False, pre_model_path=pre_model_path) 203 | manager.train() 204 | 205 | if __name__ == '__main__': 206 | fc() 207 | torch.cuda.empty_cache() 208 | all_layers() -------------------------------------------------------------------------------- /cub_200_2011.py: -------------------------------------------------------------------------------- 1 | """ 2 | Class of Dataset 3 | 4 | call get_train_validation_data_loader(resize_shape, batch_size, random_seed, 5 | augment=False, validation_size=0.3, 6 | object_boxes_dict=None, 7 | shuffle=True, show_sample=False) 8 | return (train_loader, valid_loader) 9 | call get_test_data_loader(resize_shape, batch_size, object_boxes_dict=None, shuffle=True) 10 | return (test_loader) 11 | 12 | """ 13 | 14 | 15 | import os 16 | 17 | import torch 18 | import numpy as np 19 | 20 | import utils 21 | 22 | from PIL import Image 23 | from torchvision import transforms 24 | from torch.utils.data import Dataset 25 | from torch.utils.data.sampler import SubsetRandomSampler 26 | 27 | use_less_data = False # this flag is just for debugging multiple-process task 28 | less_data_count_train = 500 29 | less_data_count_test = 400 30 | 31 | class BirdsDataset(Dataset): 32 | 33 | dataset_url = 'http://www.vision.caltech.edu/visipedia-data/CUB-200-2011/CUB_200_2011.tgz' 34 | tar_file_name = 'CUB_200_2011.tgz' 35 | root_dir = 'CUB_200_2011' 36 | img_dir = 'images' 37 | data_split_file_name = 'train_test_split.txt' 38 | image_path_file_name = 'images.txt' 39 | image_label_file_name = 'image_class_labels.txt' 40 | classes_file_name = 'classes.txt' 41 | 42 | def __init__(self, object_boxes_dict=None, train=True, transform=None): 43 | self.train = train 44 | self.transform = transform 45 | self.download() # download before loading 46 | 47 | train_indexes, test_indexes = self._get_train_test_indexes() 48 | img_label_dict = self._get_labels_of_images() 49 | img_path_dict = self._get_path_of_images() 50 | count = 0 51 | if train: 52 | self.image_indexes = [] 53 | self.train_data = [] 54 | self.train_labels = [] 55 | for i in train_indexes: 56 | if use_less_data and count == less_data_count_train: 57 | break 58 | self.image_indexes.append(i) 59 | img_path = os.path.join(self.root_dir, self.img_dir, img_path_dict[i]) 60 | img = self.__get_image_data(i, img_path, object_boxes_dict) 61 | self.train_data.append(img) 62 | self.train_labels.append(img_label_dict[i]) 63 | count += 1 64 | else: 65 | self.image_indexes = [] 66 | self.test_data = [] 67 | self.test_labels = [] 68 | for i in test_indexes: 69 | if use_less_data and count == less_data_count_test: 70 | break 71 | self.image_indexes.append(i) 72 | 
img_path = os.path.join(self.root_dir, self.img_dir, img_path_dict[i])
 73 |                 img = self.__get_image_data(i, img_path, object_boxes_dict)
 74 |                 self.test_data.append(img)
 75 |                 self.test_labels.append(img_label_dict[i])
 76 |                 count += 1
 77 | 
 78 |     def __getitem__(self, index):
 79 |         if self.train:
 80 |             data = self.train_data[index]
 81 |             label = self.train_labels[index]
 82 |         else:
 83 |             data = self.test_data[index]
 84 |             label = self.test_labels[index]
 85 |         if self.transform is not None:
 86 |             data = self.transform(data)
 87 |         return self.image_indexes[index], data, label
 88 | 
 89 |     def __len__(self):
 90 |         if self.train:
 91 |             return len(self.train_data)
 92 |         else:
 93 |             return len(self.test_data)
 94 | 
 95 |     def download(self):
 96 |         if self.__check_exist():
 97 |             return
 98 |         utils.download_file(self.dataset_url, self.tar_file_name)
 99 |         utils.extract_tgz(self.tar_file_name)
100 | 
101 |     def __check_exist(self):
102 |         return os.path.exists(self.root_dir + '/images')
103 | 
104 |     def __get_image_data(self, img_idx, fpath, object_boxes_dict):
105 |         img = Image.open(fpath)
106 |         img = img.convert('RGB')  # a few of the original images are not plain RGB JPEGs, so normalize the mode here
107 |         if object_boxes_dict is not None:
108 |             box = object_boxes_dict[img_idx]  # (x, y, w, h)
109 |             box = (box[0], box[1], box[0] + box[2], box[1] + box[3])  # (left, upper, right, lower)
110 |             img = img.crop(box)
111 |         return img
112 | 
113 |     def _get_train_test_indexes(self):
114 |         fpath = os.path.join(self.root_dir, self.data_split_file_name)
115 |         train_indexes = []
116 |         test_indexes = []
117 |         with open(fpath, 'r') as file:
118 |             for line in file:
119 |                 tmp = line.split(' ')
120 |                 flag = tmp[1][0]
121 |                 if flag == '1':
122 |                     train_indexes.append(int(tmp[0]))
123 |                 else:
124 |                     test_indexes.append(int(tmp[0]))
125 |         return train_indexes, test_indexes
126 | 
127 |     def _get_path_of_images(self):
128 |         fpath = os.path.join(self.root_dir, self.image_path_file_name)
129 |         img_path_dict = {}
130 |         with open(fpath, 'r') as file:
131 |             for line in file:
132 |                 tmp = line.split(' ')
133 |                 img_path_dict[int(tmp[0])] = tmp[1].strip('\n')
134 |         return img_path_dict
135 | 
136 |     def _get_labels_of_images(self):
137 |         fpath = os.path.join(self.root_dir, self.image_label_file_name)
138 |         img_label_dict = {}
139 |         with open(fpath, 'r') as file:
140 |             for line in file:
141 |                 tmp = line.split(' ')
142 |                 img_label_dict[int(tmp[0])] = int(tmp[1]) - 1  # shift labels to start from 0; nn.CrossEntropyLoss expects targets in [0, num_classes - 1]
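        # Worked example of the shift above: image_class_labels.txt stores class
        # ids 1..200, so a raw id of 1 becomes training target 0, the range that
        # nn.CrossEntropyLoss expects; helper.predict adds the 1 back when reporting.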
143 |         return img_label_dict
144 | 
145 |     def get_classes_names(self):
146 |         fpath = os.path.join(self.root_dir, self.classes_file_name)
147 |         classes_names = []
148 |         with open(fpath, 'r') as file:
149 |             for line in file:
150 |                 tmp = line.split(' ')
151 |                 classes_names.append(tmp[1].strip('\n'))
152 |         return classes_names
153 | 
154 | 
155 | def get_train_validation_data_loader(resize_size, batch_size, random_seed,
156 |                                      augment=False, validation_size=0.3,
157 |                                      object_boxes_dict=None,
158 |                                      shuffle=True, show_sample=False):
159 |     normalize = transforms.Normalize(
160 |         mean=[0.485, 0.456, 0.406],
161 |         std=[0.229, 0.224, 0.225],
162 |     )
163 |     if augment:
164 |         transforms_random_apply = transforms.RandomApply([
165 |             transforms.RandomChoice([
166 |                 transforms.RandomHorizontalFlip(),
167 |                 transforms.RandomVerticalFlip(),
168 |                 transforms.RandomRotation(15),
169 |                 transforms.RandomRotation(60)
170 |             ]),
171 |         ], p=0.4)
172 |         if isinstance(resize_size, int):
173 |             # shorter edges should be scaled to this size and original ratio will be kept
174 |             # as a result, we should also do a random crop
175 |             train_transform = transforms.Compose([
176 |                 transforms.Resize(resize_size),
177 |                 transforms_random_apply,
178 |                 transforms.RandomCrop(resize_size),
179 |                 transforms.ToTensor(),
180 |                 normalize
181 |             ])
182 |         else:  # should be a tuple like (224, 224)
183 |             train_transform = transforms.Compose([
184 |                 transforms.Resize(resize_size),
185 |                 transforms_random_apply,
186 |                 transforms.ToTensor(),
187 |                 normalize
188 |             ])
189 |     else:
190 |         if isinstance(resize_size, int):
191 |             train_transform = transforms.Compose([
192 |                 transforms.Resize(resize_size),
193 |                 transforms.RandomCrop(resize_size),
194 |                 transforms.ToTensor(),
195 |                 normalize
196 |             ])
197 |         else:
198 |             train_transform = transforms.Compose([
199 |                 transforms.Resize(resize_size),
200 |                 transforms.ToTensor(),
201 |                 normalize
202 |             ])
203 | 
204 |     if isinstance(resize_size, int):  # for validation, we should keep all information of an image
205 |         resize_size = (resize_size, resize_size)
206 |     valid_transform = transforms.Compose([
207 |         transforms.Resize(resize_size),
208 |         transforms.ToTensor(),
209 |         normalize
210 |     ])
211 | 
212 |     train_dataset = BirdsDataset(
213 |         train=True, transform=train_transform, object_boxes_dict=object_boxes_dict)
214 |     valid_dataset = BirdsDataset(
215 |         train=True, transform=valid_transform, object_boxes_dict=object_boxes_dict)
216 | 
217 |     num_train = len(train_dataset)
218 |     indices = list(range(num_train))
219 |     split = int(np.floor(validation_size * num_train))
220 | 
221 |     if shuffle:
222 |         np.random.seed(random_seed)
223 |         np.random.shuffle(indices)
224 | 
225 |     train_idx, valid_idx = indices[split:], indices[:split]
226 |     train_sampler = SubsetRandomSampler(train_idx)
227 |     valid_sampler = SubsetRandomSampler(valid_idx)
228 | 
229 |     train_loader = torch.utils.data.DataLoader(
230 |         train_dataset, batch_size=batch_size, sampler=train_sampler, num_workers=4
231 |     )
232 |     valid_loader = torch.utils.data.DataLoader(
233 |         valid_dataset, batch_size=batch_size, sampler=valid_sampler, num_workers=4
234 |     )
235 | 
236 |     # visualize some images
237 |     if show_sample:
238 |         sample_loader = torch.utils.data.DataLoader(
239 |             train_dataset, batch_size=9, shuffle=shuffle
240 |         )
241 |         data_iter = iter(sample_loader)
242 |         _, images, labels = next(data_iter)  # the dataset yields (index, image, label) triples
243 |         X = images.numpy().transpose([0, 2, 3, 1])
244 |         utils.plot_images(train_dataset.get_classes_names(), X, labels)
245 | 
246 |     return train_loader, valid_loader
247 | 
248 | 
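# A minimal usage sketch of the two loader factories in this module; this helper
# is hypothetical (not part of the original API) and simply mirrors the argument
# values used elsewhere in the repo (e.g. main.py).
def _example_build_loaders():
    train_loader, valid_loader = get_train_validation_data_loader(
        resize_size=224,         # int: shorter edge scaled, then random crop
        batch_size=32,
        random_seed=96,
        augment=True,
        validation_size=0.1,
        object_boxes_dict=None   # pass utils.get_annotated_bounding_boxes() for object-level crops
    )
    test_loader = get_test_data_loader(resize_size=224, batch_size=32)
    return train_loader, valid_loader, test_loader
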
249 | def get_test_data_loader(resize_size, batch_size, object_boxes_dict=None, shuffle=True):
250 |     normalize = transforms.Normalize(
251 |         mean=[0.485, 0.456, 0.406],
252 |         std=[0.229, 0.224, 0.225],
253 |     )
254 |     if isinstance(resize_size, int):
255 |         resize_size = (resize_size, resize_size)
256 |     transform = transforms.Compose([
257 |         transforms.Resize(resize_size),
258 |         transforms.ToTensor(),
259 |         normalize
260 |     ])
261 |     test_dataset = BirdsDataset(
262 |         train=False, transform=transform, object_boxes_dict=object_boxes_dict)
263 |     test_loader = torch.utils.data.DataLoader(
264 |         test_dataset, batch_size=batch_size, shuffle=shuffle
265 |     )
266 |     return test_loader
267 | 
--------------------------------------------------------------------------------
/helper.py:
--------------------------------------------------------------------------------
  1 | '''
  2 | 1. Evaluate trained model(s) on dataset(s) and
  3 |    combine their results into one output prediction
  4 | 
  5 |     def evaluate(logger,
  6 |                  models,
  7 |                  data_loaders,
  8 |                  set_name,
  9 |                  predict_weights=None,
 10 |                  use_gpu=cuda.is_available(),
 11 |                  cuda_device_idx=0):
 12 | 
 13 |         return acc
 14 | 
 15 | 
 16 | 2. Train a model and evaluate it after training
 17 | 
 18 |     train_and_evaluate(
 19 |         logger = None,
 20 |         model_name = 'resnet152',
 21 |         pre_model = None,
 22 |         use_pretrained_params = True,
 23 |         fine_tune_all_layers = False,
 24 | 
 25 |         data_loaders=None,
 26 |         is_object_level=False,
 27 | 
 28 |         num_epochs = 4,
 29 |         learning_rate = 1e-3,
 30 |         weight_decay = 5e-4,
 31 |         train_batch_size = 32,
 32 |         eval_epoch_step = 4,
 33 | 
 34 |         use_gpu = cuda.is_available(),
 35 |         cuda_device_idx = 0,
 36 |         use_multiple_gpu = False,
 37 | 
 38 |         save_model = True
 39 |     ):
 40 | 
 41 |         return model, train_acc, valid_acc, test_acc, model_path
 42 | 
 43 | 
 44 | '''
 45 | 
 46 | import torch
 47 | import torch.nn as nn
 48 | import torch.cuda as cuda
 49 | import torch.optim as optim
 50 | from torchvision.models import *
 51 | import torchvision.transforms as transforms
 52 | 
 53 | import matplotlib.pyplot as plt
 54 | 
 55 | import os, time
 56 | 
 57 | import cub_200_2011 as dataset
 58 | import utils
 59 | 
 60 | def get_model_by_name(name, pretrained):
 61 |     if name == 'resnet18': return resnet18(pretrained=pretrained)
 62 |     if name == 'resnet34': return resnet34(pretrained=pretrained)
 63 |     if name == 'resnet50': return resnet50(pretrained=pretrained)
 64 |     if name == 'resnet101': return resnet101(pretrained=pretrained)
 65 |     if name == 'resnet152': return resnet152(pretrained=pretrained)
 66 |     if name == 'vgg16': return vgg16_bn(pretrained=pretrained)
 67 |     if name == 'vgg19': return vgg19_bn(pretrained=pretrained)
 68 |     if name == 'inception': return inception_v3(pretrained=pretrained)
 69 |     if name == 'densenet121': return densenet121(pretrained=pretrained)
 70 |     if name == 'densenet169': return densenet169(pretrained=pretrained)
 71 |     if name == 'densenet201': return densenet201(pretrained=pretrained)
 72 |     if name == 'densenet161': return densenet161(pretrained=pretrained)
 73 | 
 74 | # block expansion factor per ResNet variant; used in 'replace_model_fc' to size the new fc layer
 75 | resnet_block_dict = {
 76 |     'resnet18': 1, 'resnet34': 1, 'resnet50': 4,  # resnet50 uses Bottleneck blocks, so its expansion is 4
 77 |     'resnet101': 4, 'resnet152': 4,
 78 | }
 79 | def replace_model_fc(model_name, model):
 80 |     """ Replace fully connected layer of a neural network model in order to correct output class number
 81 | 
 82 |     :param model_name: model's name
 83 |     :param model: model itself, a pytorch's nn.Module object
 84 |     """
 85 |     if model_name.startswith('bilinear_densenet'):
 86 |         for param in model.conv.parameters():
 87 | 
param.requires_grad = True 88 | for param in model.bn.parameters(): 89 | param.requires_grad = True 90 | for param in model.fc.parameters(): 91 | param.requires_grad = True 92 | return None 93 | 94 | # change the num_classes to 200 95 | if model_name.startswith('resnet'): 96 | model.fc = nn.Linear(512 * resnet_block_dict[model_name], 200) 97 | elif model_name.startswith('vgg'): 98 | pass # todo find out how we can change num_classes to fine tune vgg 99 | elif model_name == 'inception': 100 | model.fc = nn.Linear(2048, 200) 101 | elif model_name.startswith('densenet'): 102 | model.classifier = nn.Linear(model.classifier.in_features, 200) 103 | elif model_name.startswith('bilinear_resnet'): 104 | if model_name.endswith('152'): 105 | model.conv2 = nn.Conv2d(2048, 512, 1) 106 | model.bn2 = nn.BatchNorm2d(512) 107 | model.fc = nn.Linear(512**2, 200) 108 | 109 | 110 | def get_model_parameters(model_name, model, pretrained, fine_tune_all_layers): 111 | """ Get model's parameters to optimize 112 | 113 | :param model_name: model's name 114 | :param model: model itself 115 | :param pretrained: True if we should use pretrained model parameters 116 | :param fine_tune_all_layers: True if we should fine tune all layers of the model 117 | """ 118 | if not pretrained or fine_tune_all_layers: 119 | return model.parameters() 120 | else: # fine tune only fully connected layer 121 | if model_name.startswith('resnet') or model_name.startswith('inception'): 122 | return model.fc.parameters() 123 | elif model_name.startswith('densenet'): 124 | return model.classifier.parameters() 125 | elif model_name.startswith('bilinear_densenet'): 126 | return list(model.conv.parameters()) + list(model.bn.parameters()) + list(model.fc.parameters()) 127 | elif model_name.startswith('bilinear'): 128 | if model_name.endswith('34'): 129 | return model.fc.parameters() 130 | elif model_name.endswith('152'): 131 | return list(model.conv2.parameters()) + list(model.bn2.parameters()) + list(model.fc.parameters()) 132 | else: # vgg 133 | pass 134 | 135 | 136 | def save_model_parameters(parameters, file_name_prefix): 137 | # parameters should come from model.state_dict() 138 | if not os.path.exists('models/'): 139 | os.makedirs('models/') 140 | fp = 'models/' + file_name_prefix + '_' + time.strftime("%m-%d-%H-%M", time.localtime()) + '.pth' 141 | torch.save(parameters, fp) 142 | return fp 143 | 144 | 145 | def save_evaluation_result(prefix, epochs_arr, losses, epochs_step_arr, train_accuracies, valid_accuracies): 146 | if not os.path.exists("result"): 147 | os.mkdir("result") 148 | 149 | post_fix = time.strftime("%m-%d-%H-%M", time.localtime()) 150 | 151 | plt.clf() # clear existing figure content 152 | plt.plot(epochs_arr, losses) 153 | plt.xlabel('epoch') 154 | plt.ylabel('loss') 155 | plt.savefig("result/" + prefix + "_loss_" + post_fix + ".png") 156 | 157 | plt.clf() # clear existing figure content 158 | plt.plot(epochs_step_arr, train_accuracies) 159 | plt.xlabel('epoch') 160 | plt.ylabel('train accuracy') 161 | plt.savefig("result/" + prefix + "_acc_train_" + post_fix + ".png") 162 | 163 | plt.clf() # clear existing figure content 164 | plt.plot(epochs_step_arr, valid_accuracies) 165 | plt.xlabel('epoch') 166 | plt.ylabel('validation accuracy') 167 | plt.savefig("result/" + prefix + "_acc_valid_" + post_fix + ".png") 168 | 169 | 170 | def predict(model_glb, img_pil, resize_shape=(224, 224), model_obj=None, obj_bounding_box=None, predict_weights=None, use_gpu=cuda.is_available(), cuda_device_idx=0): 171 | """ predict input's class 
172 | 
173 |     :param model_glb: classification model for global level
174 |     :param img_pil: image as PIL.Image
175 |     :param resize_shape: resize shape
176 |     :param model_obj: classification model for object level
177 |     :param obj_bounding_box: bounding box of the object region, as (left, upper, right, lower) for PIL's crop
178 |     :param predict_weights: weights of different levels' prediction, [0] should be for global level
179 |     :param use_gpu: whether to run the models on GPU
180 |     :param cuda_device_idx: index of the CUDA device to use
181 |     :return: a [scores, classes] pair for the top-5 predictions (raw network outputs, not normalized probabilities)
182 |     """
183 | 
184 |     img = img_pil.resize(resize_shape)
185 |     img_obj = None
186 |     if obj_bounding_box is not None:
187 |         img_obj = img_pil.crop(obj_bounding_box)
188 |         img_obj = img_obj.resize(resize_shape)
189 | 
190 |     normalize = transforms.Normalize(
191 |         mean=[0.485, 0.456, 0.406],
192 |         std=[0.229, 0.224, 0.225],
193 |     )
194 |     transform = transforms.Compose([
195 |         transforms.ToTensor(),
196 |         normalize
197 |     ])
198 | 
199 |     img_tensor_glb = transform(img)
200 |     # tmp = img_tensor_glb.numpy().transpose([1, 2, 0])
201 |     # plt.imshow(tmp)
202 |     # plt.show()
203 | 
204 |     img_tensor_glb = img_tensor_glb.unsqueeze(0)  # convert shape (3, 224, 224) to (1, 3, 224, 224)
205 |     img_tensor_obj = None
206 |     if model_obj is not None:
207 |         img_tensor_obj = transform(img_obj)
208 |         # tmp = img_tensor_obj.numpy().transpose([1, 2, 0])
209 |         # plt.imshow(tmp)
210 |         # plt.show()
211 |         img_tensor_obj = img_tensor_obj.unsqueeze(0)  # convert shape (3, 224, 224) to (1, 3, 224, 224)
212 | 
213 |     if use_gpu:
214 |         cuda_device = torch.device('cuda', cuda_device_idx)
215 |         img_tensor_glb = img_tensor_glb.cuda(cuda_device)
216 |         model_glb = model_glb.cuda(cuda_device_idx)
217 |         if model_obj is not None:
218 |             img_tensor_obj = img_tensor_obj.cuda(cuda_device)
219 |             model_obj = model_obj.cuda(cuda_device_idx)
220 | 
221 |     predict_prob_arr_glb = model_glb(img_tensor_glb)  # raw class scores
222 |     predict_prob_arr = predict_prob_arr_glb
223 |     if model_obj is not None:
224 |         predict_prob_arr_obj = model_obj(img_tensor_obj)
225 |         predict_prob_arr = predict_weights[0] * predict_prob_arr_glb + predict_weights[1] * predict_prob_arr_obj
226 |     top5 = torch.topk(predict_prob_arr, 5)
227 |     if use_gpu:
228 |         top5 = [top5[0].cpu(), top5[1].cpu()]  # back to cpu so that we can detach them
229 |     probs = top5[0].detach().numpy()  # 2-d nparray
230 |     classes = top5[1].detach().numpy()
231 |     probs = probs[0]
232 |     classes = classes[0]
233 |     list_prob = [probs[0], probs[1], probs[2], probs[3], probs[4], ]
234 |     list_cls = [classes[0] + 1, classes[1] + 1, classes[2] + 1, classes[3] + 1, classes[4] + 1, ]
235 |     return [list_prob, list_cls]
236 | 
237 | 
238 | def evaluate(logger, models, data_loaders, set_name, predict_weights=None, use_gpu=cuda.is_available(), cuda_device_idx=0,
239 |              use_multiple_gpu=False):
240 |     """ Evaluate trained model(s) on dataset(s) and combine their results into one output prediction
241 |     Note: there should be a one-to-one match between models and data_loaders.
242 | 
243 |     :param logger: the utils.LoggerS object to print logs
244 |     :param models: a list of models
245 |     :param data_loaders: a list of dataset loaders.
246 |     :param set_name: dataset's name, can be 'train set', 'validation set' or 'test set'
247 | 
248 |     :param predict_weights: a list of weights for each model's prediction result
249 |         Example: for a specific input, models[0] gives an output as [0.7, 0.3] (probability of class 0 and class 1),
250 |         and models[1] gives [0.4, 0.6]. If the predict_weights is [0.2, 0.8], then the final output should be
251 |         [0.7*0.2 + 0.4*0.8, 0.3*0.2 + 0.6*0.8], i.e. [0.46, 0.54], so the prediction is class 1
252 | 
253 |     :param use_gpu: use GPU to run the model or not
254 |     :param cuda_device_idx: an int value that indicates which cuda device that we want to use for inputs
255 | 
256 |     :return: prediction accuracy
257 |     """
258 |     for model in models:
259 |         model.eval()  # evaluation mode
260 | 
261 |     if predict_weights is None:
262 |         predict_weights = [1]
263 |     logger.info('computing classification accuracy on ' + set_name)
264 |     _begin_time = time.time()
265 |     # acc = correct_num / sample_num
266 |     correct_num = 0
267 |     sample_num = 0
268 |     has_multiple_gpu = cuda.device_count() > 1
269 |     cuda_device = None
270 |     if use_gpu:
271 |         cuda_device = torch.device('cuda', cuda_device_idx)
272 |     with torch.no_grad():
273 |         labels_dict = {}
274 |         predicts_dict = {}
275 |         for i in range(len(models)):  # each model is only valid on corresponding data loader
276 |             model = models[i]
277 |             data_loader = data_loaders[i]
278 |             for data in data_loader:
279 |                 image_indexes, images, labels = data
280 |                 if has_multiple_gpu and use_gpu:
281 |                     if use_multiple_gpu:
282 |                         images = torch.autograd.Variable(images.cuda())
283 |                         labels = torch.autograd.Variable(labels.cuda(non_blocking=True))
284 |                     else:
285 |                         images = images.cuda(cuda_device)
286 |                         labels = labels.cuda(cuda_device)  # shape is (batch_size, 1)
287 | 
288 |                 batch_size = labels.size(0)
289 |                 predict = model(images)  # shape is (batch_size, 200)
290 | 
291 |                 if i == 0:
292 |                     sample_num += labels.size(0)
293 |                     for j in range(batch_size):
294 |                         img_idx = image_indexes[j].item()
295 |                         labels_dict[img_idx] = labels[j].item()
296 |                         predicts_dict[img_idx] = predict_weights[i] * predict.data[j]
297 |                 else:
298 |                     for j in range(batch_size):
299 |                         img_idx = image_indexes[j].item()
300 |                         predict_data = predict_weights[i] * predict.data[j] + predicts_dict[img_idx]
301 |                         predicts_dict[img_idx] = predict_data
302 | 
303 |         for img_idx in predicts_dict:
304 |             label = labels_dict[img_idx]
305 |             predict = predicts_dict[img_idx]
306 |             predict_cls = torch.argmax(predict)
307 |             if predict_cls == label:
308 |                 correct_num += 1
309 | 
310 |     for model in models:
311 |         model.train()  # back to train mode
312 | 
313 |     acc = 100.0 * correct_num / sample_num
314 |     logger.info('accuracy: %.4f%%, cost time: %.4fs' % (acc, time.time() - _begin_time))
315 |     return acc
316 | 
317 | 
318 | # ----------------------- This is a very important method ----------------------
319 | 
320 | def train_and_evaluate(
321 |         logger = None,
322 | 
323 |         model_name = 'resnet152',
324 |         pre_model = None,
325 |         use_pretrained_params = True,
326 |         fine_tune_all_layers = False,
327 | 
328 |         data_loaders=None,
329 |         is_object_level=False,
330 | 
331 |         num_epochs = 4,
332 |         learning_rate = 1e-3,
333 |         use_scheduler = False,
334 |         weight_decay = 5e-4,
335 |         train_batch_size = 32,
336 |         eval_epoch_step = 4,
337 | 
338 |         use_gpu = cuda.is_available(),
339 |         cuda_device_idx = 0,
340 |         use_multiple_gpu = False,
341 | 
342 |         save_model = True
343 | ):
344 |     """ Train a model and evaluate it after training
345 | 
346 |     :param logger: the utils.LoggerS object to print logs onto file and console
347 |     :param model_name: model's name, used to create the model and help provide more detailed log
348 |     :param pre_model: if this is not None, we will train and evaluate on it instead of creating a new model
349 |     :param use_pretrained_params: True if we initialize the model with pretrained
parameters 350 | :param fine_tune_all_layers: True if we want to fine tune all layers of the model 351 | 352 | :param data_loaders: a list of data loaders for train, validation and test set. The order must be correct 353 | :param is_object_level: as its name 354 | 355 | :param num_epochs: the number of training iterations on whole train set 356 | :param learning_rate: as its name 357 | :param weight_decay: as its name 358 | :param train_batch_size: batch size of train set 359 | :param eval_epoch_step: evaluation step 360 | 361 | :param use_gpu: use GPU to train/evaluate or not 362 | :param cuda_device_idx: an int value that indicates which cuda device that we want to use for inputs and model 363 | :param use_multiple_gpu: use multiple GPU to train/evaluate or not; todo currently this flag is useless 364 | 365 | :param save_model: True if we want to save the model that has best validation accuracy when training 366 | 367 | :return: trained model, accuracies on train, validation and test set, 368 | and stored model path if :param save_model is set to True 369 | """ 370 | # obj -- object 371 | # glb -- global 372 | # prtrn -- pretrain 373 | # ep -- epoch 374 | # bt -- batch_size 375 | if is_object_level: 376 | res_file_name_prefix = 'obj' 377 | else: 378 | res_file_name_prefix = 'glb' 379 | res_file_name_prefix += '_' + model_name 380 | if use_pretrained_params: 381 | res_file_name_prefix += '_prtrn' 382 | if fine_tune_all_layers: 383 | res_file_name_prefix += 'All' 384 | res_file_name_prefix += '_ep' + str(num_epochs) + '_bt' + str(train_batch_size) + '_' + str(learning_rate) 385 | if logger is None: 386 | logger = utils.get_logger(res_file_name_prefix) 387 | 388 | # get train/valid/test_loader 389 | if data_loaders is None: 390 | logger.info('start loading dataset') 391 | begin_time = time.time() 392 | train_loader, valid_loader = dataset.get_train_validation_data_loader( 393 | resize_size=224, 394 | batch_size=train_batch_size, 395 | random_seed=96, 396 | validation_size=0.2, 397 | object_boxes_dict=None, 398 | show_sample=False 399 | ) 400 | test_loader = dataset.get_test_data_loader( 401 | resize_size=224, 402 | batch_size=32, 403 | object_boxes_dict=None 404 | ) 405 | logger.info('loading dataset costs ' + str(time.time() - begin_time)) 406 | else: 407 | train_loader = data_loaders[0] 408 | valid_loader = data_loaders[1] 409 | test_loader = data_loaders[2] 410 | 411 | # Create nn model 412 | if pre_model is not None: 413 | model = pre_model 414 | # pre_model should have been trained 415 | if not fine_tune_all_layers: 416 | for param in model.parameters(): 417 | param.requires_grad = False 418 | replace_model_fc(model_name, model) 419 | else: 420 | model = get_model_by_name(model_name, use_pretrained_params) 421 | if use_pretrained_params and not fine_tune_all_layers: 422 | # only fine tune fully connected layer, which means we should not upgrade network layers except for last one 423 | for param in model.parameters(): 424 | param.requires_grad = False 425 | replace_model_fc(model_name, model) 426 | 427 | has_multiple_gpu = cuda.device_count() > 1 428 | 429 | cuda_device = None # declare this just in order to remove IDE warnings ... 
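    # Note (editorial suggestion, not in the original code): in the multi-GPU
    # branch below, nn.DataParallel replicates the model and scatters each batch
    # across all visible GPUs, so inputs only need plain `.cuda()`; in the
    # single-GPU branch, the model and inputs are both pinned to the device
    # selected by `cuda_device_idx`.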
430 |     if use_gpu:
431 |         if has_multiple_gpu and use_multiple_gpu: model = nn.DataParallel(model).cuda()
432 |         else:
433 |             model = model.cuda(cuda_device_idx)
434 |             cuda_device = torch.device('cuda', cuda_device_idx)
435 | 
436 | 
437 |     criterion = nn.CrossEntropyLoss().cuda()
438 |     if has_multiple_gpu and use_multiple_gpu: _model = model.module
439 |     else: _model = model
440 |     optimizer = optim.SGD(
441 |         get_model_parameters(model_name, _model, use_pretrained_params, fine_tune_all_layers),
442 |         lr=learning_rate,
443 |         momentum=0.9,
444 |         weight_decay=weight_decay
445 |     )
446 |     # Reduce learning rate when a metric has stopped improving.
447 |     if use_scheduler is True:
448 |         scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
449 |             optimizer, mode='min', factor=0.1, patience=3, verbose=True, threshold=1e-4
450 |         )
451 | 
452 | 
453 |     logger.info('start training')
454 |     train_cost_time = 0.0
455 |     epochs_arr = []
456 |     losses_arr = []
457 |     epochs_step_arr = []
458 |     train_acc_arr = []
459 |     valid_acc_arr = []
460 |     best_valid_acc = 0.0
461 |     best_valid_acc_model_params = None
462 |     for epoch in range(num_epochs):
463 |         running_loss = 0.0
464 |         batch_num = 0
465 |         for i, (_, inputs, labels) in enumerate(train_loader, 0):
466 |             begin_time = time.time()
467 |             # get the inputs
468 | 
469 |             if use_gpu:
470 |                 if has_multiple_gpu and use_multiple_gpu:
471 |                     inputs = torch.autograd.Variable(inputs.cuda())
472 |                     labels = torch.autograd.Variable(labels.cuda(non_blocking=True))
473 |                 else:
474 |                     inputs = inputs.cuda(cuda_device)
475 |                     labels = labels.cuda(cuda_device)
476 | 
477 | 
478 |             # zero the parameter gradients
479 |             optimizer.zero_grad()
480 | 
481 |             # forward + backward + optimize
482 |             outputs = model(inputs)
483 |             loss = criterion(outputs, labels)
484 |             loss.backward()
485 |             optimizer.step()
486 |             running_loss += loss.item()
487 |             # print statistics
488 |             logger.info('[%d, %5d] loss: %.6f' % (epoch + 1, i + 1, loss.item()))
489 |             cost_time_i = time.time() - begin_time
490 |             train_cost_time += cost_time_i
491 |             logger.info('cost time: %.4fs' % cost_time_i)
492 |             batch_num = i + 1  # count batches, so the epoch-average below divides by the batch count
493 |         if use_scheduler is True:
494 |             scheduler.step(running_loss)
495 |         epochs_arr.append(epoch + 1)
496 |         losses_arr.append(running_loss / batch_num)
497 |         if epoch == 0 or (epoch + 1) % eval_epoch_step == 0:  # compute classification accuracy on train and validation set
498 |             epochs_step_arr.append(epoch + 1)
499 |             logger.info('')
500 |             train_acc = evaluate(logger=logger, models=[model], data_loaders=[train_loader],
501 |                                  set_name='train set', cuda_device_idx=cuda_device_idx, use_multiple_gpu=use_multiple_gpu)
502 |             train_acc_arr.append(train_acc)
503 |             valid_acc = evaluate(logger=logger, models=[model], data_loaders=[valid_loader],
504 |                                  set_name='validation set', cuda_device_idx=cuda_device_idx, use_multiple_gpu=use_multiple_gpu)
505 |             valid_acc_arr.append(valid_acc)
506 |             if valid_acc > best_valid_acc:
507 |                 best_valid_acc = valid_acc
508 |                 best_valid_acc_model_params = {k: v.clone() for k, v in model.state_dict().items()}  # snapshot the weights; state_dict() alone keeps live references that later epochs would overwrite
509 |             logger.info('')
510 | 
511 |     logger.info('Finished Training, cost time: %.4fs' % train_cost_time)
512 |     logger.info('')
513 | 
514 |     test_acc = evaluate(logger=logger, models=[model], data_loaders=[test_loader],
515 |                         set_name='test set', cuda_device_idx=cuda_device_idx, use_multiple_gpu=use_multiple_gpu)
516 |     logger.info('')
517 | 
518 |     save_evaluation_result(res_file_name_prefix, epochs_arr, losses_arr, epochs_step_arr, train_acc_arr, valid_acc_arr)
519 | 
520 |     saved_model_path = None
521 |     if save_model:
522 |         logger.info('')
523 | 
logger.info('saving model parameters') 524 | if is_object_level: 525 | model_file_name_prefix = 'obj_' 526 | else: 527 | model_file_name_prefix = 'glb_' 528 | model_file_name_prefix += model_name + ('_acc%.4f' % best_valid_acc) 529 | saved_model_path = save_model_parameters(best_valid_acc_model_params, model_file_name_prefix) 530 | logger.info('parameters have been saved successfully to ' + saved_model_path) 531 | logger.info('') 532 | 533 | return model, train_acc_arr[len(train_acc_arr) - 1], valid_acc_arr[len(valid_acc_arr) - 1], test_acc, saved_model_path 534 | 535 | -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | import cub_200_2011 as dataset 2 | import helper, utils 3 | import model_global as glb, model_object as obj 4 | import time 5 | import torch 6 | import torch.multiprocessing as mp 7 | 8 | train_batch_size = 32 9 | test_batch_size = 32 10 | random_seed = 96 11 | validation_size = 0.1 12 | predict_weights = [0.2, 0.8] 13 | 14 | use_multiple_gpu = False # run global model on 1 gpu and object-level model on another 15 | pre_models = [None,None] 16 | 17 | def print_summary_log(logger, trn_acc_glb, val_acc_glb, tst_acc_glb, trn_acc_obj, val_acc_obj, tst_acc_obj): 18 | logger.info('') 19 | 20 | logger.info('global-level model: ' + str(glb.model_name)) 21 | logger.info('pretrained: ' + str(glb.use_pretrained_params)) 22 | logger.info('fine tune all layers: ' + str(glb.fine_tune_all_layers)) 23 | logger.info('epochs: ' + str(glb.num_epochs)) 24 | logger.info('batch size: ' + str(train_batch_size)) 25 | logger.info('learning rate: ' + str(glb.learning_rate)) 26 | logger.info('prediction accuracy: %.4f%%, %.4f%%, %.4f%%' % (trn_acc_glb, val_acc_glb, tst_acc_glb)) 27 | 28 | logger.info('') 29 | 30 | logger.info('object-level model: ' + str(obj.model_name)) 31 | logger.info('pretrained: ' + str(obj.use_pretrained_params)) 32 | logger.info('fine tune all layers: ' + str(obj.fine_tune_all_layers)) 33 | logger.info('epochs: ' + str(obj.num_epochs)) 34 | logger.info('batch size: ' + str(train_batch_size)) 35 | logger.info('learning rate: ' + str(obj.learning_rate)) 36 | logger.info('prediction accuracy: %.4f%%, %.4f%%, %.4f%%' % (trn_acc_obj, val_acc_obj, tst_acc_obj)) 37 | 38 | 39 | def evaluate(logger, models, train_loaders, validation_loaders, test_loaders): 40 | logger.info('') 41 | logger.info('evaluating model on multiple sets combining both global-level and object-level models\' predictions') 42 | logger.info('predict weights: ' + str(predict_weights[0]) + ', ' + str(predict_weights[1])) 43 | begin_time = time.time() 44 | 45 | helper.evaluate( 46 | logger=logger, 47 | models=models, 48 | data_loaders=train_loaders, 49 | set_name='train set', 50 | predict_weights=predict_weights 51 | ) 52 | helper.evaluate( 53 | logger=logger, 54 | models=models, 55 | data_loaders=validation_loaders, 56 | set_name='validation set', 57 | predict_weights=predict_weights 58 | ) 59 | helper.evaluate( 60 | logger=logger, 61 | models=models, 62 | data_loaders=test_loaders, 63 | set_name='test set', 64 | predict_weights=predict_weights 65 | ) 66 | 67 | logger.info('evaluation has been done! 
total time: %.4fs' % (time.time() - begin_time)) 68 | 69 | 70 | def get_model_with_saved_parameters(model_path_glb, model_path_obj): 71 | model_glb = helper.get_model_by_name(glb.model_name, pretrained=False) 72 | helper.replace_model_fc(glb.model_name, model_glb) 73 | model_glb.load_state_dict(torch.load(model_path_glb)) 74 | model_glb = model_glb.cuda() 75 | 76 | model_obj = helper.get_model_by_name(obj.model_name, pretrained=False) 77 | helper.replace_model_fc(obj.model_name, model_obj) 78 | model_obj.load_state_dict(torch.load(model_path_obj)) 79 | model_obj = model_obj.cuda() 80 | 81 | return model_glb, model_obj 82 | 83 | 84 | def run_on_single_gpu(logger, data_loaders_glb, data_loaders_obj, 85 | train_loaders, valid_loaders, test_loaders, pre_models, fine_tune_all_layers, num_epochs): 86 | # if you want to change hyper-parameters like number of epochs or learning rate for each level's training, 87 | # please go to corresponding module file 88 | _, trn_acc_glb, val_acc_glb, tst_acc_glb, model_path_glb = glb.get_trained_model_global( 89 | logger=logger, data_loaders=data_loaders_glb, train_batch_size=train_batch_size, 90 | save_model=True, pre_model=pre_models[0], fine_tune_all_layers=fine_tune_all_layers, num_epochs=num_epochs) 91 | _, trn_acc_obj, val_acc_obj, tst_acc_obj, model_path_obj = obj.get_trained_model_object( 92 | logger=logger, data_loaders=data_loaders_obj, train_batch_size=train_batch_size, 93 | save_model=True, pre_model=pre_models[1], fine_tune_all_layers=fine_tune_all_layers, num_epochs=num_epochs) 94 | 95 | print_summary_log(logger, trn_acc_glb, val_acc_glb, tst_acc_glb, trn_acc_obj, val_acc_obj, tst_acc_obj) 96 | model_glb, model_obj = get_model_with_saved_parameters(model_path_glb, model_path_obj) 97 | evaluate( 98 | logger=logger, 99 | models=[model_glb, model_obj], 100 | train_loaders=train_loaders, 101 | validation_loaders=valid_loaders, 102 | test_loaders=test_loaders 103 | ) 104 | return model_glb, model_obj 105 | 106 | def target_model_global(q_glb, data_loaders_glb, pre_model, fine_tune_all_layers, num_epochs): 107 | logger_glb = glb.get_logger(train_batch_size, add_console_log_prefix=True) 108 | logger_glb.info('target model global starts') 109 | _, trn_acc_glb, val_acc_glb, tst_acc_glb, model_path_glb = glb.get_trained_model_global( 110 | logger=logger_glb, data_loaders=data_loaders_glb, train_batch_size=train_batch_size, 111 | cuda_device_idx=0, save_model=True, pre_model=pre_model, fine_tune_all_layers=fine_tune_all_layers, 112 | num_epochs=num_epochs ) 113 | q_glb.put(trn_acc_glb) 114 | q_glb.put(val_acc_glb) 115 | q_glb.put(tst_acc_glb) 116 | q_glb.put(model_path_glb) 117 | logger_glb.info('target model global stops') 118 | 119 | def target_model_object(q_obj, data_loaders_obj, pre_model, fine_tune_all_layers, num_epochs): 120 | logger_obj = obj.get_logger(train_batch_size, add_console_log_prefix=True) 121 | logger_obj.info('target model object starts') 122 | _, trn_acc_obj, val_acc_obj, tst_acc_obj, model_path_obj = obj.get_trained_model_object( 123 | logger=logger_obj, data_loaders=data_loaders_obj, train_batch_size=train_batch_size, 124 | cuda_device_idx=1, save_model=True, pre_model=pre_model,fine_tune_all_layers=fine_tune_all_layers, 125 | num_epochs=num_epochs ) 126 | q_obj.put(trn_acc_obj) 127 | q_obj.put(val_acc_obj) 128 | q_obj.put(tst_acc_obj) 129 | q_obj.put(model_path_obj) 130 | logger_obj.info('target model object stops') 131 | 132 | def run_on_multiple_gpus(logger, data_loaders_glb, data_loaders_obj, 133 | train_loaders, valid_loaders, 
test_loaders, pre_models, fine_tune_all_layers, num_epochs):
134 | 
135 |     q_glb = mp.Queue()  # stores each level's accuracies and the saved model path
136 |     q_obj = mp.Queue()
137 |     process_glb = mp.Process(target=target_model_global,
138 |                              args=(q_glb, data_loaders_glb, pre_models[0], fine_tune_all_layers, num_epochs,))
139 |     process_obj = mp.Process(target=target_model_object,
140 |                              args=(q_obj, data_loaders_obj, pre_models[1], fine_tune_all_layers, num_epochs,))
141 | 
142 |     process_glb.start()
143 |     process_obj.start()
144 | 
145 |     process_glb.join()  # block the main process here until process_glb finishes
146 |     process_obj.join()
147 | 
148 |     trn_acc_glb = q_glb.get()  # FIFO
149 |     val_acc_glb = q_glb.get()
150 |     tst_acc_glb = q_glb.get()
151 |     model_path_glb = q_glb.get()
152 | 
153 |     trn_acc_obj = q_obj.get()
154 |     val_acc_obj = q_obj.get()
155 |     tst_acc_obj = q_obj.get()
156 |     model_path_obj = q_obj.get()
157 | 
158 |     print_summary_log(logger, trn_acc_glb, val_acc_glb, tst_acc_glb, trn_acc_obj, val_acc_obj, tst_acc_obj)
159 |     model_glb, model_obj = get_model_with_saved_parameters(model_path_glb, model_path_obj)
160 |     evaluate(
161 |         logger=logger,
162 |         models=[model_glb, model_obj],
163 |         train_loaders=train_loaders,
164 |         validation_loaders=valid_loaders,
165 |         test_loaders=test_loaders
166 |     )
167 |     return model_glb, model_obj
168 | 
169 | if __name__ == "__main__":
170 |     log_file_name_prefix = 'combined'
171 |     logger = utils.get_logger(log_file_name_prefix)
172 | 
173 |     logger.info('start loading dataset')
174 |     begin_time = time.time()
175 |     train_loader_glb, valid_loader_glb = dataset.get_train_validation_data_loader(
176 |         resize_size=224,  # apply random crop for train set
177 |         batch_size=train_batch_size,
178 |         random_seed=random_seed,
179 |         augment=True,
180 |         validation_size=validation_size,
181 |         object_boxes_dict=None,
182 |         show_sample=False
183 |     )
184 |     test_loader_glb = dataset.get_test_data_loader(
185 |         resize_size=224,  # no cropping
186 |         batch_size=test_batch_size,
187 |         object_boxes_dict=None
188 |     )
189 | 
190 |     bounding_boxes = utils.get_annotated_bounding_boxes()
191 |     train_loader_obj, valid_loader_obj = dataset.get_train_validation_data_loader(
192 |         resize_size=(224, 224),  # for the object-level model we don't need cropping any more
193 | batch_size=train_batch_size, 194 | random_seed=random_seed, 195 | augment=True, 196 | validation_size=validation_size, 197 | object_boxes_dict=bounding_boxes, 198 | show_sample=False 199 | ) 200 | test_loader_obj = dataset.get_test_data_loader( 201 | resize_size=224, 202 | batch_size=test_batch_size, 203 | object_boxes_dict=bounding_boxes 204 | ) 205 | logger.info('loading dataset costs %.4fs' % (time.time() - begin_time)) 206 | 207 | data_loaders_glb = [train_loader_glb, valid_loader_glb, test_loader_glb] 208 | data_loaders_obj = [train_loader_obj, valid_loader_obj, test_loader_obj] 209 | 210 | train_loaders = [train_loader_glb, train_loader_obj] 211 | valid_loaders = [valid_loader_glb, valid_loader_obj] 212 | test_loaders = [test_loader_glb, test_loader_obj] 213 | pre_models = [None, None] 214 | 215 | # test: it seems ResNet is better for global model and DenseNet better for object-level model 216 | glb.model_name = 'resnet152' 217 | obj.model_name = 'densenet161' 218 | fine_tune_all_layers=False 219 | glb.use_multiple_gpu=False 220 | obj.use_multiple_gpu=False 221 | num_epochs = 160 222 | if not use_multiple_gpu: 223 | pre_models[0], pre_models[1] = run_on_single_gpu(logger, data_loaders_glb, data_loaders_obj, 224 | train_loaders, valid_loaders, test_loaders, pre_models, fine_tune_all_layers, num_epochs) 225 | else: 226 | mp.set_start_method('spawn') # CUDA requires this 227 | pre_models[0], pre_models[1] = run_on_multiple_gpus(logger, data_loaders_glb, data_loaders_obj, 228 | train_loaders, valid_loaders, test_loaders, pre_models, fine_tune_all_layers, num_epochs) 229 | 230 | fine_tune_all_layers = True 231 | num_epochs = 120 232 | if not use_multiple_gpu: 233 | run_on_single_gpu(logger, data_loaders_glb, data_loaders_obj, train_loaders, valid_loaders, test_loaders, pre_models, fine_tune_all_layers, num_epochs) 234 | else: 235 | run_on_multiple_gpus(logger, data_loaders_glb, data_loaders_obj, train_loaders, valid_loaders, test_loaders, pre_models, fine_tune_all_layers, num_epochs) 236 | 237 | 238 | -------------------------------------------------------------------------------- /model_global.py: -------------------------------------------------------------------------------- 1 | import cub_200_2011 as dataset 2 | import helper, utils 3 | import time 4 | import torch 5 | 6 | model_name = 'resnet152' 7 | use_pretrained_params = True 8 | fine_tune_all_layers = False 9 | 10 | num_epochs = 100 11 | # use to generate same train/validation data splits 12 | random_seed = 96 13 | # we use a part of train set as validation set 14 | validation_size = 0.1 15 | learning_rate = 8e-4 16 | weight_decay = 5e-4 17 | eval_epoch_step = 4 18 | 19 | use_gpu = True 20 | cuda_device_idx=0 21 | use_multiple_gpu = False 22 | 23 | 24 | def get_logger(train_batch_size, add_console_log_prefix=False): 25 | log_file_name_prefix = 'glb_' + model_name 26 | if use_pretrained_params: 27 | log_file_name_prefix += '_prtrn' 28 | if fine_tune_all_layers: 29 | log_file_name_prefix += 'All' 30 | log_file_name_prefix += '_ep' + str(num_epochs) + '_bt' + str(train_batch_size) + '_' + str(learning_rate) 31 | if add_console_log_prefix: 32 | return utils.get_logger(log_file_name_prefix, 'glb_' + model_name) 33 | else: 34 | return utils.get_logger(log_file_name_prefix) 35 | 36 | 37 | # Why do we need param train_batch_size? 
Because log output must be precise 38 | def get_trained_model_global(logger, data_loaders, train_batch_size, cuda_device_idx=cuda_device_idx, save_model=True, 39 | pre_model=None, fine_tune_all_layers=fine_tune_all_layers, num_epochs=num_epochs): 40 | # if pre_model is None: 41 | # use_pretrained_params = True 42 | # fine_tune_all_layers = False 43 | # else : 44 | # use_pretrained_params = False 45 | # fine_tune_all_layers = True 46 | 47 | return helper.train_and_evaluate( 48 | logger=logger, 49 | 50 | model_name=model_name, 51 | pre_model=pre_model, 52 | use_pretrained_params=use_pretrained_params, 53 | fine_tune_all_layers=fine_tune_all_layers, 54 | 55 | data_loaders=data_loaders, 56 | is_object_level=False, 57 | 58 | num_epochs=num_epochs, 59 | learning_rate=learning_rate, 60 | weight_decay=weight_decay, 61 | train_batch_size=train_batch_size, # this is actually useless when we explicitly provide data_loaders 62 | eval_epoch_step=eval_epoch_step, 63 | 64 | use_gpu=use_gpu, 65 | cuda_device_idx=cuda_device_idx, 66 | use_multiple_gpu=use_multiple_gpu, 67 | 68 | save_model=save_model 69 | ) 70 | 71 | 72 | if __name__ == "__main__": 73 | train_batch_size = 32 74 | test_batch_size = 32 75 | 76 | logger = get_logger(train_batch_size) 77 | 78 | logger.info('start loading dataset') 79 | begin_time = time.time() 80 | train_loader, valid_loader = dataset.get_train_validation_data_loader( 81 | resize_size=224, 82 | batch_size=train_batch_size, 83 | random_seed=random_seed, 84 | augment=True, 85 | validation_size=validation_size, 86 | object_boxes_dict=None, 87 | show_sample=False 88 | ) 89 | test_loader = dataset.get_test_data_loader( 90 | resize_size=224, 91 | batch_size=test_batch_size, 92 | object_boxes_dict=None 93 | ) 94 | logger.info('loading dataset costs %.4fs' % (time.time() - begin_time)) 95 | 96 | # first training process for fc layer's parameters 97 | fine_tune_all_layers = False 98 | num_epochs = 100 99 | _, _, _, _, model_path = get_trained_model_global( 100 | logger=logger, 101 | data_loaders=[train_loader, valid_loader, test_loader], 102 | train_batch_size=train_batch_size, 103 | cuda_device_idx=cuda_device_idx, 104 | fine_tune_all_layers=fine_tune_all_layers, 105 | num_epochs=num_epochs 106 | ) 107 | 108 | logger.info('training for fc layer finished successfully') 109 | logger.info('model: ' + model_name) 110 | logger.info('pretrained: ' + str(use_pretrained_params)) 111 | logger.info('fine tune all layers: ' + str(fine_tune_all_layers)) 112 | logger.info('epochs: ' + str(num_epochs)) 113 | logger.info('batch size: ' + str(train_batch_size)) 114 | logger.info('learning rate: ' + str(learning_rate)) 115 | 116 | 117 | # second training process for all layers' parameters 118 | fine_tune_all_layers = True 119 | num_epochs = 60 120 | pre_model = helper.get_model_by_name(model_name, False) 121 | helper.replace_model_fc(model_name, pre_model) 122 | pre_model.load_state_dict(torch.load(model_path)) 123 | 124 | get_trained_model_global( 125 | logger=logger, 126 | data_loaders=[train_loader, valid_loader, test_loader], 127 | train_batch_size=train_batch_size, 128 | pre_model=pre_model, 129 | cuda_device_idx=cuda_device_idx, 130 | fine_tune_all_layers=fine_tune_all_layers, 131 | num_epochs=num_epochs 132 | ) 133 | 134 | logger.info('training for all layers finished successfully') 135 | logger.info('model: ' + model_name) 136 | logger.info('pretrained: ' + str(use_pretrained_params)) 137 | logger.info('fine tune all layers: ' + str(fine_tune_all_layers)) 138 | logger.info('epochs: ' + 
str(num_epochs)) 139 | logger.info('batch size: ' + str(train_batch_size)) 140 | logger.info('learning rate: ' + str(learning_rate)) -------------------------------------------------------------------------------- /model_object.py: -------------------------------------------------------------------------------- 1 | import cub_200_2011 as dataset 2 | import helper, utils 3 | import time 4 | 5 | model_name = 'densenet161' 6 | use_pretrained_params = True 7 | fine_tune_all_layers = False 8 | 9 | num_epochs = 100 10 | # use to generate same train/validation data splits 11 | random_seed = 96 12 | # we use a part of train set as validation set 13 | validation_size = 0.15 14 | learning_rate = 8e-4 15 | weight_decay = 5e-4 16 | eval_epoch_step = 4 17 | 18 | use_gpu = True 19 | cuda_device_idx=0 20 | use_multiple_gpu = False 21 | 22 | 23 | def get_logger(train_batch_size, add_console_log_prefix=False): 24 | log_file_name_prefix = 'obj_' + model_name 25 | if use_pretrained_params: 26 | log_file_name_prefix += '_prtrn' 27 | if fine_tune_all_layers: 28 | log_file_name_prefix += 'All' 29 | log_file_name_prefix += '_ep' + str(num_epochs) + '_bt' + str(train_batch_size) + '_' + str(learning_rate) 30 | if add_console_log_prefix: 31 | return utils.get_logger(log_file_name_prefix, 'obj_' + model_name) 32 | else: 33 | return utils.get_logger(log_file_name_prefix) 34 | 35 | 36 | # in fact I don't like writing train_batch_size here...QAQ 37 | def get_trained_model_object(logger, data_loaders, train_batch_size, cuda_device_idx=cuda_device_idx, save_model=True, 38 | pre_model=None, fine_tune_all_layers=fine_tune_all_layers, num_epochs=num_epochs): 39 | # if pre_model is None: 40 | # use_pretrained_params = True 41 | # fine_tune_all_layers = False 42 | # else : 43 | # use_pretrained_params = False 44 | # fine_tune_all_layers = True 45 | 46 | return helper.train_and_evaluate( 47 | logger=logger, 48 | 49 | model_name=model_name, 50 | pre_model=pre_model, 51 | use_pretrained_params=use_pretrained_params, 52 | fine_tune_all_layers=fine_tune_all_layers, 53 | 54 | data_loaders=data_loaders, 55 | is_object_level=True, 56 | 57 | num_epochs=num_epochs, 58 | learning_rate=learning_rate, 59 | weight_decay=weight_decay, 60 | train_batch_size=train_batch_size, 61 | eval_epoch_step=eval_epoch_step, 62 | 63 | use_gpu=use_gpu, 64 | cuda_device_idx=cuda_device_idx, 65 | use_multiple_gpu=use_multiple_gpu, 66 | 67 | save_model=save_model 68 | ) 69 | 70 | 71 | if __name__ == "__main__": 72 | train_batch_size = 32 73 | test_batch_size = 32 74 | 75 | logger = get_logger(train_batch_size) 76 | 77 | logger.info('start loading dataset') 78 | begin_time = time.time() 79 | bounding_boxes = utils.get_annotated_bounding_boxes() 80 | train_loader, valid_loader = dataset.get_train_validation_data_loader( 81 | resize_size=224, 82 | batch_size=train_batch_size, 83 | random_seed=random_seed, 84 | augment=True, 85 | validation_size=validation_size, 86 | object_boxes_dict=bounding_boxes, 87 | show_sample=False 88 | ) 89 | test_loader = dataset.get_test_data_loader( 90 | resize_size=224, 91 | batch_size=test_batch_size, 92 | object_boxes_dict=bounding_boxes 93 | ) 94 | logger.info('loading dataset costs %.4fs' % (time.time() - begin_time)) 95 | 96 | get_trained_model_object( 97 | logger=logger, 98 | data_loaders=[train_loader, valid_loader, test_loader], 99 | train_batch_size=train_batch_size 100 | ) 101 | 102 | logger.info('model: ' + model_name) 103 | logger.info('pretrained: ' + str(use_pretrained_params)) 104 | logger.info('fine tune all layers: 
' + str(fine_tune_all_layers))
105 |     logger.info('epochs: ' + str(num_epochs))
106 |     logger.info('batch size: ' + str(train_batch_size))
107 |     logger.info('learning rate: ' + str(learning_rate))
--------------------------------------------------------------------------------
/model_test.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | 
  3 | import os
  4 | 
  5 | import torch
  6 | import torchvision
  7 | import time
  8 | import cub_200_2011 as dataset
  9 | 
 10 | torch.manual_seed(0)
 11 | torch.cuda.manual_seed_all(0)
 12 | 
 13 | class BCNN(torch.nn.Module):
 14 | 
 15 |     def __init__(self):
 16 |         """Declare all needed layers."""
 17 |         torch.nn.Module.__init__(self)
 18 |         resnet_model = torchvision.models.resnet34(pretrained=False)
 19 |         self.conv1 = resnet_model.conv1
 20 |         self.bn1 = resnet_model.bn1
 21 |         self.relu = resnet_model.relu
 22 |         self.maxpool = resnet_model.maxpool
 23 |         self.layer1 = resnet_model.layer1
 24 |         self.layer2 = resnet_model.layer2
 25 |         self.layer3 = resnet_model.layer3
 26 |         self.layer4 = resnet_model.layer4
 27 |         # Linear classifier.
 28 |         self.fc = torch.nn.Linear(512**2, 200)
 29 |         # No fc initialization here: this test-only model loads trained parameters from disk.
 30 | 
 31 |     def forward(self, X):
 32 | 
 33 |         N = X.size()[0]
 34 |         assert X.size() == (N, 3, 448, 448)
 35 |         x = self.conv1(X)
 36 |         x = self.bn1(x)
 37 |         x = self.relu(x)
 38 |         x = self.maxpool(x)
 39 | 
 40 |         x = self.layer1(x)
 41 |         x = self.layer2(x)
 42 |         x = self.layer3(x)
 43 |         X = self.layer4(x)
 44 |         assert X.size() == (N, 512, 14, 14)
 45 |         X = X.view(N, 512, 14**2)
 46 |         X = torch.bmm(X, torch.transpose(X, 1, 2)) / (14**2)  # Bilinear
 47 |         assert X.size() == (N, 512, 512)
 48 |         X = X.view(N, 512**2)
 49 |         X = torch.sqrt(X + 1e-5)
 50 |         X = torch.nn.functional.normalize(X)
 51 |         X = self.fc(X)
 52 |         assert X.size() == (N, 200)
 53 |         return X
 54 |     def freeze_layers(self):
 55 |         # Freeze all previous layers.
 56 |         for param in self.conv1.parameters():
 57 |             param.requires_grad = False
 58 |         for param in self.bn1.parameters():
 59 |             param.requires_grad = False
 60 |         for param in self.layer1.parameters():
 61 |             param.requires_grad = False
 62 |         for param in self.layer2.parameters():
 63 |             param.requires_grad = False
 64 |         for param in self.layer3.parameters():
 65 |             param.requires_grad = False
 66 |         for param in self.layer4.parameters():
 67 |             param.requires_grad = False
 68 | class BCNNManager(object):
 69 |     """Manager class to test a trained bilinear CNN.
 70 | 
 71 |     Attributes:
 72 |         _options: Hyperparameters.
 73 |         _path: Useful paths.
 74 |         _net: Bilinear CNN.
 75 |         _criterion: Cross-entropy loss.
 76 |         _solver: SGD with momentum.
 77 |         _scheduler: Reduce learning rate by a factor of 0.1 on plateau.
 78 |         _train_loader: Training data.
 79 |         _test_loader: Testing data.
 80 |     """
 81 |     def __init__(self, path):
 82 |         """Prepare the network, criterion, solver, and data.
 83 | 
 84 |         Args:
 85 |             path, str: Path of the saved model parameters.
 86 |         """
 87 |         print('Prepare the network and data.')
 88 |         self._path = path
 89 |         # Network.
 90 |         self._net = torch.nn.DataParallel(BCNN()).cuda()
 91 |         self._net.module.freeze_layers()
 92 |         self._net.load_state_dict(torch.load(self._path))
 93 | 
 94 |         self._test_loader = dataset.get_test_data_loader(
 95 |             resize_size=448,
 96 |             batch_size=32,
 97 |             object_boxes_dict=None
 98 |         )
 99 | 
100 |     def test(self):
101 |         """Test the network."""
102 |         print('Testing.')
103 |         test_acc = 1.0 * self._accuracy(self._test_loader)
104 |         print("Test acc: %.4f" % test_acc)
105 | 
106 |     def _accuracy(self, data_loader):
107 |         """Compute the train/test accuracy.
108 | 
109 |         Args:
110 |             data_loader: Train/Test DataLoader.
111 | 
112 |         Returns:
113 |             Train/Test accuracy in percentage.
114 |         """
115 |         self._net.train(False)
116 |         num_correct = 0
117 |         num_total = 0
118 |         for i, (_, X, y) in enumerate(data_loader, 0):
119 |             # Data.
120 |             X = torch.autograd.Variable(X.cuda())
121 |             y = torch.autograd.Variable(y.cuda(non_blocking=True))
122 | 
123 |             # Prediction.
124 |             score = self._net(X)
125 |             _, prediction = torch.max(score.data, 1)
126 |             num_total += y.size(0)
127 |             num_correct += torch.sum(prediction == y.data).float()
128 |         return 100.0 * num_correct / num_total
129 | 
130 | 
131 | def test():
132 | 
133 |     path_save='models/resnet_34_all.pth'
134 |     manager = BCNNManager(path_save)
135 |     manager.test()
136 | 
137 | if __name__ == '__main__':
138 |     #dataset.use_less_data=True
139 |     test()
140 | 
--------------------------------------------------------------------------------
/utils.py:
--------------------------------------------------------------------------------
  1 | import sys, os, time
  2 | import logging
  3 | import requests
  4 | import tarfile
  5 | import matplotlib.pyplot as plt
  6 | plt.switch_backend('agg')
  7 | 
  8 | 
  9 | def download_file(url, filename):
 10 |     print('Downloading ' + filename + ' from ' + url)
 11 |     with open(filename, 'wb') as file:
 12 |         resp = requests.get(url, stream=True)
 13 |         # file.write(resp.content)
 14 |         # reference: https://stackoverflow.com/questions/15644964/python-progress-bar-and-downloads
 15 |         total_length = resp.headers.get('content-length')
 16 |         if total_length is None:  # no content length header
 17 |             file.write(resp.content)
 18 |         else:
 19 |             dl = 0
 20 |             total_length = int(total_length)
 21 |             for data in resp.iter_content(chunk_size=4096):
 22 |                 file.write(data)
 23 |                 dl += len(data)
 24 |                 done = int(50 * dl / total_length)
 25 |                 sys.stdout.write("\r[%s%s] %d%%" % ('=' * done, ' ' * (50 - done), done * 2))
 26 |                 sys.stdout.flush()
 27 |     print()
 28 |     print(filename + ' has been downloaded successfully!')
 29 | 
 30 | 
 31 | def extract_tgz(filename):
 32 |     print('Extracting ' + filename + ' ...')
 33 |     tar = tarfile.open(filename, 'r:gz')
 34 |     tar.extractall()
 35 |     tar.close()
 36 |     print(filename + ' has been extracted successfully!')
 37 | 
 38 | 
 39 | def plot_images(class_names, images, classes_true, classes_pred=None):
 40 |     """
 41 |     Adapted from https://github.com/Hvass-Labs/TensorFlow-Tutorials/
 42 |     """
 43 |     fig, axes = plt.subplots(3, 3)
 44 |     for i, ax in enumerate(axes.flat):
 45 |         # plot img
 46 |         ax.imshow(images[i, :, :, :], interpolation='spline16')
 47 |         # show true & predicted classes
 48 |         cls_true_name = class_names[classes_true[i]]
 49 |         if classes_pred is None:
 50 |             xlabel = "{0} ({1})".format(cls_true_name, classes_true[i])
 51 |         else:
 52 |             cls_pred_name = class_names[classes_pred[i]]
 53 |             xlabel = "True: {0}\nPred: {1}".format(
 54 |                 cls_true_name, cls_pred_name
 55 |             )
 56 |         ax.set_xlabel(xlabel)
 57 |         ax.set_xticks([])
 58 |         ax.set_yticks([])
 59 |     plt.show()
 60 | 
 61 | 
 62 | def get_annotated_bounding_boxes():
 63 |     fp = 'CUB_200_2011/bounding_boxes.txt'
 64 |     boxes = {}
 65 |     with open(fp, 'r') as file:
 66 |         for line in file:
 67 |             arr = line.split(' ')
 68 |             boxes[int(arr[0])] = (float(arr[1]), float(arr[2]), float(arr[3]), float(arr[4]))
 69 |     return boxes
 70 | 
 71 | 
 72 | # deprecated
 73 | def get_logging(log_file_name_prefix):
 74 |     if not os.path.exists('logs/'):
 75 |         os.makedirs('logs/')
 76 |     time_str = time.strftime("%m-%d-%H-%M", time.localtime())
 77 |     logging.basicConfig(level=logging.INFO,
 78 |                         format='%(asctime)s %(message)s',
 79 |                         datefmt='%m-%d %H:%M',
 80 |                         filename='logs/' + log_file_name_prefix + '_' + time_str + '.log')
 81 |     # define a Handler which writes INFO messages or higher to the sys.stderr
 82 |     # console = logging.StreamHandler()
 83 |     # console.setLevel(logging.DEBUG)
 84 |     # # set a format which is simpler for console use
 85 |     # formatter = logging.Formatter('%(message)s')
 86 |     # # tell the handler to use this format
 87 |     # console.setFormatter(formatter)
 88 |     # # add the handler to the root logger
 89 |     # logging.getLogger('').addHandler(console)
 90 |     return logging
 91 | 
 92 | 
 93 | class LoggerS:  # a thin wrapper that writes to the log file and also echoes to the console
 94 | 
 95 |     def __init__(self, logging, console_msg_prefix=None):
 96 |         self.logging = logging
 97 |         self.console_msg_prefix = console_msg_prefix
 98 | 
 99 |     def info(self, msg):
100 |         self.logging.info(msg)
101 |         if self.console_msg_prefix is None:
102 |             print(msg)
103 |         else:
104 |             print(self.console_msg_prefix + ' -> ' + msg)
105 | 
106 | 
107 | def get_logger(log_file_name_prefix, console_msg_prefix=None):
108 |     return LoggerS(get_logging(log_file_name_prefix), console_msg_prefix)
--------------------------------------------------------------------------------
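A minimal usage sketch for `utils.get_logger`, mirroring how main.py and adjust_weights.py call it (the prefix strings below are illustrative, not from the repo):

```
import utils

logger = utils.get_logger('example_run')       # writes logs/example_run_<MM-DD-HH-MM>.log
logger.info('start loading dataset')           # logged to file and echoed to the console

obj_logger = utils.get_logger('obj_densenet161', 'obj')
obj_logger.info('target model object starts')  # console line reads 'obj -> target model object starts'
```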