├── README.md
├── adjust_weights.py
├── bilinear_densenet161.py
├── bilinear_resnet152.py
├── bilinear_resnet34.py
├── bilinear_resnet34_double.py
├── bilinear_resnet50_densenet121.py
├── bilinear_vgg16.py
├── bilinear_vgg16_double.py
├── cub_200_2011.py
├── helper.py
├── main.py
├── model_global.py
├── model_object.py
├── model_test.py
└── utils.py

-------------------------------------------------------------------------------- /README.md: --------------------------------------------------------------------------------
# FGC_CUB-200-2011

Fine-Grained Image Classification on CUB-200-2011

## Environment

We suggest using [Anaconda](https://anaconda.org/) to create a virtual environment for this program. Visit the official website or [this mirror](https://mirrors.tuna.tsinghua.edu.cn/anaconda/archive/) to download the installer (hopefully there is a GUI and a browser on your deep-learning server machine).

Create a new virtual environment:
`conda create -n pytorch python=3.6`

Activate the environment on macOS/Linux:
`source activate pytorch`

On Windows:
`activate pytorch`

### Requirements

**Note:** We suggest using `pip` instead of `conda` to install the following requirements **on Windows**. If you use conda to install something like PyTorch or NumPy, three additional packages starting with `mkl` are also downloaded to speed up computation. These `mkl` packages conflict with `conda` on Windows, and the program simply will not run.

If you're using macOS or Linux, just ignore the note and enjoy `conda`~

If you want to speed up package downloads, you can add Tsinghua's package repository for `conda`:
```
conda config --add channels https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free/
conda config --add channels https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/main/
conda config --set show_channel_urls yes
```
Visit the [Tsinghua Open Source Mirror](https://mirrors.tuna.tsinghua.edu.cn/help/anaconda/) for more information.

#### PyTorch
Visit the [official website](https://pytorch.org/) and choose the correct OS, package manager, Python version, and CUDA version to get the install command. Please install both `pytorch` and `torchvision`.

If your download speed is too slow, you can also add Tsinghua's repository specifically for installing PyTorch:

```
conda config --add channels https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/pytorch/
conda install pytorch torchvision
```

#### requests
`pip install requests` — note that the default `conda` channels may not provide this package.
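#### Sanity check
Once everything is installed, a quick check (a minimal sketch; the CUDA line only matters if you train on a GPU) confirms that the environment works:

```python
import torch
import torchvision

print(torch.__version__, torchvision.__version__)
print('CUDA available:', torch.cuda.is_available())
```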
#### Other requirements
`conda install matplotlib pillow`

## Train and evaluate globally

```
cd FGC_CUB-200-2011
source activate pytorch
python model_global.py
```

-------------------------------------------------------------------------------- /adjust_weights.py: --------------------------------------------------------------------------------
import cub_200_2011 as dataset
import helper
import torch
import utils
import time

model_glb_path = ''  # path to the trained global-model checkpoint
model_obj_path = ''  # path to the trained object-model checkpoint

predict_weights = [0.2, 0.8]
logger = utils.get_logger('weights-' + str(predict_weights[0]) + '-' + str(predict_weights[1]))
logger.info('start loading dataset')
begin_time = time.time()
train_loader_glb, valid_loader_glb = dataset.get_train_validation_data_loader(
    resize_size=224,
    batch_size=32,
    random_seed=96,
    validation_size=0.1,
    object_boxes_dict=None,
    show_sample=False
)
test_loader_glb = dataset.get_test_data_loader(
    resize_size=224,
    batch_size=32,
    object_boxes_dict=None
)

bounding_boxes = utils.get_annotated_bounding_boxes()
train_loader_obj, valid_loader_obj = dataset.get_train_validation_data_loader(
    resize_size=224,
    batch_size=32,
    random_seed=96,
    validation_size=0.1,
    object_boxes_dict=bounding_boxes,
    show_sample=False
)
test_loader_obj = dataset.get_test_data_loader(
    resize_size=224,
    batch_size=32,
    object_boxes_dict=bounding_boxes
)
logger.info('loading dataset costs %.4fs' % (time.time() - begin_time))

logger.info('loading models')

begin_time = time.time()
model_glb_name = 'resnet152'
model_glb = helper.get_model_by_name(model_glb_name, pretrained=False)
helper.replace_model_fc(model_glb_name, model_glb)
model_glb.load_state_dict(torch.load(model_glb_path))

model_obj_name = 'densenet161'
model_obj = helper.get_model_by_name(model_obj_name, pretrained=False)
helper.replace_model_fc(model_obj_name, model_obj)
model_obj.load_state_dict(torch.load(model_obj_path))
logger.info('loading models costs %.4fs' % (time.time() - begin_time))

models = [model_glb, model_obj]
validation_loaders = [valid_loader_glb, valid_loader_obj]
test_loaders = [test_loader_glb, test_loader_obj]

helper.evaluate(
    logger=logger,
    models=models,
    data_loaders=validation_loaders,
    set_name='validation set',
    predict_weights=predict_weights
)
helper.evaluate(
    logger=logger,
    models=models,
    data_loaders=test_loaders,
    set_name='test set',
    predict_weights=predict_weights
)
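helper.py is not included in this section, so the exact behaviour of helper.evaluate is not shown. A minimal, hedged sketch of the weighted soft-voting it presumably performs with predict_weights (the function name weighted_predict is hypothetical):

import torch
import torch.nn.functional as F

def weighted_predict(models, inputs, predict_weights):
    # inputs[i] is the batch prepared for models[i] (global image vs. object crop);
    # combine per-model class probabilities with fixed weights, then take argmax.
    probs = None
    for model, X, w in zip(models, inputs, predict_weights):
        model.eval()
        with torch.no_grad():
            p = w * F.softmax(model(X), dim=1)
        probs = p if probs is None else probs + p
    return probs.argmax(dim=1)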
-------------------------------------------------------------------------------- /bilinear_densenet161.py: --------------------------------------------------------------------------------
# -*- coding: utf-8 -*-

import os

import torch
import torchvision
import time
import cub_200_2011 as dataset

torch.manual_seed(0)
torch.cuda.manual_seed_all(0)

class BCNN(torch.nn.Module):

    def __init__(self):
        """Declare all needed layers."""
        torch.nn.Module.__init__(self)
        self.features = torchvision.models.densenet161(pretrained=True).features
        self.in_features = torchvision.models.densenet161().classifier.in_features  # 2208
        self.conv = torch.nn.Conv2d(self.in_features, 512, 1)
        self.bn = torch.nn.BatchNorm2d(512)
        self.relu = torch.nn.ReLU(inplace=True)
        # Linear classifier.
        self.fc = torch.nn.Linear(512**2, 200)
        # Initialize the fc layers.
        torch.nn.init.kaiming_normal_(self.fc.weight.data)
        if self.fc.bias is not None:
            torch.nn.init.constant_(self.fc.bias.data, val=0)

    def forward(self, X):
        N = X.size()[0]
        assert X.size() == (N, 3, 224, 224)
        X = self.features(X)
        assert X.size() == (N, self.in_features, 7, 7)
        X = self.conv(X)
        X = self.bn(X)
        X = self.relu(X)
        X = X.view(N, 512, 7**2)
        X = torch.bmm(X, torch.transpose(X, 1, 2)) / (7**2)  # Bilinear
        assert X.size() == (N, 512, 512)
        X = X.view(N, 512**2)
        X = torch.sqrt(X + 1e-5)
        X = torch.nn.functional.normalize(X)
        X = self.fc(X)
        assert X.size() == (N, 200)
        return X

    def freeze_layers(self):
        # Freeze all layers before the new conv/bn/fc head.
        for param in self.features.parameters():
            param.requires_grad = False

class BCNNManager(object):
    """Manager class to train the bilinear CNN.

    Attributes:
        _options: Hyperparameters.
        _path: Useful paths.
        _net: Bilinear CNN.
        _criterion: Cross-entropy loss.
        _solver: SGD with momentum.
        _scheduler: Reduces the learning rate by a factor of 0.1 on plateau.
        _train_loader: Training data.
        _test_loader: Testing data.
    """
    def __init__(self, options, path, freeze=True, pre_model_path=None):
        """Prepare the network, criterion, solver, and data.

        Args:
            options, dict: Hyperparameters.
        """
        print('Prepare the network and data.')
        self._options = options
        self._path = path
        # Network.
        self._net = torch.nn.DataParallel(BCNN()).cuda()
        #print(self._net)
        if freeze is True:
            self._net.module.freeze_layers()
        if pre_model_path is not None:
            self._net.load_state_dict(torch.load(pre_model_path))
        # Criterion.
        self._criterion = torch.nn.CrossEntropyLoss().cuda()
        # Solver.
        if freeze is True:
            self._solver = torch.optim.SGD(
                list(self._net.module.conv.parameters())
                + list(self._net.module.bn.parameters())
                + list(self._net.module.fc.parameters()),
                lr=self._options['base_lr'],
                momentum=0.9, weight_decay=self._options['weight_decay'])
        else:
            self._solver = torch.optim.SGD(
                self._net.parameters(), lr=self._options['base_lr'],
                momentum=0.9, weight_decay=self._options['weight_decay'])
        self._scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
            self._solver, mode='max', factor=0.1, patience=3, verbose=True,
            threshold=1e-4)
        self._train_loader, self._valid_loader = dataset.get_train_validation_data_loader(
            resize_size=224,
            batch_size=self._options['batch_size'],
            random_seed=96,
            validation_size=0,
            object_boxes_dict=None,
            show_sample=False,
            augment=True
        )
        self._test_loader = dataset.get_test_data_loader(
            resize_size=224,
            batch_size=32,
            object_boxes_dict=None
        )

    def train(self):
        """Train the network."""
        print('Training.')
        best_acc = 0.0
        best_epoch = None
        print('Epoch\tTrain loss\tTrain acc\tTest acc')
        for t in range(self._options['epochs']):
            epoch_loss = []
            num_correct = 0
            num_total = 0
            for i, (_, X, y) in enumerate(self._train_loader, 0):
                # Data.
                X = torch.autograd.Variable(X.cuda())
                y = torch.autograd.Variable(y.cuda(non_blocking=True))

                # Clear the existing gradients.
                self._solver.zero_grad()
                # Forward pass.
                score = self._net(X)
                loss = self._criterion(score, y)
                epoch_loss.append(loss.data.item())
                # Prediction.
                _, prediction = torch.max(score.data, 1)
                num_total += y.size(0)
                num_correct += torch.sum(prediction == y.data).float()
                # Backward pass.
                loss.backward()
                self._solver.step()
            train_acc = 100.0 * num_correct / num_total
            #valid_acc = 1.0 * self._accuracy(self._valid_loader)
            test_acc = 1.0 * self._accuracy(self._test_loader)
            self._scheduler.step(test_acc)
            if test_acc > best_acc:
                best_acc = test_acc
                best_epoch = t + 1
                print('*', end='')
                # Save model onto disk.
                torch.save(self._net.state_dict(), self._path)
            print('%d\t%4.3f\t\t%4.2f%%\t\t%4.2f%%' %
                  (t + 1, sum(epoch_loss) / len(epoch_loss), train_acc, test_acc))
            torch.cuda.empty_cache()
        print('Best at epoch %d, test accuracy %f' % (best_epoch, best_acc))

    def _accuracy(self, data_loader):
        """Compute the train/test accuracy.

        Args:
            data_loader: Train/Test DataLoader.

        Returns:
            Train/Test accuracy in percentage.
        """
        self._net.train(False)
        num_correct = 0
        num_total = 0
        for i, (_, X, y) in enumerate(data_loader, 0):
            # Data.
            X = torch.autograd.Variable(X.cuda())
            y = torch.autograd.Variable(y.cuda(non_blocking=True))

            # Prediction.
            score = self._net(X)
            _, prediction = torch.max(score.data, 1)
            num_total += y.size(0)
            num_correct += torch.sum(prediction == y.data).float()
        self._net.train(True)  # Set the model to training phase
        return 100.0 * num_correct / num_total

def fc():
    options = {
        'base_lr': 1.0,
        'batch_size': 64,
        'epochs': 55,
        'weight_decay': 1e-8,
    }
    pre_model_path = None
    path_save = 'models/densenet161_fc.pth'
    manager = BCNNManager(options, path_save, freeze=True, pre_model_path=pre_model_path)
    manager.train()

def all_layers():
    options = {
        'base_lr': 0.01,
        'batch_size': 32,
        'epochs': 30,
        'weight_decay': 1e-5,
    }
    pre_model_path = 'models/densenet161_fc.pth'
    path_save = 'models/densenet161_all.pth'
    manager = BCNNManager(options, path_save, freeze=False, pre_model_path=pre_model_path)
    manager.train()

if __name__ == '__main__':
    #dataset.use_less_data = True
    fc()
    torch.cuda.empty_cache()
    all_layers()
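Every BCNN variant in this repo shares the same pooling head: an outer product of channel features averaged over spatial positions, followed by an element-wise square root and L2 normalisation. A standalone sketch of the operation, assuming a post-ReLU (hence non-negative) 512-channel, 7x7 feature map:

import torch

N = 4
X = torch.rand(N, 512, 7, 7)                   # backbone features (post-ReLU)
X = X.view(N, 512, 7**2)                       # flatten the 7x7 grid
X = torch.bmm(X, X.transpose(1, 2)) / (7**2)   # bilinear pooling: (N, 512, 512)
X = X.view(N, 512**2)
X = torch.sqrt(X + 1e-5)                       # square-root normalisation
X = torch.nn.functional.normalize(X)           # L2 normalisation
print(X.shape)                                 # torch.Size([4, 262144])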
-------------------------------------------------------------------------------- /bilinear_resnet152.py: --------------------------------------------------------------------------------
# -*- coding: utf-8 -*-

import os

import torch
import torchvision
import time
import cub_200_2011 as dataset
import torch.nn as nn

torch.manual_seed(0)
torch.cuda.manual_seed_all(0)

class BCNN(torch.nn.Module):

    def __init__(self):
        """Declare all needed layers."""
        torch.nn.Module.__init__(self)
        resnet_model = torchvision.models.resnet152(pretrained=True)
        self.conv1 = resnet_model.conv1
        self.bn1 = resnet_model.bn1
        self.relu = resnet_model.relu
        self.maxpool = resnet_model.maxpool
        self.layer1 = resnet_model.layer1
        self.layer2 = resnet_model.layer2
        self.layer3 = resnet_model.layer3
        self.layer4 = resnet_model.layer4
        in_channels = 2048
        out_channels = 512
        self.conv2 = nn.Conv2d(in_channels, out_channels, 1)
        self.bn2 = nn.BatchNorm2d(out_channels)
        self.relu2 = nn.ReLU(inplace=True)
        # Linear classifier.
        self.fc = torch.nn.Linear(512**2, 200)
        # Initialize the fc layers.
        torch.nn.init.kaiming_normal_(self.fc.weight.data)
        if self.fc.bias is not None:
            torch.nn.init.constant_(self.fc.bias.data, val=0)

    def forward(self, X):
        N = X.size()[0]
        assert X.size() == (N, 3, 224, 224)
        x = self.conv1(X)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        X = self.layer4(x)
        assert X.size() == (N, 2048, 7, 7)
        X = self.conv2(X)
        X = self.bn2(X)
        X = self.relu2(X)
        X = X.view(N, 512, 7**2)
        X = torch.bmm(X, torch.transpose(X, 1, 2)) / (7**2)  # Bilinear
        assert X.size() == (N, 512, 512)
        X = X.view(N, 512**2)
        X = torch.sqrt(X + 1e-5)
        X = torch.nn.functional.normalize(X)
        X = self.fc(X)
        assert X.size() == (N, 200)
        return X

    def freeze_layers(self):
        # Freeze all layers before the new conv/bn/fc head.
        for param in self.conv1.parameters():
            param.requires_grad = False
        for param in self.bn1.parameters():
            param.requires_grad = False
        for param in self.layer1.parameters():
            param.requires_grad = False
        for param in self.layer2.parameters():
            param.requires_grad = False
        for param in self.layer3.parameters():
            param.requires_grad = False
        for param in self.layer4.parameters():
            param.requires_grad = False

class BCNNManager(object):
    """Manager class to train the bilinear CNN.

    Attributes:
        _options: Hyperparameters.
        _path: Useful paths.
        _net: Bilinear CNN.
        _criterion: Cross-entropy loss.
        _solver: SGD with momentum.
        _scheduler: Reduces the learning rate by a factor of 0.1 on plateau.
        _train_loader: Training data.
        _test_loader: Testing data.
    """
    def __init__(self, options, path, freeze=True, pre_model_path=None):
        """Prepare the network, criterion, solver, and data.

        Args:
            options, dict: Hyperparameters.
        """
        print('Prepare the network and data.')
        self._options = options
        self._path = path
        # Network.
        self._net = torch.nn.DataParallel(BCNN()).cuda()
        #print(self._net)
        if freeze is True:
            self._net.module.freeze_layers()
        if pre_model_path is not None:
            self._net.load_state_dict(torch.load(pre_model_path))
        # Criterion.
        self._criterion = torch.nn.CrossEntropyLoss().cuda()
        # Solver.
        if freeze is True:
            self._solver = torch.optim.SGD(
                list(self._net.module.conv2.parameters())
                + list(self._net.module.bn2.parameters())
                + list(self._net.module.fc.parameters()),
                lr=self._options['base_lr'],
                momentum=0.9, weight_decay=self._options['weight_decay'])
        else:
            self._solver = torch.optim.SGD(
                self._net.parameters(), lr=self._options['base_lr'],
                momentum=0.9, weight_decay=self._options['weight_decay'])
        self._scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
            self._solver, mode='max', factor=0.1, patience=3, verbose=True,
            threshold=1e-4)
        self._train_loader, self._valid_loader = dataset.get_train_validation_data_loader(
            resize_size=224,
            batch_size=self._options['batch_size'],
            random_seed=96,
            validation_size=0,
            object_boxes_dict=None,
            show_sample=False,
            augment=True
        )
        self._test_loader = dataset.get_test_data_loader(
            resize_size=224,
            batch_size=32,
            object_boxes_dict=None
        )

    def train(self):
        """Train the network."""
        print('Training.')
        best_acc = 0.0
        best_epoch = None
        print('Epoch\tTrain loss\tTrain acc\tTest acc')
        for t in range(self._options['epochs']):
            epoch_loss = []
            num_correct = 0
            num_total = 0
            for i, (_, X, y) in enumerate(self._train_loader, 0):
                # Data.
                X = torch.autograd.Variable(X.cuda())
                y = torch.autograd.Variable(y.cuda(non_blocking=True))

                # Clear the existing gradients.
                self._solver.zero_grad()
                # Forward pass.
                score = self._net(X)
                loss = self._criterion(score, y)
                epoch_loss.append(loss.data.item())
                # Prediction.
                _, prediction = torch.max(score.data, 1)
                num_total += y.size(0)
                num_correct += torch.sum(prediction == y.data).float()
                # Backward pass.
                loss.backward()
                self._solver.step()
            train_acc = 100.0 * num_correct / num_total
            #valid_acc = 1.0 * self._accuracy(self._valid_loader)
            test_acc = 1.0 * self._accuracy(self._test_loader)
            self._scheduler.step(test_acc)
            if test_acc > best_acc:
                best_acc = test_acc
                best_epoch = t + 1
                print('*', end='')
                # Save model onto disk.
                torch.save(self._net.state_dict(), self._path)
            print('%d\t%4.3f\t\t%4.2f%%\t\t%4.2f%%' %
                  (t + 1, sum(epoch_loss) / len(epoch_loss), train_acc, test_acc))
        print('Best at epoch %d, test accuracy %f' % (best_epoch, best_acc))

    def _accuracy(self, data_loader):
        """Compute the train/test accuracy.

        Args:
            data_loader: Train/Test DataLoader.

        Returns:
            Train/Test accuracy in percentage.
        """
        self._net.train(False)
        num_correct = 0
        num_total = 0
        for i, (_, X, y) in enumerate(data_loader, 0):
            # Data.
            X = torch.autograd.Variable(X.cuda())
            y = torch.autograd.Variable(y.cuda(non_blocking=True))

            # Prediction.
            score = self._net(X)
            _, prediction = torch.max(score.data, 1)
            num_total += y.size(0)
            num_correct += torch.sum(prediction == y.data).float()
        self._net.train(True)  # Set the model to training phase
        return 100.0 * num_correct / num_total

def fc():
    options = {
        'base_lr': 1.0,
        'batch_size': 64,
        'epochs': 50,
        'weight_decay': 1e-8,
    }
    pre_model_path = None
    path_save = 'models/resnet152_fc_224.pth'
    manager = BCNNManager(options, path_save, freeze=True, pre_model_path=pre_model_path)
    manager.train()

def all_layers():
    options = {
        'base_lr': 0.01,
        'batch_size': 32,
        'epochs': 30,
        'weight_decay': 1e-5,
    }
    pre_model_path = 'models/resnet152_fc_224.pth'
    path_save = 'models/resnet152_all_224.pth'
    manager = BCNNManager(options, path_save, freeze=False, pre_model_path=pre_model_path)
    manager.train()

if __name__ == '__main__':
    #dataset.use_less_data = True
    fc()
    torch.cuda.empty_cache()
    all_layers()
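Note that the managers save checkpoints from a torch.nn.DataParallel wrapper, so every state_dict key carries a 'module.' prefix, while adjust_weights.py loads checkpoints into bare torchvision models. A minimal sketch of stripping the prefix when loading such a checkpoint into a bare model (the checkpoint path is illustrative):

import torch

state = torch.load('models/resnet152_all_224.pth', map_location='cpu')
# Remove the 'module.' prefix added by torch.nn.DataParallel.
state = {k[len('module.'):] if k.startswith('module.') else k: v
         for k, v in state.items()}
model.load_state_dict(state)  # `model` is the bare (non-DataParallel) network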
-------------------------------------------------------------------------------- /bilinear_resnet34.py: --------------------------------------------------------------------------------
# -*- coding: utf-8 -*-

import os

import torch
import torchvision
import time
import cub_200_2011 as dataset

torch.manual_seed(0)
torch.cuda.manual_seed_all(0)

class BCNN(torch.nn.Module):

    def __init__(self):
        """Declare all needed layers."""
        torch.nn.Module.__init__(self)
        resnet_model = torchvision.models.resnet34(pretrained=True)
        self.conv1 = resnet_model.conv1
        self.bn1 = resnet_model.bn1
        self.relu = resnet_model.relu
        self.maxpool = resnet_model.maxpool
        self.layer1 = resnet_model.layer1
        self.layer2 = resnet_model.layer2
        self.layer3 = resnet_model.layer3
        self.layer4 = resnet_model.layer4
        # Linear classifier.
        self.fc = torch.nn.Linear(512**2, 200)
        # Initialize the fc layers.
        torch.nn.init.kaiming_normal_(self.fc.weight.data)
        if self.fc.bias is not None:
            torch.nn.init.constant_(self.fc.bias.data, val=0)

    def forward(self, X):
        N = X.size()[0]
        assert X.size() == (N, 3, 448, 448)
        x = self.conv1(X)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        X = self.layer4(x)
        assert X.size() == (N, 512, 14, 14)
        X = X.view(N, 512, 14**2)
        X = torch.bmm(X, torch.transpose(X, 1, 2)) / (14**2)  # Bilinear
        assert X.size() == (N, 512, 512)
        X = X.view(N, 512**2)
        X = torch.sqrt(X + 1e-5)
        X = torch.nn.functional.normalize(X)
        X = self.fc(X)
        assert X.size() == (N, 200)
        return X

    def freeze_layers(self):
        # Freeze all layers before the new fc head.
        for param in self.conv1.parameters():
            param.requires_grad = False
        for param in self.bn1.parameters():
            param.requires_grad = False
        for param in self.layer1.parameters():
            param.requires_grad = False
        for param in self.layer2.parameters():
            param.requires_grad = False
        for param in self.layer3.parameters():
            param.requires_grad = False
        for param in self.layer4.parameters():
            param.requires_grad = False

class BCNNManager(object):
    """Manager class to train the bilinear CNN.

    Attributes:
        _options: Hyperparameters.
        _path: Useful paths.
        _net: Bilinear CNN.
        _criterion: Cross-entropy loss.
        _solver: SGD with momentum.
        _scheduler: Reduces the learning rate by a factor of 0.1 on plateau.
        _train_loader: Training data.
        _test_loader: Testing data.
    """
    def __init__(self, options, path, freeze=True, pre_model_path=None):
        """Prepare the network, criterion, solver, and data.

        Args:
            options, dict: Hyperparameters.
        """
        print('Prepare the network and data.')
        self._options = options
        self._path = path
        # Network.
        self._net = torch.nn.DataParallel(BCNN()).cuda()
        #print(self._net)
        if freeze is True:
            self._net.module.freeze_layers()
        if pre_model_path is not None:
            self._net.load_state_dict(torch.load(pre_model_path))
        # Criterion.
        self._criterion = torch.nn.CrossEntropyLoss().cuda()
        # Solver.
        if freeze is True:
            self._solver = torch.optim.SGD(
                self._net.module.fc.parameters(), lr=self._options['base_lr'],
                momentum=0.9, weight_decay=self._options['weight_decay'])
        else:
            self._solver = torch.optim.SGD(
                self._net.parameters(), lr=self._options['base_lr'],
                momentum=0.9, weight_decay=self._options['weight_decay'])
        self._scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
            self._solver, mode='max', factor=0.1, patience=3, verbose=True,
            threshold=1e-4)
        self._train_loader, self._valid_loader = dataset.get_train_validation_data_loader(
            resize_size=448,
            batch_size=self._options['batch_size'],
            random_seed=96,
            validation_size=0,
            object_boxes_dict=None,
            show_sample=False,
            augment=True
        )
        self._test_loader = dataset.get_test_data_loader(
            resize_size=448,
            batch_size=32,
            object_boxes_dict=None
        )

    def train(self):
        """Train the network."""
        print('Training.')
        best_acc = 0.0
        best_epoch = None
        print('Epoch\tTrain loss\tTrain acc\tTest acc')
        for t in range(self._options['epochs']):
            epoch_loss = []
            num_correct = 0
            num_total = 0
            for i, (_, X, y) in enumerate(self._train_loader, 0):
                # Data.
                X = torch.autograd.Variable(X.cuda())
                y = torch.autograd.Variable(y.cuda(non_blocking=True))

                # Clear the existing gradients.
                self._solver.zero_grad()
                # Forward pass.
                score = self._net(X)
                loss = self._criterion(score, y)
                epoch_loss.append(loss.data.item())
                # Prediction.
                _, prediction = torch.max(score.data, 1)
                num_total += y.size(0)
                num_correct += torch.sum(prediction == y.data).float()
                # Backward pass.
                loss.backward()
                self._solver.step()
            train_acc = 100.0 * num_correct / num_total
            #valid_acc = 1.0 * self._accuracy(self._valid_loader)
            test_acc = 1.0 * self._accuracy(self._test_loader)
            self._scheduler.step(test_acc)
            if test_acc > best_acc:
                best_acc = test_acc
                best_epoch = t + 1
                print('*', end='')
                # Save model onto disk.
                torch.save(self._net.state_dict(), self._path)
            print('%d\t%4.3f\t\t%4.2f%%\t\t%4.2f%%' %
                  (t + 1, sum(epoch_loss) / len(epoch_loss), train_acc, test_acc))
        print('Best at epoch %d, test accuracy %f' % (best_epoch, best_acc))

    def _accuracy(self, data_loader):
        """Compute the train/test accuracy.

        Args:
            data_loader: Train/Test DataLoader.
        Returns:
            Train/Test accuracy in percentage.
        """
        self._net.train(False)
        num_correct = 0
        num_total = 0
        for i, (_, X, y) in enumerate(data_loader, 0):
            # Data.
            X = torch.autograd.Variable(X.cuda())
            y = torch.autograd.Variable(y.cuda(non_blocking=True))

            # Prediction.
            score = self._net(X)
            _, prediction = torch.max(score.data, 1)
            num_total += y.size(0)
            num_correct += torch.sum(prediction == y.data).float()
        self._net.train(True)  # Set the model to training phase
        return 100.0 * num_correct / num_total

def fc():
    options = {
        'base_lr': 1.0,
        'batch_size': 64,
        'epochs': 50,
        'weight_decay': 1e-8,
    }
    pre_model_path = None
    path_save = 'models/resnet34_fc_448.pth'
    manager = BCNNManager(options, path_save, freeze=True, pre_model_path=pre_model_path)
    manager.train()

def all_layers():
    options = {
        'base_lr': 0.01,
        'batch_size': 32,
        'epochs': 30,
        'weight_decay': 1e-5,
    }
    pre_model_path = 'models/resnet34_fc_448.pth'
    path_save = 'models/resnet34_all_448.pth'
    manager = BCNNManager(options, path_save, freeze=False, pre_model_path=pre_model_path)
    #manager.train()

if __name__ == '__main__':
    #fc()
    #torch.cuda.empty_cache()
    all_layers()

import numpy as np
import torch.nn as nn
import torch.nn.functional as F

class ScaledDotProductAttention(nn.Module):
    ''' Scaled Dot-Product Attention '''

    def __init__(self, temperature, attn_dropout=0.1):
        super().__init__()
        self.temperature = temperature
        self.dropout = nn.Dropout(attn_dropout)
        self.softmax = nn.Softmax(dim=2)

    def forward(self, q, k, v, mask=None):
        attn = torch.bmm(q, k.transpose(1, 2))
        attn = attn / self.temperature

        if mask is not None:
            attn = attn.masked_fill(mask, -np.inf)

        attn = self.softmax(attn)
        attn = self.dropout(attn)
        output = torch.bmm(attn, v)

        return output, attn

class MultiHeadAttention(nn.Module):
    ''' Multi-Head Attention module '''

    def __init__(self, n_head, d_model, d_k, d_v, dropout=0.1):
        super().__init__()

        self.n_head = n_head
        self.d_k = d_k
        self.d_v = d_v

        self.w_qs = nn.Linear(d_model, n_head * d_k)
        self.w_ks = nn.Linear(d_model, n_head * d_k)
        self.w_vs = nn.Linear(d_model, n_head * d_v)
        nn.init.normal_(self.w_qs.weight, mean=0, std=np.sqrt(2.0 / (d_model + d_k)))
        nn.init.normal_(self.w_ks.weight, mean=0, std=np.sqrt(2.0 / (d_model + d_k)))
        nn.init.normal_(self.w_vs.weight, mean=0, std=np.sqrt(2.0 / (d_model + d_v)))

        self.attention = ScaledDotProductAttention(temperature=np.power(d_k, 0.5))
        self.layer_norm = nn.LayerNorm(d_model)

        self.fc = nn.Linear(n_head * d_v, d_model)
        nn.init.xavier_normal_(self.fc.weight)

        self.dropout = nn.Dropout(dropout)

    def forward(self, q, k, v, mask=None):
        d_k, d_v, n_head = self.d_k, self.d_v, self.n_head

        sz_b, len_q, _ = q.size()  # (batch, seq_len, d_model)
        sz_b, len_k, _ = k.size()
        sz_b, len_v, _ = v.size()

        residual = q

        q = self.w_qs(q).view(sz_b, len_q, n_head, d_k)  # (b, lq, n_head, d_k)
        k = self.w_ks(k).view(sz_b, len_k, n_head, d_k)
        v = self.w_vs(v).view(sz_b, len_v, n_head, d_v)
        q = q.permute(2, 0, 1, 3).contiguous().view(-1, len_q, d_k)  # (n*b) x lq x dk
        k = k.permute(2, 0, 1, 3).contiguous().view(-1, len_k, d_k)  # (n*b) x lk x dk
        v = v.permute(2, 0, 1, 3).contiguous().view(-1, len_v, d_v)  # (n*b) x lv x dv

        if mask is not None:  # mask defaults to None, so guard the repeat
            mask = mask.repeat(n_head, 1, 1)  # (n*b) x .. x ..
        output, attn = self.attention(q, k, v, mask=mask)

        output = output.view(n_head, sz_b, len_q, d_v)
        output = output.permute(1, 2, 0, 3).contiguous().view(sz_b, len_q, -1)  # b x lq x (n*dv)

        output = self.dropout(self.fc(output))
        output = self.layer_norm(output + residual)

        return output, attn
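The two attention modules above are defined at the bottom of bilinear_resnet34.py but never wired into the BCNN. A minimal smoke test, assuming self-attention over the 7x7 grid treated as a 49-token sequence and d_model = n_head * d_v:

import torch

mha = MultiHeadAttention(n_head=8, d_model=512, d_k=64, d_v=64)
q = torch.randn(2, 49, 512)   # e.g. 7x7 spatial positions as a sequence
out, attn = mha(q, q, q)      # self-attention; mask defaults to None
print(out.shape, attn.shape)  # (2, 49, 512) and (16, 49, 49)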
-------------------------------------------------------------------------------- /bilinear_resnet34_double.py: --------------------------------------------------------------------------------
# -*- coding: utf-8 -*-

import os

import torch
import torchvision
import time
import cub_200_2011 as dataset

torch.manual_seed(0)
torch.cuda.manual_seed_all(0)

class BCNN(torch.nn.Module):

    def __init__(self):
        """Declare all needed layers."""
        torch.nn.Module.__init__(self)
        resnet_model = torchvision.models.resnet34(pretrained=True)

        self.conv1 = resnet_model.conv1
        self.bn1 = resnet_model.bn1
        self.relu = resnet_model.relu
        self.maxpool = resnet_model.maxpool
        self.layer1 = resnet_model.layer1
        self.layer2 = resnet_model.layer2
        self.layer3 = resnet_model.layer3
        self.layer4 = resnet_model.layer4

        resnet_model = torchvision.models.resnet34(pretrained=True)
        self._conv1 = resnet_model.conv1
        self._bn1 = resnet_model.bn1
        self._relu = resnet_model.relu
        self._maxpool = resnet_model.maxpool
        self._layer1 = resnet_model.layer1
        self._layer2 = resnet_model.layer2
        self._layer3 = resnet_model.layer3
        self._layer4 = resnet_model.layer4
        # Linear classifier.
        self.fc = torch.nn.Linear(512**2, 200)
        # Initialize the fc layers.
        torch.nn.init.kaiming_normal_(self.fc.weight.data)
        if self.fc.bias is not None:
            torch.nn.init.constant_(self.fc.bias.data, val=0)

    def forward(self, X):
        N = X.size()[0]
        assert X.size() == (N, 3, 224, 224)
        x1 = self.conv1(X)
        x1 = self.bn1(x1)
        x1 = self.relu(x1)
        x1 = self.maxpool(x1)
        x1 = self.layer1(x1)
        x1 = self.layer2(x1)
        x1 = self.layer3(x1)
        x1 = self.layer4(x1)

        x2 = self._conv1(X)
        x2 = self._bn1(x2)
        x2 = self._relu(x2)
        x2 = self._maxpool(x2)
        x2 = self._layer1(x2)
        x2 = self._layer2(x2)
        x2 = self._layer3(x2)
        x2 = self._layer4(x2)
        assert x1.size() == (N, 512, 7, 7)
        x1 = x1.view(N, 512, 7**2)
        x2 = x2.view(N, 512, 7**2)
        X = torch.bmm(x1, torch.transpose(x2, 1, 2)) / (7**2)  # Bilinear
        assert X.size() == (N, 512, 512)
        X = X.view(N, 512**2)
        X = torch.sqrt(X + 1e-5)
        X = torch.nn.functional.normalize(X)
        X = self.fc(X)
        assert X.size() == (N, 200)
        return X

    def freeze_layers(self):
        # Freeze all layers of both streams, keeping only the fc head trainable.
        for param in self.conv1.parameters():
            param.requires_grad = False
        for param in self.bn1.parameters():
            param.requires_grad = False
        for param in self.layer1.parameters():
            param.requires_grad = False
        for param in self.layer2.parameters():
            param.requires_grad = False
        for param in self.layer3.parameters():
            param.requires_grad = False
        for param in self.layer4.parameters():
            param.requires_grad = False

        for param in self._conv1.parameters():
            param.requires_grad = False
        for param in self._bn1.parameters():
            param.requires_grad = False
        for param in self._layer1.parameters():
            param.requires_grad = False
        for param in self._layer2.parameters():
            param.requires_grad = False
        for param in self._layer3.parameters():
            param.requires_grad = False
        for param in self._layer4.parameters():
            param.requires_grad = False

class BCNNManager(object):
    """Manager class to train the bilinear CNN.

    Attributes:
        _options: Hyperparameters.
        _path: Useful paths.
        _net: Bilinear CNN.
        _criterion: Cross-entropy loss.
        _solver: SGD with momentum.
        _scheduler: Reduces the learning rate by a factor of 0.1 on plateau.
        _train_loader: Training data.
        _test_loader: Testing data.
    """
    def __init__(self, options, path, freeze=True, pre_model_path=None):
        """Prepare the network, criterion, solver, and data.

        Args:
            options, dict: Hyperparameters.
        """
        print('Prepare the network and data.')
        self._options = options
        self._path = path
        # Network.
        self._net = torch.nn.DataParallel(BCNN()).cuda()
        #print(self._net)
        if freeze is True:
            self._net.module.freeze_layers()
        if pre_model_path is not None:
            self._net.load_state_dict(torch.load(pre_model_path))
        # Criterion.
        self._criterion = torch.nn.CrossEntropyLoss().cuda()
        # Solver.
        if freeze is True:
            self._solver = torch.optim.SGD(
                self._net.module.fc.parameters(), lr=self._options['base_lr'],
                momentum=0.9, weight_decay=self._options['weight_decay'])
        else:
            self._solver = torch.optim.SGD(
                self._net.parameters(), lr=self._options['base_lr'],
                momentum=0.9, weight_decay=self._options['weight_decay'])
        self._scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
            self._solver, mode='max', factor=0.1, patience=3, verbose=True,
            threshold=1e-4)
        self._train_loader, self._valid_loader = dataset.get_train_validation_data_loader(
            resize_size=224,
            batch_size=self._options['batch_size'],
            random_seed=96,
            validation_size=0,
            object_boxes_dict=None,
            show_sample=False,
            augment=True
        )
        self._test_loader = dataset.get_test_data_loader(
            resize_size=224,
            batch_size=32,
            object_boxes_dict=None
        )

    def train(self):
        """Train the network."""
        print('Training.')
        best_acc = 0.0
        best_epoch = None
        print('Epoch\tTrain loss\tTrain acc\tTest acc')
        for t in range(self._options['epochs']):
            epoch_loss = []
            num_correct = 0
            num_total = 0
            for i, (_, X, y) in enumerate(self._train_loader, 0):
                # Data.
                X = torch.autograd.Variable(X.cuda())
                y = torch.autograd.Variable(y.cuda(non_blocking=True))

                # Clear the existing gradients.
                self._solver.zero_grad()
                # Forward pass.
                score = self._net(X)
                loss = self._criterion(score, y)
                epoch_loss.append(loss.data.item())
                # Prediction.
                _, prediction = torch.max(score.data, 1)
                num_total += y.size(0)
                num_correct += torch.sum(prediction == y.data).float()
                # Backward pass.
                loss.backward()
                self._solver.step()
            train_acc = 100.0 * num_correct / num_total
            #valid_acc = 1.0 * self._accuracy(self._valid_loader)
            test_acc = 1.0 * self._accuracy(self._test_loader)
            self._scheduler.step(test_acc)
            if test_acc > best_acc:
                best_acc = test_acc
                best_epoch = t + 1
                print('*', end='')
                # Save model onto disk.
                torch.save(self._net.state_dict(), self._path)
            print('%d\t%4.3f\t\t%4.2f%%\t\t%4.2f%%' %
                  (t + 1, sum(epoch_loss) / len(epoch_loss), train_acc, test_acc))
        print('Best at epoch %d, test accuracy %f' % (best_epoch, best_acc))

    def _accuracy(self, data_loader):
        """Compute the train/test accuracy.

        Args:
            data_loader: Train/Test DataLoader.

        Returns:
            Train/Test accuracy in percentage.
        """
        self._net.train(False)
        num_correct = 0
        num_total = 0
        for i, (_, X, y) in enumerate(data_loader, 0):
            # Data.
            X = torch.autograd.Variable(X.cuda())
            y = torch.autograd.Variable(y.cuda(non_blocking=True))

            # Prediction.
            score = self._net(X)
            _, prediction = torch.max(score.data, 1)
            num_total += y.size(0)
            num_correct += torch.sum(prediction == y.data).float()
        self._net.train(True)  # Set the model to training phase
        return 100.0 * num_correct / num_total

def fc():
    options = {
        'base_lr': 1.0,
        'batch_size': 64,
        'epochs': 55,
        'weight_decay': 1e-8,
    }
    pre_model_path = None
    path_save = 'models/resnet34_fc_double_224.pth'
    manager = BCNNManager(options, path_save, freeze=True, pre_model_path=pre_model_path)
    manager.train()

def all_layers():
    options = {
        'base_lr': 0.01,
        'batch_size': 64,
        'epochs': 30,
        'weight_decay': 1e-5,
    }
    pre_model_path = 'models/resnet34_fc_double_224.pth'
    path_save = 'models/resnet34_all_double_224.pth'
    manager = BCNNManager(options, path_save, freeze=False, pre_model_path=pre_model_path)
    manager.train()

if __name__ == '__main__':
    #dataset.use_less_data = True
    fc()
    torch.cuda.empty_cache()
    all_layers()
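The ResNet-34 streams above already output 512 channels, so no reduction is needed. The next file (like bilinear_resnet152.py) instead inserts a 1x1 conv to shrink the backbone channels to 512 before pooling; a quick back-of-envelope on why, assuming 200 output classes:

# The classifier input is C**2, so fc parameters grow with C**2 * num_classes.
for C in (512, 2048):
    fc_params = C**2 * 200          # weights of the final linear layer
    print(C, '->', '%.0fM parameters' % (fc_params / 1e6))
# 512  ->  52M parameters
# 2048 -> 839M parameters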
-------------------------------------------------------------------------------- /bilinear_resnet50_densenet121.py: --------------------------------------------------------------------------------
# -*- coding: utf-8 -*-

import os

import torch
import torchvision
import time
import cub_200_2011 as dataset

torch.manual_seed(0)
torch.cuda.manual_seed_all(0)

class BCNN(torch.nn.Module):

    def __init__(self):
        """Declare all needed layers."""
        torch.nn.Module.__init__(self)
        # ResNet-50 branch.
        resnet_model = torchvision.models.resnet50(pretrained=True)
        self.conv1 = resnet_model.conv1
        self.bn1 = resnet_model.bn1
        self.relu = resnet_model.relu
        self.maxpool = resnet_model.maxpool
        self.layer1 = resnet_model.layer1
        self.layer2 = resnet_model.layer2
        self.layer3 = resnet_model.layer3
        self.layer4 = resnet_model.layer4
        in_channels = 2048
        out_channels = 512
        self.conv2 = torch.nn.Conv2d(in_channels, out_channels, 1)
        self.bn2 = torch.nn.BatchNorm2d(out_channels)
        self.relu2 = torch.nn.ReLU(inplace=True)

        # DenseNet-121 branch.
        self.features = torchvision.models.densenet121(pretrained=True).features
        self.in_features = torchvision.models.densenet121().classifier.in_features  # 1024
        self.conv = torch.nn.Conv2d(self.in_features, 512, 1)
        self.bn = torch.nn.BatchNorm2d(512)
        # self.relu from the ResNet branch is reused here; reassigning it
        # would silently replace the registered module.

        # Linear classifier.
        self.fc = torch.nn.Linear(512**2, 200)

        # Initialize the fc layers.
        torch.nn.init.kaiming_normal_(self.fc.weight.data)
        if self.fc.bias is not None:
            torch.nn.init.constant_(self.fc.bias.data, val=0)

    def forward(self, X):
        N = X.size()[0]
        assert X.size() == (N, 3, 224, 224)
        # ResNet branch.
        x1 = self.conv1(X)
        x1 = self.bn1(x1)
        x1 = self.relu(x1)
        x1 = self.maxpool(x1)
        x1 = self.layer1(x1)
        x1 = self.layer2(x1)
        x1 = self.layer3(x1)
        x1 = self.layer4(x1)
        assert x1.size() == (N, 2048, 7, 7)
        x1 = self.conv2(x1)
        x1 = self.bn2(x1)
        x1 = self.relu2(x1)
        x1 = x1.view(N, 512, 7**2)

        # DenseNet branch.
        x2 = self.features(X)
        assert x2.size() == (N, self.in_features, 7, 7)
        x2 = self.conv(x2)
        x2 = self.bn(x2)
        x2 = self.relu(x2)
        x2 = x2.view(N, 512, 7**2)

        X = torch.bmm(x1, torch.transpose(x2, 1, 2)) / (7**2)  # Bilinear
        assert X.size() == (N, 512, 512)
        X = X.view(N, 512**2)
        X = torch.sqrt(X + 1e-5)
        X = torch.nn.functional.normalize(X)
        X = self.fc(X)
        assert X.size() == (N, 200)
        return X

    def freeze_layers(self):
        # Freeze both backbones, keeping only the new conv/bn/fc layers trainable.
        for param in self.conv1.parameters():
            param.requires_grad = False
        for param in self.bn1.parameters():
            param.requires_grad = False
        for param in self.layer1.parameters():
            param.requires_grad = False
        for param in self.layer2.parameters():
            param.requires_grad = False
        for param in self.layer3.parameters():
            param.requires_grad = False
        for param in self.layer4.parameters():
            param.requires_grad = False

        for param in self.features.parameters():
            param.requires_grad = False


class BCNNManager(object):
    """Manager class to train the bilinear CNN.

    Attributes:
        _options: Hyperparameters.
        _path: Useful paths.
        _net: Bilinear CNN.
        _criterion: Cross-entropy loss.
        _solver: SGD with momentum.
        _scheduler: Reduces the learning rate by a factor of 0.1 on plateau.
        _train_loader: Training data.
        _test_loader: Testing data.
    """
    def __init__(self, options, path, freeze=True, pre_model_path=None):
        """Prepare the network, criterion, solver, and data.

        Args:
            options, dict: Hyperparameters.
        """
        print('Prepare the network and data.')
        self._options = options
        self._path = path
        # Network.
        self._net = torch.nn.DataParallel(BCNN()).cuda()
        #print(self._net)
        if freeze is True:
            self._net.module.freeze_layers()
        if pre_model_path is not None:
            self._net.load_state_dict(torch.load(pre_model_path))
        # Criterion.
        self._criterion = torch.nn.CrossEntropyLoss().cuda()
        # Solver.
        if freeze is True:
            self._solver = torch.optim.SGD(
                list(self._net.module.conv2.parameters())
                + list(self._net.module.bn2.parameters())
                + list(self._net.module.conv.parameters())
                + list(self._net.module.bn.parameters())
                + list(self._net.module.fc.parameters()),
                lr=self._options['base_lr'],
                momentum=0.9, weight_decay=self._options['weight_decay'])
        else:
            self._solver = torch.optim.SGD(
                self._net.parameters(), lr=self._options['base_lr'],
                momentum=0.9, weight_decay=self._options['weight_decay'])
        self._scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
            self._solver, mode='max', factor=0.1, patience=3, verbose=True,
            threshold=1e-4)
        self._train_loader, self._valid_loader = dataset.get_train_validation_data_loader(
            resize_size=224,
            batch_size=self._options['batch_size'],
            random_seed=96,
            validation_size=0,
            object_boxes_dict=None,
            show_sample=False,
            augment=True
        )
        self._test_loader = dataset.get_test_data_loader(
            resize_size=224,
            batch_size=32,
            object_boxes_dict=None
        )

    def train(self):
        """Train the network."""
        print('Training.')
        best_acc = 0.0
        best_epoch = None
        print('Epoch\tTrain loss\tTrain acc\tTest acc')
        for t in range(self._options['epochs']):
            epoch_loss = []
            num_correct = 0
            num_total = 0
            for i, (_, X, y) in enumerate(self._train_loader, 0):
                # Data.
                X = torch.autograd.Variable(X.cuda())
                y = torch.autograd.Variable(y.cuda(non_blocking=True))

                # Clear the existing gradients.
                self._solver.zero_grad()
                # Forward pass.
                score = self._net(X)
                loss = self._criterion(score, y)
                epoch_loss.append(loss.data.item())
                # Prediction.
                _, prediction = torch.max(score.data, 1)
                num_total += y.size(0)
                num_correct += torch.sum(prediction == y.data).float()
                # Backward pass.
                loss.backward()
                self._solver.step()
            train_acc = 100.0 * num_correct / num_total
            #valid_acc = 1.0 * self._accuracy(self._valid_loader)
            test_acc = 1.0 * self._accuracy(self._test_loader)
            self._scheduler.step(test_acc)
            if test_acc > best_acc:
                best_acc = test_acc
                best_epoch = t + 1
                print('*', end='')
                # Save model onto disk.
                torch.save(self._net.state_dict(), self._path)
            print('%d\t%4.3f\t\t%4.2f%%\t\t%4.2f%%' %
                  (t + 1, sum(epoch_loss) / len(epoch_loss), train_acc, test_acc))
        print('Best at epoch %d, test accuracy %f' % (best_epoch, best_acc))

    def _accuracy(self, data_loader):
        """Compute the train/test accuracy.

        Args:
            data_loader: Train/Test DataLoader.

        Returns:
            Train/Test accuracy in percentage.
        """
        self._net.train(False)
        num_correct = 0
        num_total = 0
        for i, (_, X, y) in enumerate(data_loader, 0):
            # Data.
            X = torch.autograd.Variable(X.cuda())
            y = torch.autograd.Variable(y.cuda(non_blocking=True))

            # Prediction.
            score = self._net(X)
            _, prediction = torch.max(score.data, 1)
            num_total += y.size(0)
            num_correct += torch.sum(prediction == y.data).float()
        self._net.train(True)  # Set the model to training phase
        return 100.0 * num_correct / num_total

def fc():
    options = {
        'base_lr': 1.0,
        'batch_size': 64,
        'epochs': 55,
        'weight_decay': 1e-8,
    }
    pre_model_path = None
    path_save = 'models/resnet_densenet_fc_224.pth'
    manager = BCNNManager(options, path_save, freeze=True, pre_model_path=pre_model_path)
    manager.train()

def all_layers():
    options = {
        'base_lr': 0.01,
        'batch_size': 16,
        'epochs': 30,
        'weight_decay': 1e-5,
    }
    pre_model_path = 'models/resnet_densenet_fc_224.pth'
    path_save = 'models/resnet_densenet_all_224.pth'
    manager = BCNNManager(options, path_save, freeze=False, pre_model_path=pre_model_path)
    manager.train()

if __name__ == '__main__':
    #dataset.use_less_data = True
    fc()
    torch.cuda.empty_cache()
    all_layers()
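The _accuracy methods above toggle train(False)/train(True) but still build autograd graphs during evaluation, which wastes GPU memory. A hedged alternative sketch of the same loop under torch.no_grad() (not what the repo currently does):

import torch

def accuracy(net, data_loader):
    """Top-1 accuracy (%) without building autograd graphs."""
    net.eval()
    num_correct, num_total = 0, 0
    with torch.no_grad():
        for _, X, y in data_loader:
            X, y = X.cuda(), y.cuda(non_blocking=True)
            prediction = net(X).argmax(dim=1)
            num_total += y.size(0)
            num_correct += (prediction == y).sum().item()
    net.train()
    return 100.0 * num_correct / num_total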
-------------------------------------------------------------------------------- /bilinear_vgg16.py: --------------------------------------------------------------------------------
# -*- coding: utf-8 -*-

import os

import torch
import torchvision
import time
import cub_200_2011 as dataset

torch.manual_seed(0)
torch.cuda.manual_seed_all(0)

class BCNN(torch.nn.Module):

    def __init__(self):
        """Declare all needed layers."""
        torch.nn.Module.__init__(self)
        # Convolution and pooling layers of VGG-16.
        self.features = torchvision.models.vgg16(pretrained=True).features
        self.features = torch.nn.Sequential(*list(self.features.children())
                                            [:-1])  # Remove pool5.
        # Linear classifier.
        self.fc = torch.nn.Linear(512**2, 200)

        # Initialize the fc layers.
        torch.nn.init.kaiming_normal_(self.fc.weight.data)
        if self.fc.bias is not None:
            torch.nn.init.constant_(self.fc.bias.data, val=0)

    def forward(self, X):
        N = X.size()[0]
        assert X.size() == (N, 3, 448, 448)
        X = self.features(X)
        assert X.size() == (N, 512, 28, 28)
        X = X.view(N, 512, 28**2)
        X = torch.bmm(X, torch.transpose(X, 1, 2)) / (28**2)  # Bilinear
        assert X.size() == (N, 512, 512)
        X = X.view(N, 512**2)
        X = torch.sqrt(X + 1e-5)
        X = torch.nn.functional.normalize(X)
        X = self.fc(X)
        assert X.size() == (N, 200)
        return X

    def freeze_layers(self):
        # Freeze all layers before the new fc head.
        for param in self.features.parameters():
            param.requires_grad = False

class BCNNManager(object):
    """Manager class to train the bilinear CNN.

    Attributes:
        _options: Hyperparameters.
        _path: Useful paths.
        _net: Bilinear CNN.
        _criterion: Cross-entropy loss.
        _solver: SGD with momentum.
        _scheduler: Reduces the learning rate by a factor of 0.1 on plateau.
        _train_loader: Training data.
        _test_loader: Testing data.
    """
    def __init__(self, options, path, freeze=True, pre_model_path=None):
        """Prepare the network, criterion, solver, and data.

        Args:
            options, dict: Hyperparameters.
        """
        print('Prepare the network and data.')
        self._options = options
        self._path = path
        # Network.
        self._net = torch.nn.DataParallel(BCNN()).cuda()
        #print(self._net)
        if freeze is True:
            self._net.module.freeze_layers()
        if pre_model_path is not None:
            self._net.load_state_dict(torch.load(pre_model_path))
        # Criterion.
        self._criterion = torch.nn.CrossEntropyLoss().cuda()
        # Solver.
        if freeze is True:
            self._solver = torch.optim.SGD(
                self._net.module.fc.parameters(), lr=self._options['base_lr'],
                momentum=0.9, weight_decay=self._options['weight_decay'])
        else:
            self._solver = torch.optim.SGD(
                self._net.parameters(), lr=self._options['base_lr'],
                momentum=0.9, weight_decay=self._options['weight_decay'])
        self._scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
            self._solver, mode='max', factor=0.1, patience=3, verbose=True,
            threshold=1e-4)
        self._train_loader, self._valid_loader = dataset.get_train_validation_data_loader(
            resize_size=448,
            batch_size=self._options['batch_size'],
            random_seed=96,
            validation_size=0,
            object_boxes_dict=None,
            show_sample=False,
            augment=True
        )
        self._test_loader = dataset.get_test_data_loader(
            resize_size=448,
            batch_size=32,
            object_boxes_dict=None
        )

    def train(self):
        """Train the network."""
        print('Training.')
        best_acc = 0.0
        best_epoch = None
        print('Epoch\tTrain loss\tTrain acc\tTest acc')
        for t in range(self._options['epochs']):
            epoch_loss = []
            num_correct = 0
            num_total = 0
            for i, (_, X, y) in enumerate(self._train_loader, 0):
                # Data.
                X = torch.autograd.Variable(X.cuda())
                y = torch.autograd.Variable(y.cuda(non_blocking=True))

                # Clear the existing gradients.
                self._solver.zero_grad()
                # Forward pass.
                score = self._net(X)
                loss = self._criterion(score, y)
                epoch_loss.append(loss.data.item())
                # Prediction.
                _, prediction = torch.max(score.data, 1)
                num_total += y.size(0)
                num_correct += torch.sum(prediction == y.data).float()
                # Backward pass.
                loss.backward()
                self._solver.step()
            train_acc = 100.0 * num_correct / num_total
            #valid_acc = 1.0 * self._accuracy(self._valid_loader)
            test_acc = 1.0 * self._accuracy(self._test_loader)
            self._scheduler.step(test_acc)
            if test_acc > best_acc:
                best_acc = test_acc
                best_epoch = t + 1
                print('*', end='')
                # Save model onto disk.
                torch.save(self._net.state_dict(), self._path)
            print('%d\t%4.3f\t\t%4.2f%%\t\t%4.2f%%' %
                  (t + 1, sum(epoch_loss) / len(epoch_loss), train_acc, test_acc))
        print('Best at epoch %d, test accuracy %f' % (best_epoch, best_acc))

    def _accuracy(self, data_loader):
        """Compute the train/test accuracy.

        Args:
            data_loader: Train/Test DataLoader.

        Returns:
            Train/Test accuracy in percentage.
        """
        self._net.train(False)
        num_correct = 0
        num_total = 0
        for i, (_, X, y) in enumerate(data_loader, 0):
            # Data.
            X = torch.autograd.Variable(X.cuda())
            y = torch.autograd.Variable(y.cuda(non_blocking=True))

            # Prediction.
            score = self._net(X)
            _, prediction = torch.max(score.data, 1)
            num_total += y.size(0)
            num_correct += torch.sum(prediction == y.data).float()
        self._net.train(True)  # Set the model to training phase
        return 100.0 * num_correct / num_total

def fc():
    options = {
        'base_lr': 1.0,
        'batch_size': 64,
        'epochs': 55,
        'weight_decay': 1e-8,
    }
    pre_model_path = None
    path_save = 'models/vgg16_fc.pth'
    manager = BCNNManager(options, path_save, freeze=True, pre_model_path=pre_model_path)
    manager.train()

def all_layers():
    options = {
        'base_lr': 0.01,
        'batch_size': 32,
        'epochs': 30,
        'weight_decay': 1e-5,
    }
    pre_model_path = 'models/vgg16_fc.pth'
    path_save = 'models/vgg16_all.pth'
    manager = BCNNManager(options, path_save, freeze=False, pre_model_path=pre_model_path)
    manager.train()

if __name__ == '__main__':
    fc()
    torch.cuda.empty_cache()
    all_layers()
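With pool5 removed, the VGG-16 feature extractor downsamples by a factor of 16, which is where the 28x28 maps at 448-pixel input (this file) and the 14x14 maps at 224-pixel input (the double variant below) come from. A quick shape check:

import torch
import torchvision

feats = torch.nn.Sequential(
    *list(torchvision.models.vgg16(pretrained=False).features.children())[:-1])
with torch.no_grad():
    print(feats(torch.zeros(1, 3, 448, 448)).shape)  # (1, 512, 28, 28)
    print(feats(torch.zeros(1, 3, 224, 224)).shape)  # (1, 512, 14, 14)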
-------------------------------------------------------------------------------- /bilinear_vgg16_double.py: --------------------------------------------------------------------------------
# -*- coding: utf-8 -*-

import os

import torch
import torchvision
import time
import cub_200_2011 as dataset

torch.manual_seed(0)
torch.cuda.manual_seed_all(0)

class BCNN(torch.nn.Module):

    def __init__(self):
        """Declare all needed layers."""
        torch.nn.Module.__init__(self)
        # Convolution and pooling layers of VGG-16, twice (two streams).
        self.features = torchvision.models.vgg16(pretrained=True).features
        self.features = torch.nn.Sequential(*list(self.features.children())
                                            [:-1])  # Remove pool5.
        self._features = torchvision.models.vgg16(pretrained=True).features
        self._features = torch.nn.Sequential(*list(self._features.children())
                                             [:-1])  # Remove pool5.
        # Linear classifier.
        self.fc = torch.nn.Linear(512**2, 200)

        # Initialize the fc layers.
        torch.nn.init.kaiming_normal_(self.fc.weight.data)
        if self.fc.bias is not None:
            torch.nn.init.constant_(self.fc.bias.data, val=0)

    def forward(self, X):
        N = X.size()[0]
        assert X.size() == (N, 3, 224, 224)
        X1 = self.features(X)
        X2 = self._features(X)
        assert X1.size() == (N, 512, 14, 14)
        X1 = X1.view(N, 512, 14**2)
        X2 = X2.view(N, 512, 14**2)
        X = torch.bmm(X1, torch.transpose(X2, 1, 2)) / (14**2)  # Bilinear
        assert X.size() == (N, 512, 512)
        X = X.view(N, 512**2)
        X = torch.sqrt(X + 1e-5)
        X = torch.nn.functional.normalize(X)
        X = self.fc(X)
        assert X.size() == (N, 200)
        return X

    def freeze_layers(self):
        # Freeze both streams, keeping only the fc head trainable.
        for param in self.features.parameters():
            param.requires_grad = False
        for param in self._features.parameters():
            param.requires_grad = False

class BCNNManager(object):
    """Manager class to train the bilinear CNN.

    Attributes:
        _options: Hyperparameters.
        _path: Useful paths.
        _net: Bilinear CNN.
        _criterion: Cross-entropy loss.
        _solver: SGD with momentum.
        _scheduler: Reduces the learning rate by a factor of 0.1 on plateau.
        _train_loader: Training data.
        _test_loader: Testing data.
    """
    def __init__(self, options, path, freeze=True, pre_model_path=None):
        """Prepare the network, criterion, solver, and data.

        Args:
            options, dict: Hyperparameters.
        """
        print('Prepare the network and data.')
        self._options = options
        self._path = path
        # Network.
        self._net = torch.nn.DataParallel(BCNN()).cuda()
        #print(self._net)
        if freeze is True:
            self._net.module.freeze_layers()
        if pre_model_path is not None:
            self._net.load_state_dict(torch.load(pre_model_path))
        # Criterion.
        self._criterion = torch.nn.CrossEntropyLoss().cuda()
        # Solver.
        if freeze is True:
            self._solver = torch.optim.SGD(
                self._net.module.fc.parameters(), lr=self._options['base_lr'],
                momentum=0.9, weight_decay=self._options['weight_decay'])
        else:
            self._solver = torch.optim.SGD(
                self._net.parameters(), lr=self._options['base_lr'],
                momentum=0.9, weight_decay=self._options['weight_decay'])
        self._scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
            self._solver, mode='max', factor=0.1, patience=3, verbose=True,
            threshold=1e-4)
        self._train_loader, self._valid_loader = dataset.get_train_validation_data_loader(
            resize_size=224,
            batch_size=self._options['batch_size'],
            random_seed=96,
            validation_size=0,
            object_boxes_dict=None,
            show_sample=False,
            augment=True
        )
        self._test_loader = dataset.get_test_data_loader(
            resize_size=224,
            batch_size=32,
            object_boxes_dict=None
        )

    def train(self):
        """Train the network."""
        print('Training.')
        best_acc = 0.0
        best_epoch = None
        print('Epoch\tTrain loss\tTrain acc\tTest acc')
        for t in range(self._options['epochs']):
            epoch_loss = []
            num_correct = 0
            num_total = 0
            for i, (_, X, y) in enumerate(self._train_loader, 0):
                # Data.
                X = torch.autograd.Variable(X.cuda())
                y = torch.autograd.Variable(y.cuda(non_blocking=True))

                # Clear the existing gradients.
                self._solver.zero_grad()
                # Forward pass.
                score = self._net(X)
                loss = self._criterion(score, y)
                epoch_loss.append(loss.data.item())
                # Prediction.
                _, prediction = torch.max(score.data, 1)
                num_total += y.size(0)
                num_correct += torch.sum(prediction == y.data).float()
                # Backward pass.
                loss.backward()
                self._solver.step()
            train_acc = 100.0 * num_correct / num_total
            #valid_acc = 1.0 * self._accuracy(self._valid_loader)
            test_acc = 1.0 * self._accuracy(self._test_loader)
            self._scheduler.step(test_acc)
            if test_acc > best_acc:
                best_acc = test_acc
                best_epoch = t + 1
                print('*', end='')
                # Save model onto disk.
                torch.save(self._net.state_dict(), self._path)
            print('%d\t%4.3f\t\t%4.2f%%\t\t%4.2f%%' %
                  (t + 1, sum(epoch_loss) / len(epoch_loss), train_acc, test_acc))
        print('Best at epoch %d, test accuracy %f' % (best_epoch, best_acc))

    def _accuracy(self, data_loader):
        """Compute the train/test accuracy.

        Args:
            data_loader: Train/Test DataLoader.

        Returns:
            Train/Test accuracy in percentage.
        """
        self._net.train(False)
        num_correct = 0
        num_total = 0
        for i, (_, X, y) in enumerate(data_loader, 0):
            # Data.
            X = torch.autograd.Variable(X.cuda())
            y = torch.autograd.Variable(y.cuda(non_blocking=True))

            # Prediction.
174 | score = self._net(X) 175 | _, prediction = torch.max(score.data, 1) 176 | num_total += y.size(0) 177 | num_correct += torch.sum(prediction == y.data).float() 178 | self._net.train(True) # Set the model to training phase 179 | return 100.0 * num_correct / num_total 180 | 181 | def fc(): 182 | options = { 183 | 'base_lr': 1.0, 184 | 'batch_size': 64, 185 | 'epochs': 55, 186 | 'weight_decay': 1e-8, 187 | } 188 | pre_model_path = None 189 | path_save='models/vgg16_fc_double.pth' 190 | manager = BCNNManager(options, path_save, freeze=True, pre_model_path=pre_model_path) 191 | manager.train() 192 | 193 | def all_layers(): 194 | options = { 195 | 'base_lr': 0.01, 196 | 'batch_size': 32, 197 | 'epochs': 30, 198 | 'weight_decay': 1e-5, 199 | } 200 | pre_model_path = 'models/vgg16_fc_double.pth' 201 | path_save='models/vgg16_all_double.pth' 202 | manager = BCNNManager(options, path_save, freeze=False, pre_model_path=pre_model_path) 203 | manager.train() 204 | 205 | if __name__ == '__main__': 206 | fc() 207 | torch.cuda.empty_cache() 208 | all_layers() -------------------------------------------------------------------------------- /cub_200_2011.py: -------------------------------------------------------------------------------- 1 | """ 2 | Class of Dataset 3 | 4 | call get_train_validation_data_loader(resize_shape, batch_size, random_seed, 5 | augment=False, validation_size=0.3, 6 | object_boxes_dict=None, 7 | shuffle=True, show_sample=False) 8 | return (train_loader, valid_loader) 9 | call get_test_data_loader(resize_shape, batch_size, object_boxes_dict=None, shuffle=True) 10 | return (test_loader) 11 | 12 | """ 13 | 14 | 15 | import os 16 | 17 | import torch 18 | import numpy as np 19 | 20 | import utils 21 | 22 | from PIL import Image 23 | from torchvision import transforms 24 | from torch.utils.data import Dataset 25 | from torch.utils.data.sampler import SubsetRandomSampler 26 | 27 | use_less_data = False # this flag is just for debugging multiple-process task 28 | less_data_count_train = 500 29 | less_data_count_test = 400 30 | 31 | class BirdsDataset(Dataset): 32 | 33 | dataset_url = 'http://www.vision.caltech.edu/visipedia-data/CUB-200-2011/CUB_200_2011.tgz' 34 | tar_file_name = 'CUB_200_2011.tgz' 35 | root_dir = 'CUB_200_2011' 36 | img_dir = 'images' 37 | data_split_file_name = 'train_test_split.txt' 38 | image_path_file_name = 'images.txt' 39 | image_label_file_name = 'image_class_labels.txt' 40 | classes_file_name = 'classes.txt' 41 | 42 | def __init__(self, object_boxes_dict=None, train=True, transform=None): 43 | self.train = train 44 | self.transform = transform 45 | self.download() # download before loading 46 | 47 | train_indexes, test_indexes = self._get_train_test_indexes() 48 | img_label_dict = self._get_labels_of_images() 49 | img_path_dict = self._get_path_of_images() 50 | count = 0 51 | if train: 52 | self.image_indexes = [] 53 | self.train_data = [] 54 | self.train_labels = [] 55 | for i in train_indexes: 56 | if use_less_data and count == less_data_count_train: 57 | break 58 | self.image_indexes.append(i) 59 | img_path = os.path.join(self.root_dir, self.img_dir, img_path_dict[i]) 60 | img = self.__get_image_data(i, img_path, object_boxes_dict) 61 | self.train_data.append(img) 62 | self.train_labels.append(img_label_dict[i]) 63 | count += 1 64 | else: 65 | self.image_indexes = [] 66 | self.test_data = [] 67 | self.test_labels = [] 68 | for i in test_indexes: 69 | if use_less_data and count == less_data_count_test: 70 | break 71 | self.image_indexes.append(i) 72 | 
img_path = os.path.join(self.root_dir, self.img_dir, img_path_dict[i])
 73 |                 img = self.__get_image_data(i, img_path, object_boxes_dict)
 74 |                 self.test_data.append(img)
 75 |                 self.test_labels.append(img_label_dict[i])
 76 |                 count += 1
 77 | 
 78 |     def __getitem__(self, index):
 79 |         if self.train:
 80 |             data = self.train_data[index]
 81 |             label = self.train_labels[index]
 82 |         else:
 83 |             data = self.test_data[index]
 84 |             label = self.test_labels[index]
 85 |         if self.transform is not None:
 86 |             data = self.transform(data)
 87 |         return self.image_indexes[index], data, label
 88 | 
 89 |     def __len__(self):
 90 |         if self.train:
 91 |             return len(self.train_data)
 92 |         else:
 93 |             return len(self.test_data)
 94 | 
 95 |     def download(self):
 96 |         if self.__check_exist():
 97 |             return
 98 |         utils.download_file(self.dataset_url, self.tar_file_name)
 99 |         utils.extract_tgz(self.tar_file_name)
100 | 
101 |     def __check_exist(self):
102 |         return os.path.exists(self.root_dir + '/images')
103 | 
104 |     def __get_image_data(self, img_idx, fpath, object_boxes_dict):
105 |         img = Image.open(fpath)
106 |         img = img.convert('RGB')  # a few of the original images are not plain RGB JPEGs, so normalize the mode here
107 |         if object_boxes_dict is not None:
108 |             box = object_boxes_dict[img_idx]  # (x, y, w, h)
109 |             box = (box[0], box[1], box[0] + box[2], box[1] + box[3])  # (left, upper, right, lower)
110 |             img = img.crop(box)
111 |         return img
112 | 
113 |     def _get_train_test_indexes(self):
114 |         fpath = os.path.join(self.root_dir, self.data_split_file_name)
115 |         train_indexes = []
116 |         test_indexes = []
117 |         with open(fpath, 'r') as file:
118 |             for line in file:
119 |                 tmp = line.split(' ')
120 |                 flag = tmp[1][0]
121 |                 if flag == '1':
122 |                     train_indexes.append(int(tmp[0]))
123 |                 else:
124 |                     test_indexes.append(int(tmp[0]))
125 |         return train_indexes, test_indexes
126 | 
127 |     def _get_path_of_images(self):
128 |         fpath = os.path.join(self.root_dir, self.image_path_file_name)
129 |         img_path_dict = {}
130 |         with open(fpath, 'r') as file:
131 |             for line in file:
132 |                 tmp = line.split(' ')
133 |                 img_path_dict[int(tmp[0])] = tmp[1].strip('\n')
134 |         return img_path_dict
135 | 
136 |     def _get_labels_of_images(self):
137 |         fpath = os.path.join(self.root_dir, self.image_label_file_name)
138 |         img_label_dict = {}
139 |         with open(fpath, 'r') as file:
140 |             for line in file:
141 |                 tmp = line.split(' ')
142 |                 img_label_dict[int(tmp[0])] = int(tmp[1]) - 1  # shift labels to start from 0; nn.CrossEntropyLoss expects targets in [0, num_classes - 1]
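        # Worked example of the shift above: image_class_labels.txt stores class
        # ids 1..200, so a raw id of 1 becomes training target 0, the range that
        # nn.CrossEntropyLoss expects; helper.predict adds the 1 back when reporting.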
143 |         return img_label_dict
144 | 
145 |     def get_classes_names(self):
146 |         fpath = os.path.join(self.root_dir, self.classes_file_name)
147 |         classes_names = []
148 |         with open(fpath, 'r') as file:
149 |             for line in file:
150 |                 tmp = line.split(' ')
151 |                 classes_names.append(tmp[1].strip('\n'))
152 |         return classes_names
153 | 
154 | 
155 | def get_train_validation_data_loader(resize_size, batch_size, random_seed,
156 |                                      augment=False, validation_size=0.3,
157 |                                      object_boxes_dict=None,
158 |                                      shuffle=True, show_sample=False):
159 |     normalize = transforms.Normalize(
160 |         mean=[0.485, 0.456, 0.406],
161 |         std=[0.229, 0.224, 0.225],
162 |     )
163 |     if augment:
164 |         transforms_random_apply = transforms.RandomApply([
165 |             transforms.RandomChoice([
166 |                 transforms.RandomHorizontalFlip(),
167 |                 transforms.RandomVerticalFlip(),
168 |                 transforms.RandomRotation(15),
169 |                 transforms.RandomRotation(60)
170 |             ]),
171 |         ], p=0.4)
172 |         if isinstance(resize_size, int):
173 |             # shorter edges should be scaled to this size and original ratio will be kept
174 |             # as a result, we should also do a random crop
175 |             train_transform = transforms.Compose([
176 |                 transforms.Resize(resize_size),
177 |                 transforms_random_apply,
178 |                 transforms.RandomCrop(resize_size),
179 |                 transforms.ToTensor(),
180 |                 normalize
181 |             ])
182 |         else:  # should be a tuple like (224, 224)
183 |             train_transform = transforms.Compose([
184 |                 transforms.Resize(resize_size),
185 |                 transforms_random_apply,
186 |                 transforms.ToTensor(),
187 |                 normalize
188 |             ])
189 |     else:
190 |         if isinstance(resize_size, int):
191 |             train_transform = transforms.Compose([
192 |                 transforms.Resize(resize_size),
193 |                 transforms.RandomCrop(resize_size),
194 |                 transforms.ToTensor(),
195 |                 normalize
196 |             ])
197 |         else:
198 |             train_transform = transforms.Compose([
199 |                 transforms.Resize(resize_size),
200 |                 transforms.ToTensor(),
201 |                 normalize
202 |             ])
203 | 
204 |     if isinstance(resize_size, int):  # for validation, we should keep all information of an image
205 |         resize_size = (resize_size, resize_size)
206 |     valid_transform = transforms.Compose([
207 |         transforms.Resize(resize_size),
208 |         transforms.ToTensor(),
209 |         normalize
210 |     ])
211 | 
212 |     train_dataset = BirdsDataset(
213 |         train=True, transform=train_transform, object_boxes_dict=object_boxes_dict)
214 |     valid_dataset = BirdsDataset(
215 |         train=True, transform=valid_transform, object_boxes_dict=object_boxes_dict)
216 | 
217 |     num_train = len(train_dataset)
218 |     indices = list(range(num_train))
219 |     split = int(np.floor(validation_size * num_train))
220 | 
221 |     if shuffle:
222 |         np.random.seed(random_seed)
223 |         np.random.shuffle(indices)
224 | 
225 |     train_idx, valid_idx = indices[split:], indices[:split]
226 |     train_sampler = SubsetRandomSampler(train_idx)
227 |     valid_sampler = SubsetRandomSampler(valid_idx)
228 | 
229 |     train_loader = torch.utils.data.DataLoader(
230 |         train_dataset, batch_size=batch_size, sampler=train_sampler, num_workers=4
231 |     )
232 |     valid_loader = torch.utils.data.DataLoader(
233 |         valid_dataset, batch_size=batch_size, sampler=valid_sampler, num_workers=4
234 |     )
235 | 
236 |     # visualize some images
237 |     if show_sample:
238 |         sample_loader = torch.utils.data.DataLoader(
239 |             train_dataset, batch_size=9, shuffle=shuffle
240 |         )
241 |         data_iter = iter(sample_loader)
242 |         _, images, labels = next(data_iter)  # the dataset yields (index, image, label) triples
243 |         X = images.numpy().transpose([0, 2, 3, 1])
244 |         utils.plot_images(train_dataset.get_classes_names(), X, labels)
245 | 
246 |     return train_loader, valid_loader
247 | 
248 | 
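# A minimal usage sketch of the two loader factories in this module; this helper
# is hypothetical (not part of the original API) and simply mirrors the argument
# values used elsewhere in the repo (e.g. main.py).
def _example_build_loaders():
    train_loader, valid_loader = get_train_validation_data_loader(
        resize_size=224,         # int: shorter edge scaled, then random crop
        batch_size=32,
        random_seed=96,
        augment=True,
        validation_size=0.1,
        object_boxes_dict=None   # pass utils.get_annotated_bounding_boxes() for object-level crops
    )
    test_loader = get_test_data_loader(resize_size=224, batch_size=32)
    return train_loader, valid_loader, test_loader
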
249 | def get_test_data_loader(resize_size, batch_size, object_boxes_dict=None, shuffle=True):
250 |     normalize = transforms.Normalize(
251 |         mean=[0.485, 0.456, 0.406],
252 |         std=[0.229, 0.224, 0.225],
253 |     )
254 |     if isinstance(resize_size, int):
255 |         resize_size = (resize_size, resize_size)
256 |     transform = transforms.Compose([
257 |         transforms.Resize(resize_size),
258 |         transforms.ToTensor(),
259 |         normalize
260 |     ])
261 |     test_dataset = BirdsDataset(
262 |         train=False, transform=transform, object_boxes_dict=object_boxes_dict)
263 |     test_loader = torch.utils.data.DataLoader(
264 |         test_dataset, batch_size=batch_size, shuffle=shuffle
265 |     )
266 |     return test_loader
267 | 
--------------------------------------------------------------------------------
/helper.py:
--------------------------------------------------------------------------------
  1 | '''
  2 | 1. Evaluate trained model(s) on dataset(s) and
  3 |    combine their results into one output prediction
  4 | 
  5 |     def evaluate(logger,
  6 |                  models,
  7 |                  data_loaders,
  8 |                  set_name,
  9 |                  predict_weights=None,
 10 |                  use_gpu=cuda.is_available(),
 11 |                  cuda_device_idx=0):
 12 | 
 13 |         return acc
 14 | 
 15 | 
 16 | 2. Train a model and evaluate it after training
 17 | 
 18 |     train_and_evaluate(
 19 |         logger = None,
 20 |         model_name = 'resnet152',
 21 |         pre_model = None,
 22 |         use_pretrained_params = True,
 23 |         fine_tune_all_layers = False,
 24 | 
 25 |         data_loaders=None,
 26 |         is_object_level=False,
 27 | 
 28 |         num_epochs = 4,
 29 |         learning_rate = 1e-3,
 30 |         weight_decay = 5e-4,
 31 |         train_batch_size = 32,
 32 |         eval_epoch_step = 4,
 33 | 
 34 |         use_gpu = cuda.is_available(),
 35 |         cuda_device_idx = 0,
 36 |         use_multiple_gpu = False,
 37 | 
 38 |         save_model = True
 39 |     ):
 40 | 
 41 |         return model, train_acc, valid_acc, test_acc, model_path
 42 | 
 43 | 
 44 | '''
 45 | 
 46 | import torch
 47 | import torch.nn as nn
 48 | import torch.cuda as cuda
 49 | import torch.optim as optim
 50 | from torchvision.models import *
 51 | import torchvision.transforms as transforms
 52 | 
 53 | import matplotlib.pyplot as plt
 54 | 
 55 | import os, time
 56 | 
 57 | import cub_200_2011 as dataset
 58 | import utils
 59 | 
 60 | def get_model_by_name(name, pretrained):
 61 |     if name == 'resnet18': return resnet18(pretrained=pretrained)
 62 |     if name == 'resnet34': return resnet34(pretrained=pretrained)
 63 |     if name == 'resnet50': return resnet50(pretrained=pretrained)
 64 |     if name == 'resnet101': return resnet101(pretrained=pretrained)
 65 |     if name == 'resnet152': return resnet152(pretrained=pretrained)
 66 |     if name == 'vgg16': return vgg16_bn(pretrained=pretrained)
 67 |     if name == 'vgg19': return vgg19_bn(pretrained=pretrained)
 68 |     if name == 'inception': return inception_v3(pretrained=pretrained)
 69 |     if name == 'densenet121': return densenet121(pretrained=pretrained)
 70 |     if name == 'densenet169': return densenet169(pretrained=pretrained)
 71 |     if name == 'densenet201': return densenet201(pretrained=pretrained)
 72 |     if name == 'densenet161': return densenet161(pretrained=pretrained)
 73 | 
 74 | # block expansion factor per ResNet variant; used in 'replace_model_fc' to size the new fc layer
 75 | resnet_block_dict = {
 76 |     'resnet18': 1, 'resnet34': 1, 'resnet50': 4,  # resnet50 uses Bottleneck blocks, so its expansion is 4
 77 |     'resnet101': 4, 'resnet152': 4,
 78 | }
 79 | def replace_model_fc(model_name, model):
 80 |     """ Replace fully connected layer of a neural network model in order to correct output class number
 81 | 
 82 |     :param model_name: model's name
 83 |     :param model: model itself, a pytorch's nn.Module object
 84 |     """
 85 |     if model_name.startswith('bilinear_densenet'):
 86 |         for param in model.conv.parameters():
 87 | 
param.requires_grad = True 88 | for param in model.bn.parameters(): 89 | param.requires_grad = True 90 | for param in model.fc.parameters(): 91 | param.requires_grad = True 92 | return None 93 | 94 | # change the num_classes to 200 95 | if model_name.startswith('resnet'): 96 | model.fc = nn.Linear(512 * resnet_block_dict[model_name], 200) 97 | elif model_name.startswith('vgg'): 98 | pass # todo find out how we can change num_classes to fine tune vgg 99 | elif model_name == 'inception': 100 | model.fc = nn.Linear(2048, 200) 101 | elif model_name.startswith('densenet'): 102 | model.classifier = nn.Linear(model.classifier.in_features, 200) 103 | elif model_name.startswith('bilinear_resnet'): 104 | if model_name.endswith('152'): 105 | model.conv2 = nn.Conv2d(2048, 512, 1) 106 | model.bn2 = nn.BatchNorm2d(512) 107 | model.fc = nn.Linear(512**2, 200) 108 | 109 | 110 | def get_model_parameters(model_name, model, pretrained, fine_tune_all_layers): 111 | """ Get model's parameters to optimize 112 | 113 | :param model_name: model's name 114 | :param model: model itself 115 | :param pretrained: True if we should use pretrained model parameters 116 | :param fine_tune_all_layers: True if we should fine tune all layers of the model 117 | """ 118 | if not pretrained or fine_tune_all_layers: 119 | return model.parameters() 120 | else: # fine tune only fully connected layer 121 | if model_name.startswith('resnet') or model_name.startswith('inception'): 122 | return model.fc.parameters() 123 | elif model_name.startswith('densenet'): 124 | return model.classifier.parameters() 125 | elif model_name.startswith('bilinear_densenet'): 126 | return list(model.conv.parameters()) + list(model.bn.parameters()) + list(model.fc.parameters()) 127 | elif model_name.startswith('bilinear'): 128 | if model_name.endswith('34'): 129 | return model.fc.parameters() 130 | elif model_name.endswith('152'): 131 | return list(model.conv2.parameters()) + list(model.bn2.parameters()) + list(model.fc.parameters()) 132 | else: # vgg 133 | pass 134 | 135 | 136 | def save_model_parameters(parameters, file_name_prefix): 137 | # parameters should come from model.state_dict() 138 | if not os.path.exists('models/'): 139 | os.makedirs('models/') 140 | fp = 'models/' + file_name_prefix + '_' + time.strftime("%m-%d-%H-%M", time.localtime()) + '.pth' 141 | torch.save(parameters, fp) 142 | return fp 143 | 144 | 145 | def save_evaluation_result(prefix, epochs_arr, losses, epochs_step_arr, train_accuracies, valid_accuracies): 146 | if not os.path.exists("result"): 147 | os.mkdir("result") 148 | 149 | post_fix = time.strftime("%m-%d-%H-%M", time.localtime()) 150 | 151 | plt.clf() # clear existing figure content 152 | plt.plot(epochs_arr, losses) 153 | plt.xlabel('epoch') 154 | plt.ylabel('loss') 155 | plt.savefig("result/" + prefix + "_loss_" + post_fix + ".png") 156 | 157 | plt.clf() # clear existing figure content 158 | plt.plot(epochs_step_arr, train_accuracies) 159 | plt.xlabel('epoch') 160 | plt.ylabel('train accuracy') 161 | plt.savefig("result/" + prefix + "_acc_train_" + post_fix + ".png") 162 | 163 | plt.clf() # clear existing figure content 164 | plt.plot(epochs_step_arr, valid_accuracies) 165 | plt.xlabel('epoch') 166 | plt.ylabel('validation accuracy') 167 | plt.savefig("result/" + prefix + "_acc_valid_" + post_fix + ".png") 168 | 169 | 170 | def predict(model_glb, img_pil, resize_shape=(224, 224), model_obj=None, obj_bounding_box=None, predict_weights=None, use_gpu=cuda.is_available(), cuda_device_idx=0): 171 | """ predict input's class 
172 | 
173 |     :param model_glb: classification model for global level
174 |     :param img_pil: image as PIL.Image
175 |     :param resize_shape: resize shape
176 |     :param model_obj: classification model for object level
177 |     :param obj_bounding_box: bounding box of the object region, as (left, upper, right, lower) for PIL's crop
178 |     :param predict_weights: weights of different levels' prediction, [0] should be for global level
179 |     :param use_gpu: whether to run the models on GPU
180 |     :param cuda_device_idx: index of the CUDA device to use
181 |     :return: a [scores, classes] pair for the top-5 predictions (raw network outputs, not normalized probabilities)
182 |     """
183 | 
184 |     img = img_pil.resize(resize_shape)
185 |     img_obj = None
186 |     if obj_bounding_box is not None:
187 |         img_obj = img_pil.crop(obj_bounding_box)
188 |         img_obj = img_obj.resize(resize_shape)
189 | 
190 |     normalize = transforms.Normalize(
191 |         mean=[0.485, 0.456, 0.406],
192 |         std=[0.229, 0.224, 0.225],
193 |     )
194 |     transform = transforms.Compose([
195 |         transforms.ToTensor(),
196 |         normalize
197 |     ])
198 | 
199 |     img_tensor_glb = transform(img)
200 |     # tmp = img_tensor_glb.numpy().transpose([1, 2, 0])
201 |     # plt.imshow(tmp)
202 |     # plt.show()
203 | 
204 |     img_tensor_glb = img_tensor_glb.unsqueeze(0)  # convert shape (3, 224, 224) to (1, 3, 224, 224)
205 |     img_tensor_obj = None
206 |     if model_obj is not None:
207 |         img_tensor_obj = transform(img_obj)
208 |         # tmp = img_tensor_obj.numpy().transpose([1, 2, 0])
209 |         # plt.imshow(tmp)
210 |         # plt.show()
211 |         img_tensor_obj = img_tensor_obj.unsqueeze(0)  # convert shape (3, 224, 224) to (1, 3, 224, 224)
212 | 
213 |     if use_gpu:
214 |         cuda_device = torch.device('cuda', cuda_device_idx)
215 |         img_tensor_glb = img_tensor_glb.cuda(cuda_device)
216 |         model_glb = model_glb.cuda(cuda_device_idx)
217 |         if model_obj is not None:
218 |             img_tensor_obj = img_tensor_obj.cuda(cuda_device)
219 |             model_obj = model_obj.cuda(cuda_device_idx)
220 | 
221 |     predict_prob_arr_glb = model_glb(img_tensor_glb)  # raw class scores
222 |     predict_prob_arr = predict_prob_arr_glb
223 |     if model_obj is not None:
224 |         predict_prob_arr_obj = model_obj(img_tensor_obj)
225 |         predict_prob_arr = predict_weights[0] * predict_prob_arr_glb + predict_weights[1] * predict_prob_arr_obj
226 |     top5 = torch.topk(predict_prob_arr, 5)
227 |     if use_gpu:
228 |         top5 = [top5[0].cpu(), top5[1].cpu()]  # back to cpu so that we can detach them
229 |     probs = top5[0].detach().numpy()  # 2-d nparray
230 |     classes = top5[1].detach().numpy()
231 |     probs = probs[0]
232 |     classes = classes[0]
233 |     list_prob = [probs[0], probs[1], probs[2], probs[3], probs[4], ]
234 |     list_cls = [classes[0] + 1, classes[1] + 1, classes[2] + 1, classes[3] + 1, classes[4] + 1, ]
235 |     return [list_prob, list_cls]
236 | 
237 | 
238 | def evaluate(logger, models, data_loaders, set_name, predict_weights=None, use_gpu=cuda.is_available(), cuda_device_idx=0,
239 |              use_multiple_gpu=False):
240 |     """ Evaluate trained model(s) on dataset(s) and combine their results into one output prediction
241 |     Note: there should be a one-to-one match between models and data_loaders.
242 | 
243 |     :param logger: the utils.LoggerS object to print logs
244 |     :param models: a list of models
245 |     :param data_loaders: a list of dataset loaders.
246 |     :param set_name: dataset's name, can be 'train set', 'validation set' or 'test set'
247 | 
248 |     :param predict_weights: a list of weights for each model's prediction result
249 |         Example: for a specific input, models[0] gives an output as [0.7, 0.3] (probability of class 0 and class 1),
250 |         and models[1] gives [0.4, 0.6]. If the predict_weights is [0.2, 0.8], then the final output should be
251 |         [0.7*0.2 + 0.4*0.8, 0.3*0.2 + 0.6*0.8], i.e. [0.46, 0.54], so the prediction is class 1
252 | 
253 |     :param use_gpu: use GPU to run the model or not
254 |     :param cuda_device_idx: an int value that indicates which cuda device that we want to use for inputs
255 | 
256 |     :return: prediction accuracy
257 |     """
258 |     for model in models:
259 |         model.eval()  # evaluation mode
260 | 
261 |     if predict_weights is None:
262 |         predict_weights = [1]
263 |     logger.info('computing classification accuracy on ' + set_name)
264 |     _begin_time = time.time()
265 |     # acc = correct_num / sample_num
266 |     correct_num = 0
267 |     sample_num = 0
268 |     has_multiple_gpu = cuda.device_count() > 1
269 |     cuda_device = None
270 |     if use_gpu:
271 |         cuda_device = torch.device('cuda', cuda_device_idx)
272 |     with torch.no_grad():
273 |         labels_dict = {}
274 |         predicts_dict = {}
275 |         for i in range(len(models)):  # each model is only valid on corresponding data loader
276 |             model = models[i]
277 |             data_loader = data_loaders[i]
278 |             for data in data_loader:
279 |                 image_indexes, images, labels = data
280 |                 if has_multiple_gpu and use_gpu:
281 |                     if use_multiple_gpu:
282 |                         images = torch.autograd.Variable(images.cuda())
283 |                         labels = torch.autograd.Variable(labels.cuda(non_blocking=True))
284 |                     else:
285 |                         images = images.cuda(cuda_device)
286 |                         labels = labels.cuda(cuda_device)  # shape is (batch_size, 1)
287 | 
288 |                 batch_size = labels.size(0)
289 |                 predict = model(images)  # shape is (batch_size, 200)
290 | 
291 |                 if i == 0:
292 |                     sample_num += labels.size(0)
293 |                     for j in range(batch_size):
294 |                         img_idx = image_indexes[j].item()
295 |                         labels_dict[img_idx] = labels[j].item()
296 |                         predicts_dict[img_idx] = predict_weights[i] * predict.data[j]
297 |                 else:
298 |                     for j in range(batch_size):
299 |                         img_idx = image_indexes[j].item()
300 |                         predict_data = predict_weights[i] * predict.data[j] + predicts_dict[img_idx]
301 |                         predicts_dict[img_idx] = predict_data
302 | 
303 |         for img_idx in predicts_dict:
304 |             label = labels_dict[img_idx]
305 |             predict = predicts_dict[img_idx]
306 |             predict_cls = torch.argmax(predict)
307 |             if predict_cls == label:
308 |                 correct_num += 1
309 | 
310 |     for model in models:
311 |         model.train()  # back to train mode
312 | 
313 |     acc = 100.0 * correct_num / sample_num
314 |     logger.info('accuracy: %.4f%%, cost time: %.4fs' % (acc, time.time() - _begin_time))
315 |     return acc
316 | 
317 | 
318 | # ----------------------- This is a very important method ----------------------
319 | 
320 | def train_and_evaluate(
321 |         logger = None,
322 | 
323 |         model_name = 'resnet152',
324 |         pre_model = None,
325 |         use_pretrained_params = True,
326 |         fine_tune_all_layers = False,
327 | 
328 |         data_loaders=None,
329 |         is_object_level=False,
330 | 
331 |         num_epochs = 4,
332 |         learning_rate = 1e-3,
333 |         use_scheduler = False,
334 |         weight_decay = 5e-4,
335 |         train_batch_size = 32,
336 |         eval_epoch_step = 4,
337 | 
338 |         use_gpu = cuda.is_available(),
339 |         cuda_device_idx = 0,
340 |         use_multiple_gpu = False,
341 | 
342 |         save_model = True
343 | ):
344 |     """ Train a model and evaluate it after training
345 | 
346 |     :param logger: the utils.LoggerS object to print logs onto file and console
347 |     :param model_name: model's name, used to create the model and help provide more detailed log
348 |     :param pre_model: if this is not None, we will train and evaluate on it instead of creating a new model
349 |     :param use_pretrained_params: True if we initialize the model with pretrained
parameters 350 | :param fine_tune_all_layers: True if we want to fine tune all layers of the model 351 | 352 | :param data_loaders: a list of data loaders for train, validation and test set. The order must be correct 353 | :param is_object_level: as its name 354 | 355 | :param num_epochs: the number of training iterations on whole train set 356 | :param learning_rate: as its name 357 | :param weight_decay: as its name 358 | :param train_batch_size: batch size of train set 359 | :param eval_epoch_step: evaluation step 360 | 361 | :param use_gpu: use GPU to train/evaluate or not 362 | :param cuda_device_idx: an int value that indicates which cuda device that we want to use for inputs and model 363 | :param use_multiple_gpu: use multiple GPU to train/evaluate or not; todo currently this flag is useless 364 | 365 | :param save_model: True if we want to save the model that has best validation accuracy when training 366 | 367 | :return: trained model, accuracies on train, validation and test set, 368 | and stored model path if :param save_model is set to True 369 | """ 370 | # obj -- object 371 | # glb -- global 372 | # prtrn -- pretrain 373 | # ep -- epoch 374 | # bt -- batch_size 375 | if is_object_level: 376 | res_file_name_prefix = 'obj' 377 | else: 378 | res_file_name_prefix = 'glb' 379 | res_file_name_prefix += '_' + model_name 380 | if use_pretrained_params: 381 | res_file_name_prefix += '_prtrn' 382 | if fine_tune_all_layers: 383 | res_file_name_prefix += 'All' 384 | res_file_name_prefix += '_ep' + str(num_epochs) + '_bt' + str(train_batch_size) + '_' + str(learning_rate) 385 | if logger is None: 386 | logger = utils.get_logger(res_file_name_prefix) 387 | 388 | # get train/valid/test_loader 389 | if data_loaders is None: 390 | logger.info('start loading dataset') 391 | begin_time = time.time() 392 | train_loader, valid_loader = dataset.get_train_validation_data_loader( 393 | resize_size=224, 394 | batch_size=train_batch_size, 395 | random_seed=96, 396 | validation_size=0.2, 397 | object_boxes_dict=None, 398 | show_sample=False 399 | ) 400 | test_loader = dataset.get_test_data_loader( 401 | resize_size=224, 402 | batch_size=32, 403 | object_boxes_dict=None 404 | ) 405 | logger.info('loading dataset costs ' + str(time.time() - begin_time)) 406 | else: 407 | train_loader = data_loaders[0] 408 | valid_loader = data_loaders[1] 409 | test_loader = data_loaders[2] 410 | 411 | # Create nn model 412 | if pre_model is not None: 413 | model = pre_model 414 | # pre_model should have been trained 415 | if not fine_tune_all_layers: 416 | for param in model.parameters(): 417 | param.requires_grad = False 418 | replace_model_fc(model_name, model) 419 | else: 420 | model = get_model_by_name(model_name, use_pretrained_params) 421 | if use_pretrained_params and not fine_tune_all_layers: 422 | # only fine tune fully connected layer, which means we should not upgrade network layers except for last one 423 | for param in model.parameters(): 424 | param.requires_grad = False 425 | replace_model_fc(model_name, model) 426 | 427 | has_multiple_gpu = cuda.device_count() > 1 428 | 429 | cuda_device = None # declare this just in order to remove IDE warnings ... 
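    # Note (editorial suggestion, not in the original code): in the multi-GPU
    # branch below, nn.DataParallel replicates the model and scatters each batch
    # across all visible GPUs, so inputs only need plain `.cuda()`; in the
    # single-GPU branch, the model and inputs are both pinned to the device
    # selected by `cuda_device_idx`.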
430 |     if use_gpu:
431 |         if has_multiple_gpu and use_multiple_gpu: model = nn.DataParallel(model).cuda()
432 |         else:
433 |             model = model.cuda(cuda_device_idx)
434 |             cuda_device = torch.device('cuda', cuda_device_idx)
435 | 
436 | 
437 |     criterion = nn.CrossEntropyLoss().cuda()
438 |     if has_multiple_gpu and use_multiple_gpu: _model = model.module
439 |     else: _model = model
440 |     optimizer = optim.SGD(
441 |         get_model_parameters(model_name, _model, use_pretrained_params, fine_tune_all_layers),
442 |         lr=learning_rate,
443 |         momentum=0.9,
444 |         weight_decay=weight_decay
445 |     )
446 |     # Reduce learning rate when a metric has stopped improving.
447 |     if use_scheduler is True:
448 |         scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
449 |             optimizer, mode='min', factor=0.1, patience=3, verbose=True, threshold=1e-4
450 |         )
451 | 
452 | 
453 |     logger.info('start training')
454 |     train_cost_time = 0.0
455 |     epochs_arr = []
456 |     losses_arr = []
457 |     epochs_step_arr = []
458 |     train_acc_arr = []
459 |     valid_acc_arr = []
460 |     best_valid_acc = 0.0
461 |     best_valid_acc_model_params = None
462 |     for epoch in range(num_epochs):
463 |         running_loss = 0.0
464 |         batch_num = 0
465 |         for i, (_, inputs, labels) in enumerate(train_loader, 0):
466 |             begin_time = time.time()
467 |             # get the inputs
468 | 
469 |             if use_gpu:
470 |                 if has_multiple_gpu and use_multiple_gpu:
471 |                     inputs = torch.autograd.Variable(inputs.cuda())
472 |                     labels = torch.autograd.Variable(labels.cuda(non_blocking=True))
473 |                 else:
474 |                     inputs = inputs.cuda(cuda_device)
475 |                     labels = labels.cuda(cuda_device)
476 | 
477 | 
478 |             # zero the parameter gradients
479 |             optimizer.zero_grad()
480 | 
481 |             # forward + backward + optimize
482 |             outputs = model(inputs)
483 |             loss = criterion(outputs, labels)
484 |             loss.backward()
485 |             optimizer.step()
486 |             running_loss += loss.item()
487 |             # print statistics
488 |             logger.info('[%d, %5d] loss: %.6f' % (epoch + 1, i + 1, loss.item()))
489 |             cost_time_i = time.time() - begin_time
490 |             train_cost_time += cost_time_i
491 |             logger.info('cost time: %.4fs' % cost_time_i)
492 |             batch_num = i + 1  # count batches, so the epoch-average below divides by the batch count
493 |         if use_scheduler is True:
494 |             scheduler.step(running_loss)
495 |         epochs_arr.append(epoch + 1)
496 |         losses_arr.append(running_loss / batch_num)
497 |         if epoch == 0 or (epoch + 1) % eval_epoch_step == 0:  # compute classification accuracy on train and validation set
498 |             epochs_step_arr.append(epoch + 1)
499 |             logger.info('')
500 |             train_acc = evaluate(logger=logger, models=[model], data_loaders=[train_loader],
501 |                                  set_name='train set', cuda_device_idx=cuda_device_idx, use_multiple_gpu=use_multiple_gpu)
502 |             train_acc_arr.append(train_acc)
503 |             valid_acc = evaluate(logger=logger, models=[model], data_loaders=[valid_loader],
504 |                                  set_name='validation set', cuda_device_idx=cuda_device_idx, use_multiple_gpu=use_multiple_gpu)
505 |             valid_acc_arr.append(valid_acc)
506 |             if valid_acc > best_valid_acc:
507 |                 best_valid_acc = valid_acc
508 |                 best_valid_acc_model_params = {k: v.clone() for k, v in model.state_dict().items()}  # snapshot the weights; state_dict() alone keeps live references that later epochs would overwrite
509 |             logger.info('')
510 | 
511 |     logger.info('Finished Training, cost time: %.4fs' % train_cost_time)
512 |     logger.info('')
513 | 
514 |     test_acc = evaluate(logger=logger, models=[model], data_loaders=[test_loader],
515 |                         set_name='test set', cuda_device_idx=cuda_device_idx, use_multiple_gpu=use_multiple_gpu)
516 |     logger.info('')
517 | 
518 |     save_evaluation_result(res_file_name_prefix, epochs_arr, losses_arr, epochs_step_arr, train_acc_arr, valid_acc_arr)
519 | 
520 |     saved_model_path = None
521 |     if save_model:
522 |         logger.info('')
523 | 
logger.info('saving model parameters') 524 | if is_object_level: 525 | model_file_name_prefix = 'obj_' 526 | else: 527 | model_file_name_prefix = 'glb_' 528 | model_file_name_prefix += model_name + ('_acc%.4f' % best_valid_acc) 529 | saved_model_path = save_model_parameters(best_valid_acc_model_params, model_file_name_prefix) 530 | logger.info('parameters have been saved successfully to ' + saved_model_path) 531 | logger.info('') 532 | 533 | return model, train_acc_arr[len(train_acc_arr) - 1], valid_acc_arr[len(valid_acc_arr) - 1], test_acc, saved_model_path 534 | 535 | -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | import cub_200_2011 as dataset 2 | import helper, utils 3 | import model_global as glb, model_object as obj 4 | import time 5 | import torch 6 | import torch.multiprocessing as mp 7 | 8 | train_batch_size = 32 9 | test_batch_size = 32 10 | random_seed = 96 11 | validation_size = 0.1 12 | predict_weights = [0.2, 0.8] 13 | 14 | use_multiple_gpu = False # run global model on 1 gpu and object-level model on another 15 | pre_models = [None,None] 16 | 17 | def print_summary_log(logger, trn_acc_glb, val_acc_glb, tst_acc_glb, trn_acc_obj, val_acc_obj, tst_acc_obj): 18 | logger.info('') 19 | 20 | logger.info('global-level model: ' + str(glb.model_name)) 21 | logger.info('pretrained: ' + str(glb.use_pretrained_params)) 22 | logger.info('fine tune all layers: ' + str(glb.fine_tune_all_layers)) 23 | logger.info('epochs: ' + str(glb.num_epochs)) 24 | logger.info('batch size: ' + str(train_batch_size)) 25 | logger.info('learning rate: ' + str(glb.learning_rate)) 26 | logger.info('prediction accuracy: %.4f%%, %.4f%%, %.4f%%' % (trn_acc_glb, val_acc_glb, tst_acc_glb)) 27 | 28 | logger.info('') 29 | 30 | logger.info('object-level model: ' + str(obj.model_name)) 31 | logger.info('pretrained: ' + str(obj.use_pretrained_params)) 32 | logger.info('fine tune all layers: ' + str(obj.fine_tune_all_layers)) 33 | logger.info('epochs: ' + str(obj.num_epochs)) 34 | logger.info('batch size: ' + str(train_batch_size)) 35 | logger.info('learning rate: ' + str(obj.learning_rate)) 36 | logger.info('prediction accuracy: %.4f%%, %.4f%%, %.4f%%' % (trn_acc_obj, val_acc_obj, tst_acc_obj)) 37 | 38 | 39 | def evaluate(logger, models, train_loaders, validation_loaders, test_loaders): 40 | logger.info('') 41 | logger.info('evaluating model on multiple sets combining both global-level and object-level models\' predictions') 42 | logger.info('predict weights: ' + str(predict_weights[0]) + ', ' + str(predict_weights[1])) 43 | begin_time = time.time() 44 | 45 | helper.evaluate( 46 | logger=logger, 47 | models=models, 48 | data_loaders=train_loaders, 49 | set_name='train set', 50 | predict_weights=predict_weights 51 | ) 52 | helper.evaluate( 53 | logger=logger, 54 | models=models, 55 | data_loaders=validation_loaders, 56 | set_name='validation set', 57 | predict_weights=predict_weights 58 | ) 59 | helper.evaluate( 60 | logger=logger, 61 | models=models, 62 | data_loaders=test_loaders, 63 | set_name='test set', 64 | predict_weights=predict_weights 65 | ) 66 | 67 | logger.info('evaluation has been done! 
total time: %.4fs' % (time.time() - begin_time)) 68 | 69 | 70 | def get_model_with_saved_parameters(model_path_glb, model_path_obj): 71 | model_glb = helper.get_model_by_name(glb.model_name, pretrained=False) 72 | helper.replace_model_fc(glb.model_name, model_glb) 73 | model_glb.load_state_dict(torch.load(model_path_glb)) 74 | model_glb = model_glb.cuda() 75 | 76 | model_obj = helper.get_model_by_name(obj.model_name, pretrained=False) 77 | helper.replace_model_fc(obj.model_name, model_obj) 78 | model_obj.load_state_dict(torch.load(model_path_obj)) 79 | model_obj = model_obj.cuda() 80 | 81 | return model_glb, model_obj 82 | 83 | 84 | def run_on_single_gpu(logger, data_loaders_glb, data_loaders_obj, 85 | train_loaders, valid_loaders, test_loaders, pre_models, fine_tune_all_layers, num_epochs): 86 | # if you want to change hyper-parameters like number of epochs or learning rate for each level's training, 87 | # please go to corresponding module file 88 | _, trn_acc_glb, val_acc_glb, tst_acc_glb, model_path_glb = glb.get_trained_model_global( 89 | logger=logger, data_loaders=data_loaders_glb, train_batch_size=train_batch_size, 90 | save_model=True, pre_model=pre_models[0], fine_tune_all_layers=fine_tune_all_layers, num_epochs=num_epochs) 91 | _, trn_acc_obj, val_acc_obj, tst_acc_obj, model_path_obj = obj.get_trained_model_object( 92 | logger=logger, data_loaders=data_loaders_obj, train_batch_size=train_batch_size, 93 | save_model=True, pre_model=pre_models[1], fine_tune_all_layers=fine_tune_all_layers, num_epochs=num_epochs) 94 | 95 | print_summary_log(logger, trn_acc_glb, val_acc_glb, tst_acc_glb, trn_acc_obj, val_acc_obj, tst_acc_obj) 96 | model_glb, model_obj = get_model_with_saved_parameters(model_path_glb, model_path_obj) 97 | evaluate( 98 | logger=logger, 99 | models=[model_glb, model_obj], 100 | train_loaders=train_loaders, 101 | validation_loaders=valid_loaders, 102 | test_loaders=test_loaders 103 | ) 104 | return model_glb, model_obj 105 | 106 | def target_model_global(q_glb, data_loaders_glb, pre_model, fine_tune_all_layers, num_epochs): 107 | logger_glb = glb.get_logger(train_batch_size, add_console_log_prefix=True) 108 | logger_glb.info('target model global starts') 109 | _, trn_acc_glb, val_acc_glb, tst_acc_glb, model_path_glb = glb.get_trained_model_global( 110 | logger=logger_glb, data_loaders=data_loaders_glb, train_batch_size=train_batch_size, 111 | cuda_device_idx=0, save_model=True, pre_model=pre_model, fine_tune_all_layers=fine_tune_all_layers, 112 | num_epochs=num_epochs ) 113 | q_glb.put(trn_acc_glb) 114 | q_glb.put(val_acc_glb) 115 | q_glb.put(tst_acc_glb) 116 | q_glb.put(model_path_glb) 117 | logger_glb.info('target model global stops') 118 | 119 | def target_model_object(q_obj, data_loaders_obj, pre_model, fine_tune_all_layers, num_epochs): 120 | logger_obj = obj.get_logger(train_batch_size, add_console_log_prefix=True) 121 | logger_obj.info('target model object starts') 122 | _, trn_acc_obj, val_acc_obj, tst_acc_obj, model_path_obj = obj.get_trained_model_object( 123 | logger=logger_obj, data_loaders=data_loaders_obj, train_batch_size=train_batch_size, 124 | cuda_device_idx=1, save_model=True, pre_model=pre_model,fine_tune_all_layers=fine_tune_all_layers, 125 | num_epochs=num_epochs ) 126 | q_obj.put(trn_acc_obj) 127 | q_obj.put(val_acc_obj) 128 | q_obj.put(tst_acc_obj) 129 | q_obj.put(model_path_obj) 130 | logger_obj.info('target model object stops') 131 | 132 | def run_on_multiple_gpus(logger, data_loaders_glb, data_loaders_obj, 133 | train_loaders, valid_loaders, 
test_loaders, pre_models, fine_tune_all_layers, num_epochs):
134 | 
135 |     q_glb = mp.Queue()  # stores each level's accuracies and the saved model path
136 |     q_obj = mp.Queue()
137 |     process_glb = mp.Process(target=target_model_global,
138 |                              args=(q_glb, data_loaders_glb, pre_models[0], fine_tune_all_layers, num_epochs,))
139 |     process_obj = mp.Process(target=target_model_object,
140 |                              args=(q_obj, data_loaders_obj, pre_models[1], fine_tune_all_layers, num_epochs,))
141 | 
142 |     process_glb.start()
143 |     process_obj.start()
144 | 
145 |     process_glb.join()  # block the main process here until process_glb finishes
146 |     process_obj.join()
147 | 
148 |     trn_acc_glb = q_glb.get()  # FIFO
149 |     val_acc_glb = q_glb.get()
150 |     tst_acc_glb = q_glb.get()
151 |     model_path_glb = q_glb.get()
152 | 
153 |     trn_acc_obj = q_obj.get()
154 |     val_acc_obj = q_obj.get()
155 |     tst_acc_obj = q_obj.get()
156 |     model_path_obj = q_obj.get()
157 | 
158 |     print_summary_log(logger, trn_acc_glb, val_acc_glb, tst_acc_glb, trn_acc_obj, val_acc_obj, tst_acc_obj)
159 |     model_glb, model_obj = get_model_with_saved_parameters(model_path_glb, model_path_obj)
160 |     evaluate(
161 |         logger=logger,
162 |         models=[model_glb, model_obj],
163 |         train_loaders=train_loaders,
164 |         validation_loaders=valid_loaders,
165 |         test_loaders=test_loaders
166 |     )
167 |     return model_glb, model_obj
168 | 
169 | if __name__ == "__main__":
170 |     log_file_name_prefix = 'combined'
171 |     logger = utils.get_logger(log_file_name_prefix)
172 | 
173 |     logger.info('start loading dataset')
174 |     begin_time = time.time()
175 |     train_loader_glb, valid_loader_glb = dataset.get_train_validation_data_loader(
176 |         resize_size=224,  # apply random crop for train set
177 |         batch_size=train_batch_size,
178 |         random_seed=random_seed,
179 |         augment=True,
180 |         validation_size=validation_size,
181 |         object_boxes_dict=None,
182 |         show_sample=False
183 |     )
184 |     test_loader_glb = dataset.get_test_data_loader(
185 |         resize_size=224,  # no cropping
186 |         batch_size=test_batch_size,
187 |         object_boxes_dict=None
188 |     )
189 | 
190 |     bounding_boxes = utils.get_annotated_bounding_boxes()
191 |     train_loader_obj, valid_loader_obj = dataset.get_train_validation_data_loader(
192 |         resize_size=(224, 224),  # for the object-level model we don't need cropping any more
193 | batch_size=train_batch_size, 194 | random_seed=random_seed, 195 | augment=True, 196 | validation_size=validation_size, 197 | object_boxes_dict=bounding_boxes, 198 | show_sample=False 199 | ) 200 | test_loader_obj = dataset.get_test_data_loader( 201 | resize_size=224, 202 | batch_size=test_batch_size, 203 | object_boxes_dict=bounding_boxes 204 | ) 205 | logger.info('loading dataset costs %.4fs' % (time.time() - begin_time)) 206 | 207 | data_loaders_glb = [train_loader_glb, valid_loader_glb, test_loader_glb] 208 | data_loaders_obj = [train_loader_obj, valid_loader_obj, test_loader_obj] 209 | 210 | train_loaders = [train_loader_glb, train_loader_obj] 211 | valid_loaders = [valid_loader_glb, valid_loader_obj] 212 | test_loaders = [test_loader_glb, test_loader_obj] 213 | pre_models = [None, None] 214 | 215 | # test: it seems ResNet is better for global model and DenseNet better for object-level model 216 | glb.model_name = 'resnet152' 217 | obj.model_name = 'densenet161' 218 | fine_tune_all_layers=False 219 | glb.use_multiple_gpu=False 220 | obj.use_multiple_gpu=False 221 | num_epochs = 160 222 | if not use_multiple_gpu: 223 | pre_models[0], pre_models[1] = run_on_single_gpu(logger, data_loaders_glb, data_loaders_obj, 224 | train_loaders, valid_loaders, test_loaders, pre_models, fine_tune_all_layers, num_epochs) 225 | else: 226 | mp.set_start_method('spawn') # CUDA requires this 227 | pre_models[0], pre_models[1] = run_on_multiple_gpus(logger, data_loaders_glb, data_loaders_obj, 228 | train_loaders, valid_loaders, test_loaders, pre_models, fine_tune_all_layers, num_epochs) 229 | 230 | fine_tune_all_layers = True 231 | num_epochs = 120 232 | if not use_multiple_gpu: 233 | run_on_single_gpu(logger, data_loaders_glb, data_loaders_obj, train_loaders, valid_loaders, test_loaders, pre_models, fine_tune_all_layers, num_epochs) 234 | else: 235 | run_on_multiple_gpus(logger, data_loaders_glb, data_loaders_obj, train_loaders, valid_loaders, test_loaders, pre_models, fine_tune_all_layers, num_epochs) 236 | 237 | 238 | -------------------------------------------------------------------------------- /model_global.py: -------------------------------------------------------------------------------- 1 | import cub_200_2011 as dataset 2 | import helper, utils 3 | import time 4 | import torch 5 | 6 | model_name = 'resnet152' 7 | use_pretrained_params = True 8 | fine_tune_all_layers = False 9 | 10 | num_epochs = 100 11 | # use to generate same train/validation data splits 12 | random_seed = 96 13 | # we use a part of train set as validation set 14 | validation_size = 0.1 15 | learning_rate = 8e-4 16 | weight_decay = 5e-4 17 | eval_epoch_step = 4 18 | 19 | use_gpu = True 20 | cuda_device_idx=0 21 | use_multiple_gpu = False 22 | 23 | 24 | def get_logger(train_batch_size, add_console_log_prefix=False): 25 | log_file_name_prefix = 'glb_' + model_name 26 | if use_pretrained_params: 27 | log_file_name_prefix += '_prtrn' 28 | if fine_tune_all_layers: 29 | log_file_name_prefix += 'All' 30 | log_file_name_prefix += '_ep' + str(num_epochs) + '_bt' + str(train_batch_size) + '_' + str(learning_rate) 31 | if add_console_log_prefix: 32 | return utils.get_logger(log_file_name_prefix, 'glb_' + model_name) 33 | else: 34 | return utils.get_logger(log_file_name_prefix) 35 | 36 | 37 | # Why do we need param train_batch_size? 
Because log output must be precise 38 | def get_trained_model_global(logger, data_loaders, train_batch_size, cuda_device_idx=cuda_device_idx, save_model=True, 39 | pre_model=None, fine_tune_all_layers=fine_tune_all_layers, num_epochs=num_epochs): 40 | # if pre_model is None: 41 | # use_pretrained_params = True 42 | # fine_tune_all_layers = False 43 | # else : 44 | # use_pretrained_params = False 45 | # fine_tune_all_layers = True 46 | 47 | return helper.train_and_evaluate( 48 | logger=logger, 49 | 50 | model_name=model_name, 51 | pre_model=pre_model, 52 | use_pretrained_params=use_pretrained_params, 53 | fine_tune_all_layers=fine_tune_all_layers, 54 | 55 | data_loaders=data_loaders, 56 | is_object_level=False, 57 | 58 | num_epochs=num_epochs, 59 | learning_rate=learning_rate, 60 | weight_decay=weight_decay, 61 | train_batch_size=train_batch_size, # this is actually useless when we explicitly provide data_loaders 62 | eval_epoch_step=eval_epoch_step, 63 | 64 | use_gpu=use_gpu, 65 | cuda_device_idx=cuda_device_idx, 66 | use_multiple_gpu=use_multiple_gpu, 67 | 68 | save_model=save_model 69 | ) 70 | 71 | 72 | if __name__ == "__main__": 73 | train_batch_size = 32 74 | test_batch_size = 32 75 | 76 | logger = get_logger(train_batch_size) 77 | 78 | logger.info('start loading dataset') 79 | begin_time = time.time() 80 | train_loader, valid_loader = dataset.get_train_validation_data_loader( 81 | resize_size=224, 82 | batch_size=train_batch_size, 83 | random_seed=random_seed, 84 | augment=True, 85 | validation_size=validation_size, 86 | object_boxes_dict=None, 87 | show_sample=False 88 | ) 89 | test_loader = dataset.get_test_data_loader( 90 | resize_size=224, 91 | batch_size=test_batch_size, 92 | object_boxes_dict=None 93 | ) 94 | logger.info('loading dataset costs %.4fs' % (time.time() - begin_time)) 95 | 96 | # first training process for fc layer's parameters 97 | fine_tune_all_layers = False 98 | num_epochs = 100 99 | _, _, _, _, model_path = get_trained_model_global( 100 | logger=logger, 101 | data_loaders=[train_loader, valid_loader, test_loader], 102 | train_batch_size=train_batch_size, 103 | cuda_device_idx=cuda_device_idx, 104 | fine_tune_all_layers=fine_tune_all_layers, 105 | num_epochs=num_epochs 106 | ) 107 | 108 | logger.info('training for fc layer finished successfully') 109 | logger.info('model: ' + model_name) 110 | logger.info('pretrained: ' + str(use_pretrained_params)) 111 | logger.info('fine tune all layers: ' + str(fine_tune_all_layers)) 112 | logger.info('epochs: ' + str(num_epochs)) 113 | logger.info('batch size: ' + str(train_batch_size)) 114 | logger.info('learning rate: ' + str(learning_rate)) 115 | 116 | 117 | # second training process for all layers' parameters 118 | fine_tune_all_layers = True 119 | num_epochs = 60 120 | pre_model = helper.get_model_by_name(model_name, False) 121 | helper.replace_model_fc(model_name, pre_model) 122 | pre_model.load_state_dict(torch.load(model_path)) 123 | 124 | get_trained_model_global( 125 | logger=logger, 126 | data_loaders=[train_loader, valid_loader, test_loader], 127 | train_batch_size=train_batch_size, 128 | pre_model=pre_model, 129 | cuda_device_idx=cuda_device_idx, 130 | fine_tune_all_layers=fine_tune_all_layers, 131 | num_epochs=num_epochs 132 | ) 133 | 134 | logger.info('training for all layers finished successfully') 135 | logger.info('model: ' + model_name) 136 | logger.info('pretrained: ' + str(use_pretrained_params)) 137 | logger.info('fine tune all layers: ' + str(fine_tune_all_layers)) 138 | logger.info('epochs: ' + 
str(num_epochs)) 139 | logger.info('batch size: ' + str(train_batch_size)) 140 | logger.info('learning rate: ' + str(learning_rate)) -------------------------------------------------------------------------------- /model_object.py: -------------------------------------------------------------------------------- 1 | import cub_200_2011 as dataset 2 | import helper, utils 3 | import time 4 | 5 | model_name = 'densenet161' 6 | use_pretrained_params = True 7 | fine_tune_all_layers = False 8 | 9 | num_epochs = 100 10 | # use to generate same train/validation data splits 11 | random_seed = 96 12 | # we use a part of train set as validation set 13 | validation_size = 0.15 14 | learning_rate = 8e-4 15 | weight_decay = 5e-4 16 | eval_epoch_step = 4 17 | 18 | use_gpu = True 19 | cuda_device_idx=0 20 | use_multiple_gpu = False 21 | 22 | 23 | def get_logger(train_batch_size, add_console_log_prefix=False): 24 | log_file_name_prefix = 'obj_' + model_name 25 | if use_pretrained_params: 26 | log_file_name_prefix += '_prtrn' 27 | if fine_tune_all_layers: 28 | log_file_name_prefix += 'All' 29 | log_file_name_prefix += '_ep' + str(num_epochs) + '_bt' + str(train_batch_size) + '_' + str(learning_rate) 30 | if add_console_log_prefix: 31 | return utils.get_logger(log_file_name_prefix, 'obj_' + model_name) 32 | else: 33 | return utils.get_logger(log_file_name_prefix) 34 | 35 | 36 | # in fact I don't like writing train_batch_size here...QAQ 37 | def get_trained_model_object(logger, data_loaders, train_batch_size, cuda_device_idx=cuda_device_idx, save_model=True, 38 | pre_model=None, fine_tune_all_layers=fine_tune_all_layers, num_epochs=num_epochs): 39 | # if pre_model is None: 40 | # use_pretrained_params = True 41 | # fine_tune_all_layers = False 42 | # else : 43 | # use_pretrained_params = False 44 | # fine_tune_all_layers = True 45 | 46 | return helper.train_and_evaluate( 47 | logger=logger, 48 | 49 | model_name=model_name, 50 | pre_model=pre_model, 51 | use_pretrained_params=use_pretrained_params, 52 | fine_tune_all_layers=fine_tune_all_layers, 53 | 54 | data_loaders=data_loaders, 55 | is_object_level=True, 56 | 57 | num_epochs=num_epochs, 58 | learning_rate=learning_rate, 59 | weight_decay=weight_decay, 60 | train_batch_size=train_batch_size, 61 | eval_epoch_step=eval_epoch_step, 62 | 63 | use_gpu=use_gpu, 64 | cuda_device_idx=cuda_device_idx, 65 | use_multiple_gpu=use_multiple_gpu, 66 | 67 | save_model=save_model 68 | ) 69 | 70 | 71 | if __name__ == "__main__": 72 | train_batch_size = 32 73 | test_batch_size = 32 74 | 75 | logger = get_logger(train_batch_size) 76 | 77 | logger.info('start loading dataset') 78 | begin_time = time.time() 79 | bounding_boxes = utils.get_annotated_bounding_boxes() 80 | train_loader, valid_loader = dataset.get_train_validation_data_loader( 81 | resize_size=224, 82 | batch_size=train_batch_size, 83 | random_seed=random_seed, 84 | augment=True, 85 | validation_size=validation_size, 86 | object_boxes_dict=bounding_boxes, 87 | show_sample=False 88 | ) 89 | test_loader = dataset.get_test_data_loader( 90 | resize_size=224, 91 | batch_size=test_batch_size, 92 | object_boxes_dict=bounding_boxes 93 | ) 94 | logger.info('loading dataset costs %.4fs' % (time.time() - begin_time)) 95 | 96 | get_trained_model_object( 97 | logger=logger, 98 | data_loaders=[train_loader, valid_loader, test_loader], 99 | train_batch_size=train_batch_size 100 | ) 101 | 102 | logger.info('model: ' + model_name) 103 | logger.info('pretrained: ' + str(use_pretrained_params)) 104 | logger.info('fine tune all layers: 
' + str(fine_tune_all_layers))
105 |     logger.info('epochs: ' + str(num_epochs))
106 |     logger.info('batch size: ' + str(train_batch_size))
107 |     logger.info('learning rate: ' + str(learning_rate))
--------------------------------------------------------------------------------
/model_test.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | 
  3 | import os
  4 | 
  5 | import torch
  6 | import torchvision
  7 | import time
  8 | import cub_200_2011 as dataset
  9 | 
 10 | torch.manual_seed(0)
 11 | torch.cuda.manual_seed_all(0)
 12 | 
 13 | class BCNN(torch.nn.Module):
 14 | 
 15 |     def __init__(self):
 16 |         """Declare all needed layers."""
 17 |         torch.nn.Module.__init__(self)
 18 |         resnet_model = torchvision.models.resnet34(pretrained=False)
 19 |         self.conv1 = resnet_model.conv1
 20 |         self.bn1 = resnet_model.bn1
 21 |         self.relu = resnet_model.relu
 22 |         self.maxpool = resnet_model.maxpool
 23 |         self.layer1 = resnet_model.layer1
 24 |         self.layer2 = resnet_model.layer2
 25 |         self.layer3 = resnet_model.layer3
 26 |         self.layer4 = resnet_model.layer4
 27 |         # Linear classifier.
 28 |         self.fc = torch.nn.Linear(512**2, 200)
 29 |         # No fc initialization here: this test-only model loads trained parameters from disk.
 30 | 
 31 |     def forward(self, X):
 32 | 
 33 |         N = X.size()[0]
 34 |         assert X.size() == (N, 3, 448, 448)
 35 |         x = self.conv1(X)
 36 |         x = self.bn1(x)
 37 |         x = self.relu(x)
 38 |         x = self.maxpool(x)
 39 | 
 40 |         x = self.layer1(x)
 41 |         x = self.layer2(x)
 42 |         x = self.layer3(x)
 43 |         X = self.layer4(x)
 44 |         assert X.size() == (N, 512, 14, 14)
 45 |         X = X.view(N, 512, 14**2)
 46 |         X = torch.bmm(X, torch.transpose(X, 1, 2)) / (14**2)  # Bilinear
 47 |         assert X.size() == (N, 512, 512)
 48 |         X = X.view(N, 512**2)
 49 |         X = torch.sqrt(X + 1e-5)
 50 |         X = torch.nn.functional.normalize(X)
 51 |         X = self.fc(X)
 52 |         assert X.size() == (N, 200)
 53 |         return X
 54 |     def freeze_layers(self):
 55 |         # Freeze all previous layers.
 56 |         for param in self.conv1.parameters():
 57 |             param.requires_grad = False
 58 |         for param in self.bn1.parameters():
 59 |             param.requires_grad = False
 60 |         for param in self.layer1.parameters():
 61 |             param.requires_grad = False
 62 |         for param in self.layer2.parameters():
 63 |             param.requires_grad = False
 64 |         for param in self.layer3.parameters():
 65 |             param.requires_grad = False
 66 |         for param in self.layer4.parameters():
 67 |             param.requires_grad = False
 68 | class BCNNManager(object):
 69 |     """Manager class to test a trained bilinear CNN.
 70 | 
 71 |     Attributes:
 72 |         _options: Hyperparameters.
 73 |         _path: Useful paths.
 74 |         _net: Bilinear CNN.
 75 |         _criterion: Cross-entropy loss.
 76 |         _solver: SGD with momentum.
 77 |         _scheduler: Reduce learning rate by a factor of 0.1 on plateau.
 78 |         _train_loader: Training data.
 79 |         _test_loader: Testing data.
 80 |     """
 81 |     def __init__(self, path):
 82 |         """Prepare the network, criterion, solver, and data.
 83 | 
 84 |         Args:
 85 |             path, str: Path of the saved model parameters.
 86 |         """
 87 |         print('Prepare the network and data.')
 88 |         self._path = path
 89 |         # Network.
 90 |         self._net = torch.nn.DataParallel(BCNN()).cuda()
 91 |         self._net.module.freeze_layers()
 92 |         self._net.load_state_dict(torch.load(self._path))
 93 | 
 94 |         self._test_loader = dataset.get_test_data_loader(
 95 |             resize_size=448,
 96 |             batch_size=32,
 97 |             object_boxes_dict=None
 98 |         )
 99 | 
100 |     def test(self):
101 |         """Test the network."""
102 |         print('Testing.')
103 |         test_acc = 1.0 * self._accuracy(self._test_loader)
104 |         print("Test acc: %.4f" % test_acc)
105 | 
106 |     def _accuracy(self, data_loader):
107 |         """Compute the train/test accuracy.
108 | 
109 |         Args:
110 |             data_loader: Train/Test DataLoader.
111 | 
112 |         Returns:
113 |             Train/Test accuracy in percentage.
114 |         """
115 |         self._net.train(False)
116 |         num_correct = 0
117 |         num_total = 0
118 |         for i, (_, X, y) in enumerate(data_loader, 0):
119 |             # Data.
120 |             X = torch.autograd.Variable(X.cuda())
121 |             y = torch.autograd.Variable(y.cuda(non_blocking=True))
122 | 
123 |             # Prediction.
124 |             score = self._net(X)
125 |             _, prediction = torch.max(score.data, 1)
126 |             num_total += y.size(0)
127 |             num_correct += torch.sum(prediction == y.data).float()
128 |         return 100.0 * num_correct / num_total
129 | 
130 | 
131 | def test():
132 | 
133 |     path_save='models/resnet_34_all.pth'
134 |     manager = BCNNManager(path_save)
135 |     manager.test()
136 | 
137 | if __name__ == '__main__':
138 |     #dataset.use_less_data=True
139 |     test()
140 | 
--------------------------------------------------------------------------------
/utils.py:
--------------------------------------------------------------------------------
  1 | import sys, os, time
  2 | import logging
  3 | import requests
  4 | import tarfile
  5 | import matplotlib.pyplot as plt
  6 | plt.switch_backend('agg')
  7 | 
  8 | 
  9 | def download_file(url, filename):
 10 |     print('Downloading ' + filename + ' from ' + url)
 11 |     with open(filename, 'wb') as file:
 12 |         resp = requests.get(url, stream=True)
 13 |         # file.write(resp.content)
 14 |         # reference: https://stackoverflow.com/questions/15644964/python-progress-bar-and-downloads
 15 |         total_length = resp.headers.get('content-length')
 16 |         if total_length is None:  # no content length header
 17 |             file.write(resp.content)
 18 |         else:
 19 |             dl = 0
 20 |             total_length = int(total_length)
 21 |             for data in resp.iter_content(chunk_size=4096):
 22 |                 file.write(data)
 23 |                 dl += len(data)
 24 |                 done = int(50 * dl / total_length)
 25 |                 sys.stdout.write("\r[%s%s] %d%%" % ('=' * done, ' ' * (50 - done), done * 2))
 26 |                 sys.stdout.flush()
 27 |     print()
 28 |     print(filename + ' has been downloaded successfully!')
 29 | 
 30 | 
 31 | def extract_tgz(filename):
 32 |     print('Extracting ' + filename + ' ...')
 33 |     tar = tarfile.open(filename, 'r:gz')
 34 |     tar.extractall()
 35 |     tar.close()
 36 |     print(filename + ' has been extracted successfully!')
 37 | 
 38 | 
 39 | def plot_images(class_names, images, classes_true, classes_pred=None):
 40 |     """
 41 |     Adapted from https://github.com/Hvass-Labs/TensorFlow-Tutorials/
 42 |     """
 43 |     fig, axes = plt.subplots(3, 3)
 44 |     for i, ax in enumerate(axes.flat):
 45 |         # plot img
 46 |         ax.imshow(images[i, :, :, :], interpolation='spline16')
 47 |         # show true & predicted classes
 48 |         cls_true_name = class_names[classes_true[i]]
 49 |         if classes_pred is None:
 50 |             xlabel = "{0} ({1})".format(cls_true_name, classes_true[i])
 51 |         else:
 52 |             cls_pred_name = class_names[classes_pred[i]]
 53 |             xlabel = "True: {0}\nPred: {1}".format(
 54 |                 cls_true_name, cls_pred_name
 55 |             )
 56 |         ax.set_xlabel(xlabel)
 57 |         ax.set_xticks([])
 58 |         ax.set_yticks([])
 59 |     plt.show()
 60 | 
 61 | 
 62 | def get_annotated_bounding_boxes():
 63 |     fp = 'CUB_200_2011/bounding_boxes.txt'
 64 |     boxes = {}
 65 |     with open(fp, 'r') as file:
 66 |         for line in file:
 67 |             arr = line.split(' ')
 68 |             boxes[int(arr[0])] = (float(arr[1]), float(arr[2]), float(arr[3]), float(arr[4]))
 69 |     return boxes
 70 | 
 71 | 
 72 | # deprecated
 73 | def get_logging(log_file_name_prefix):
 74 |     if not os.path.exists('logs/'):
 75 |         os.makedirs('logs/')
 76 |     time_str = time.strftime("%m-%d-%H-%M", time.localtime())
 77 |     logging.basicConfig(level=logging.INFO,
 78 |                         format='%(asctime)s %(message)s',
 79 |                         datefmt='%m-%d %H:%M',
 80 |                         filename='logs/' + log_file_name_prefix + '_' + time_str + '.log')
 81 |     # define a Handler which writes INFO messages or higher to the sys.stderr
 82 |     # console = logging.StreamHandler()
 83 |     # console.setLevel(logging.DEBUG)
 84 |     # # set a format which is simpler for console use
 85 |     # formatter = logging.Formatter('%(message)s')
 86 |     # # tell the handler to use this format
 87 |     # console.setFormatter(formatter)
 88 |     # # add the handler to the root logger
 89 |     # logging.getLogger('').addHandler(console)
 90 |     return logging
 91 | 
 92 | 
 93 | class LoggerS:  # a thin wrapper that writes to the log file and also echoes to the console
 94 | 
 95 |     def __init__(self, logging, console_msg_prefix=None):
 96 |         self.logging = logging
 97 |         self.console_msg_prefix = console_msg_prefix
 98 | 
 99 |     def info(self, msg):
100 |         self.logging.info(msg)
101 |         if self.console_msg_prefix is None:
102 |             print(msg)
103 |         else:
104 |             print(self.console_msg_prefix + ' -> ' + msg)
105 | 
106 | 
107 | def get_logger(log_file_name_prefix, console_msg_prefix=None):
108 |     return LoggerS(get_logging(log_file_name_prefix), console_msg_prefix)
--------------------------------------------------------------------------------
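A minimal usage sketch for `utils.get_logger`, mirroring how main.py and adjust_weights.py call it (the prefix strings below are illustrative, not from the repo):

```
import utils

logger = utils.get_logger('example_run')       # writes logs/example_run_<MM-DD-HH-MM>.log
logger.info('start loading dataset')           # logged to file and echoed to the console

obj_logger = utils.get_logger('obj_densenet161', 'obj')
obj_logger.info('target model object starts')  # console line reads 'obj -> target model object starts'
```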