├── .gitignore
├── gen_dataset.py
├── threshold.py
├── src
│   ├── Test.py
│   ├── Train.py
│   ├── Network.py
│   └── Utils.py
├── README.md
└── reference
    ├── PascalLoader.py
    ├── PascalNetwork.py
    └── Loader.py
/.gitignore:
--------------------------------------------------------------------------------
1 | */**/*checkpoint*
2 | .idea
3 | __pycache__
4 | dataset/
5 | checkpoints/
6 | *.jpg
7 | *.out
--------------------------------------------------------------------------------
/gen_dataset.py:
--------------------------------------------------------------------------------
1 | import os
2 | import shutil
3 | import argparse
4 | 
5 | 
6 | def gen_dataset(dataset_root="/home/tsinghuaee13/dataset"):
7 |     origin_path = os.path.join(dataset_root, "origin")
8 |     train_path = os.path.join(dataset_root, "train")
9 |     test_path = os.path.join(dataset_root, "test")
10 | 
11 |     if not os.path.exists(train_path):
12 |         os.mkdir(train_path)
13 |         os.mkdir(os.path.join(train_path, "JPEGImages"))
14 | 
15 |     if not os.path.exists(test_path):
16 |         os.mkdir(test_path)
17 |         os.mkdir(os.path.join(test_path, "JPEGImages"))
18 |     # copy images: 2007/2008 pictures go to the test set, the rest to the train set
19 |     for (idx, img_name) in enumerate(os.listdir(os.path.join(origin_path, "JPEGImages"))):
20 |         if img_name[0:4] == "2007" or img_name[0:4] == "2008":
21 |             shutil.copy(os.path.join(origin_path, "JPEGImages", img_name),
22 |                         os.path.join(test_path, "JPEGImages", img_name))
23 |             print("image written, index %d" % idx, end='\r')
24 |         else:
25 |             shutil.copy(os.path.join(origin_path, "JPEGImages", img_name),
26 |                         os.path.join(train_path, "JPEGImages", img_name))
27 |             print("image written, index %d" % idx, end='\r')
28 |     # split the annotation file the same way
29 |     with open(os.path.join(origin_path, "annotations.txt"), "r") as fin:
30 |         with open(os.path.join(test_path, "annotations.txt"), "w") as test_out:
31 |             with open(os.path.join(train_path, 'annotations.txt'), 'w') as train_out:
32 |                 for line in fin.readlines():
33 |                     if line[0:4] == "2007" or line[0:4] == "2008":
34 |                         test_out.write(line)
35 |                     else:
36 |                         train_out.write(line)
37 | 
38 | 
39 | parser = argparse.ArgumentParser(description="A tool used to generate the train set and the test set")
40 | parser.add_argument("datasetpath", nargs='?', default="/home/tsinghuaee13/dataset")  # nargs='?' so the default actually applies
41 | args = parser.parse_args()
42 | 
43 | if __name__ == "__main__":
44 |     gen_dataset(args.datasetpath)
45 | 
--------------------------------------------------------------------------------
/threshold.py:
--------------------------------------------------------------------------------
1 | """
2 | This is added to the project for manually figuring out the best threshold in prediction.
3 | No other file in this project depends on it directly, so it sits at the top level along with the preprocessing script.
4 | """
5 | import torch
6 | import os
7 | import torchvision.transforms as transforms
8 | import matplotlib.pyplot as plt
9 | import numpy as np
10 | import argparse
11 | 
12 | from Utils import predict, eval_macc, MyDataLoader, eval_wacc, eval_map, eval_f1, load_model_from_file
13 | 
14 | if __name__ == '__main__':
15 |     normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
16 |                                      std=[0.229, 0.224, 0.225])
17 | 
18 |     val_transform = transforms.Compose([
19 |         transforms.Resize((224, 224)),
20 |         # transforms.RandomHorizontalFlip(),
21 |         transforms.ToTensor(),
22 |         normalize,
23 |     ])
24 |     testpath = "../dataset"
25 |     batch = 10
26 |     modelpath = "../checkpoints/resnet18_190515_2049_001.pth"
27 |     gpu = "0"
28 |     val_data = MyDataLoader(transform=val_transform, trainval='test', data_path=testpath,
29 |                             random_crops=0)
30 |     val_loader = torch.utils.data.DataLoader(dataset=val_data, batch_size=batch, shuffle=False, num_workers=4)
31 |     net = load_model_from_file(modelpath, "resnet18", True)
32 |     if gpu is not None:
33 |         net.cuda()
34 |         os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
35 |         os.environ["CUDA_VISIBLE_DEVICES"] = str(gpu)
36 | 
37 |     pos = []
38 |     neg = []
39 |     net.eval()
40 |     for idx, (images, labels) in enumerate(val_loader):
41 |         images = images.view((-1, 3, 224, 224))
42 |         if gpu is not None:
43 |             images = images.cuda()
44 | 
45 |         outputs = net(images)
46 |         outputs = outputs.cpu().data
47 |         outputs = outputs.view((-1, 20))
48 | 
49 |         # outputs: shape [batchsize, num_classes]; split the class-0 scores by ground truth
50 |         for i, lbl in enumerate(labels):
51 |             # print(lbl, outputs)
52 |             if lbl[0] == 1:
53 |                 pos.append(outputs[i].numpy())
54 |             else:
55 |                 neg.append(outputs[i].numpy())
56 | 
57 |         print("Evaluating threshold, batch %d" % idx, end='\r')
58 | 
59 |     pos = np.array(pos)
60 |     neg = np.array(neg)
61 |     plt.hist(pos[:, 0])
62 |     plt.savefig("figure1.jpg")
63 |     plt.close()
64 |     plt.hist(neg[:, 0])
65 |     plt.savefig("figure2.jpg")
66 |     plt.close()
67 | 
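# --- Added sketch (not part of the original script) ---
# The histograms above only look at class 0, because pos/neg are split on
# lbl[0]. A hypothetical per-class variant: gather every score and label
# first (as [N, 20] arrays), then redo the positive/negative split per class.
def plot_all_classes(all_scores, all_labels, num_classes=20):
    """all_scores, all_labels: np.ndarray of shape [N, num_classes]."""
    for c in range(num_classes):
        plt.hist(all_scores[all_labels[:, c] == 1, c], alpha=0.5, label='positive')
        plt.hist(all_scores[all_labels[:, c] == 0, c], alpha=0.5, label='negative')
        plt.legend()
        plt.savefig("class_%02d.jpg" % c)
        plt.close()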
--------------------------------------------------------------------------------
/src/Test.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torchvision.transforms as transforms
3 | import argparse
4 | 
5 | from Utils import predict, eval_macc, MyDataLoader, eval_wacc, eval_map, eval_f1, load_model_from_file
6 | 
7 | parser = argparse.ArgumentParser(description='Predict a picture or evaluate the model on a test dataset')
8 | parser.add_argument("modelpath", type=str, help="The model for prediction or evaluation")
9 | parser.add_argument("--mode", type=str, default="evaluate",
10 |                     choices=["predict", "evaluate", "evalmacc", "evalwacc", "evalmap", "evalf1"],
11 |                     help="Whether to predict a single image or evaluate a model on a dataset")
12 | parser.add_argument("--testpath", type=str, required=True, help="The path to the test image or dataset")
13 | parser.add_argument("--gpu", type=int, default=None, help="Which gpu to use (leave unset for CPU)")
14 | parser.add_argument("--model", type=str, required=True, help="Which model architecture the checkpoint was trained with")
15 | parser.add_argument("--crops", type=int, default=0, help="How many random crops to use while testing (0 disables multi-crop)")
16 | parser.add_argument("--batch", type=int, default=8, help="Batch size while evaluating")
17 | args = parser.parse_args()
18 | 
19 | if __name__ == '__main__':
20 |     normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
21 |                                      std=[0.229, 0.224, 0.225])
22 |     if args.crops == 0:
23 |         val_transform = transforms.Compose([
24 |             transforms.Resize((224, 224)),
25 |             # transforms.RandomHorizontalFlip(),
26 |             transforms.ToTensor(),
27 |             normalize,
28 |         ])
29 |     else:
30 |         val_transform = transforms.Compose([
31 |             transforms.RandomResizedCrop((224, 224)),
32 |             # transforms.RandomHorizontalFlip(),
33 |             transforms.ToTensor(),
34 |             normalize,
35 |         ])
36 | 
37 |     if args.mode == "predict":
38 |         predict(val_transform, model_path=args.modelpath, img_path=args.testpath, model=args.model, gpu=args.gpu,
39 |                 crops=args.crops)
40 |     else:
41 |         val_data = MyDataLoader(transform=val_transform, trainval='test', data_path=args.testpath,
42 |                                 random_crops=args.crops)
43 |         val_loader = torch.utils.data.DataLoader(dataset=val_data, batch_size=args.batch, shuffle=False, num_workers=4)
44 |         if args.mode == "evalmacc":
45 |             eval_macc(val_loader, model_path=args.modelpath, model=args.model, gpu=args.gpu, crops=args.crops)
46 |         if args.mode == "evalwacc":
47 |             eval_wacc(val_loader, model_path=args.modelpath, model=args.model, gpu=args.gpu, crops=args.crops)
48 |         if args.mode == "evalmap":
49 |             eval_map(load_model_from_file(args.modelpath, model=args.model, load_fc=1), logger=None,
50 |                      val_loader=val_loader, steps=0, gpu=args.gpu, crops=args.crops)
51 |         if args.mode == "evalf1":
52 |             eval_f1(val_loader, model_path=args.modelpath, model=args.model, gpu=args.gpu, crops=args.crops)
53 |         if args.mode == "evaluate":
54 |             eval_macc(val_loader, model_path=args.modelpath, model=args.model, gpu=args.gpu, crops=args.crops)
55 |             eval_wacc(val_loader, model_path=args.modelpath, model=args.model, gpu=args.gpu, crops=args.crops)
56 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # MultiLabelClassification
2 | This is a project for the Media & Recognition course.
3 | 
4 | Reference: https://github.com/bbrattoli/PascalClassificationPytorch
5 | 
6 | ## Preprocess
7 | To train on the 2009-2012 pictures of VOC2012 and test on the 2007-2008 ones (our homework requirement), run:
8 | ```bash
9 | python ./gen_dataset.py {$YOUR_DATASET_PATH}
10 | ```
11 | Note: every dataset path expected by the scripts is the root of a dataset directory with the required structure.
12 | Namely, the structure should look like this:
13 | 
14 |     --dataset_root
15 |       |--origin
16 |       |  |--JPEGImages(dir)
17 |       |  |--annotations.txt
18 |       |
19 |       |--train
20 |       |  |--JPEGImages(dir)
21 |       |  |--annotations.txt
22 |       |
23 |       |--test
24 |          |--JPEGImages(dir)
25 |          |--annotations.txt
26 | 
27 | And YOUR_DATASET_PATH is the dataset_root above.
28 | 
29 | 'origin' is the original dataset, which is PascalVOC2012 in our homework. It differs from the standard VOC2012
30 | dataset in that it has a single annotation file with one line per image, in the format `$FILENAME(without suffix) label0 label1 ...`, where the labels are class indices.
31 | 
32 | If you want to train on other datasets, you should do the preprocessing yourself; for
33 | training you only need the file structure above ('origin' is not needed).
34 | 
35 | ## Training Guide
36 | 
37 | ### Training from scratch
38 | The working directory is src/, so first enter the directory:
39 | ```bash
40 | cd src
41 | ```
42 | Then run:
43 | ```bash
44 | python ./Train.py\
45 |     {$YOUR_DATASET_PATH}\
46 |     --gpu={$YOUR_GPU_INDEX}
47 | ```
48 | This will automatically create a checkpoint folder, and the .pth checkpoint files will be written there.
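Each checkpoint file name encodes the backbone, a timestamp taken when training starts, and the epoch, and one is written every 5 epochs. A minimal sketch of the naming scheme used in Train.py (values are illustrative):
```python
import time

# Train.py pattern: <checkpoint_dir>/<model>_<yymmdd_HHMM>_<epoch+1>.pth
prefix = time.strftime("%y%m%d_%H%M", time.localtime())   # e.g. "190515_2049"
filename = '%s/%s_%s_%03i.pth' % ('../checkpoints', 'resnet18', prefix, 1)
# -> ../checkpoints/resnet18_190515_2049_001.pth
```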
49 | 
50 | You can also use --epochs, --batch, --lr and --model to set the training parameters explicitly:
51 | ```bash
52 | python ./Train.py\
53 |     {$YOUR_DATASET_PATH}\
54 |     --gpu={$YOUR_GPU_INDEX}\
55 |     --epochs={$MAX_TRAINING_EPOCH}\
56 |     --batch={$BATCH_SIZE}\
57 |     --lr={$LEARNING_RATE}\
58 |     --model={$YOUR_MODEL}
59 | ```
60 | Note: the model can be chosen from resnet18, resnet34, resnet50, resnet101 and resnet152.
61 | 
62 | ### Finetuning (may need Internet access)
63 | Passing the parameter '--finetune' enables the PyTorch-provided model pretrained on ImageNet (1000 classes).
64 | The layers close to the input are then frozen, and the fc layer is replaced with a fresh 20-class head for finetuning.
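Under the hood, the finetuning branch of Train.py amounts to the following (a minimal sketch of the actual logic):
```python
import torch
from Network import resnet18

net = resnet18(pretrained=True)             # ImageNet weights via torchvision's model zoo
for name, param in net.named_parameters():
    if 'conv' in name:                      # freeze the layers close to the input
        param.requires_grad = False
net.fc = torch.nn.Linear(net.fc.in_features, 20)  # fresh 20-class head
```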
65 | For example:
66 | ```bash
67 | python ./Train.py\
68 |     ../dataset\
69 |     --gpu=0\
70 |     --finetune=1
71 | ```
72 | ### Restart
73 | Use the parameter '--modelpath' to continue your training after a stop. If you want to load the fc layer as well, add the parameter --fc:
74 | ```bash
75 | python ./Train.py\
76 |     ../dataset\
77 |     --gpu=0\
78 |     --modelpath=../checkpoints/example.pth\
79 |     --fc=1
80 | ```
81 | ## Evaluating Guide
82 | Run Test.py to evaluate your model on a dataset (outputs mAcc and wAcc):
83 | ```bash
84 | python ./Test.py\
85 |     {$YOUR_MODEL_PATH}\
86 |     --model={$YOUR_MODEL}\
87 |     --mode=evaluate\
88 |     --testpath={$DATASET_ROOT}\
89 |     --gpu=0
90 | ```
91 | mAP is not a criterion of our homework, but Test.py still supports it: pass --mode=evalmap, which calls the
92 | eval_map function in Utils.py. --mode=evalmacc, --mode=evalwacc and --mode=evalf1 likewise report the individual metrics.
93 | 
94 | If you want to predict the labels of a single image, run:
95 | ```bash
96 | python ./Test.py\
97 |     {$YOUR_MODEL_PATH}\
98 |     --model={$YOUR_MODEL}\
99 |     --mode=predict\
100 |     --testpath={$IMAGE_PATH}\
101 |     --gpu=0
102 | ```
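The same evaluation can also be driven from Python. A minimal sketch, assuming a checkpoint at ../checkpoints/example.pth (hypothetical path) and the dataset layout above:
```python
import torch
import torchvision.transforms as transforms
from Utils import MyDataLoader, load_model_from_file, eval_map

normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])
transform = transforms.Compose([transforms.Resize((224, 224)),
                                transforms.ToTensor(), normalize])
val_data = MyDataLoader(transform=transform, trainval='test',
                        data_path='../dataset', random_crops=0)
val_loader = torch.utils.data.DataLoader(dataset=val_data, batch_size=8, shuffle=False)
net = load_model_from_file('../checkpoints/example.pth', model='resnet18', load_fc=1)
eval_map(net, logger=None, val_loader=val_loader, steps=0, gpu=None, crops=0)
```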
--------------------------------------------------------------------------------
/reference/PascalLoader.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on Fri Aug 18 11:58:07 2017
4 | 
5 | @author: Biagio Brattoli
6 | """
7 | import os, numpy as np
8 | import torch
9 | import torch.utils.data as data
10 | from scipy.misc import imread, imresize
11 | # from scipy.sparse import csr_matrix
12 | from PIL import Image
13 | import xml.etree.ElementTree as ET
14 | 
15 | class DataLoader(data.Dataset):
16 |     def __init__(self, data_path, trainval, transform, random_crops=0):
17 |         self.data_path = data_path
18 |         self.transform = transform
19 |         self.random_crops = random_crops
20 |         self.trainval = trainval
21 | 
22 |         self.__init_classes()
23 |         self.names, self.labels = self.__dataset_info()
24 | 
25 |     def __getitem__(self, index):
26 |         x = imread(self.data_path+'/JPEGImages/'+self.names[index]+'.jpg', mode='RGB')
27 |         x = Image.fromarray(x)
28 | 
29 |         scale = np.random.rand()*2+0.25
30 |         w = int(x.size[0]*scale)
31 |         h = int(x.size[1]*scale)
32 |         if min(w, h) < 227:
33 |             scale = 227/min(w, h)
34 |             w = int(x.size[0]*scale)
35 |             h = int(x.size[1]*scale)
36 | 
37 |         #x = x.resize((w,h), Image.BILINEAR) # Random scale
38 | 
39 |         if self.random_crops == 0:
40 |             x = self.transform(x)
41 |         else:
42 |             crops = []
43 |             for i in range(self.random_crops):
44 |                 crops.append(self.transform(x))
45 |             x = torch.stack(crops)
46 | 
47 |         y = self.labels[index]
48 |         return x, y
49 | 
50 |     def __len__(self):
51 |         return len(self.names)
52 | 
53 |     def __dataset_info(self):
54 |         #annotation_files = os.listdir(self.data_path+'/Annotations')
55 |         with open(self.data_path+'/ImageSets/Main/'+self.trainval+'.txt') as f:
56 |             annotations = f.readlines()
57 | 
58 |         annotations = [n[:-1] for n in annotations]
59 | 
60 |         names = []
61 |         labels = []
62 |         for af in annotations:
63 |             if len(af) != 6:
64 |                 continue
65 |             filename = os.path.join(self.data_path, 'Annotations', af)
66 |             tree = ET.parse(filename+'.xml')
67 |             objs = tree.findall('object')
68 |             num_objs = len(objs)
69 | 
70 |             boxes = np.zeros((num_objs, 4), dtype=np.uint16)
71 |             boxes_cl = np.zeros((num_objs), dtype=np.int32)
72 | 
73 |             for ix, obj in enumerate(objs):
74 |                 bbox = obj.find('bndbox')
75 |                 # Make pixel indexes 0-based
76 |                 x1 = float(bbox.find('xmin').text) - 1
77 |                 y1 = float(bbox.find('ymin').text) - 1
78 |                 x2 = float(bbox.find('xmax').text) - 1
79 |                 y2 = float(bbox.find('ymax').text) - 1
80 | 
81 |                 cls = self.class_to_ind[obj.find('name').text.lower().strip()]
82 |                 boxes[ix, :] = [x1, y1, x2, y2]
83 |                 boxes_cl[ix] = cls
84 | 
85 |             lbl = np.zeros(self.num_classes)
86 |             lbl[boxes_cl] = 1
87 |             labels.append(lbl)
88 |             names.append(af)
89 | 
90 |         return np.array(names), np.array(labels).astype(np.float32)
91 | 
92 |     def __init_classes(self):
93 |         self.classes = ('__background__', 'aeroplane', 'bicycle', 'bird', 'boat',
94 |                         'bottle', 'bus', 'car', 'cat', 'chair',
95 |                         'cow', 'diningtable', 'dog', 'horse',
96 |                         'motorbike', 'person', 'pottedplant',
97 |                         'sheep', 'sofa', 'train', 'tvmonitor')
98 |         self.num_classes = len(self.classes)
99 |         self.class_to_ind = dict(zip(self.classes, range(self.num_classes)))
100 | 
101 | 
102 | 
--------------------------------------------------------------------------------
/reference/PascalNetwork.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on Wed Sep 13 15:57:01 2017
4 | 
5 | @author: bbrattol
6 | """
7 | import torch
8 | import torch.nn as nn
9 | 
10 | import sys
11 | sys.path.append('../Utils')
12 | from torch.nn import LocalResponseNorm as LRN  # the 'lrn' modules below are local response norms, not BatchNorm2d
13 | from collections import OrderedDict
14 | 
15 | 
16 | class Network(nn.Module):
17 | 
18 |     def __init__(self, num_classes=21, groups=2):
19 |         super(Network, self).__init__()
20 |         self.conv = nn.Sequential()
21 |         self.fc6 = nn.Sequential()
22 | 
23 |     def vggnet(self, num_classes=21, groups=2):
24 |         self.conv.add_module('conv1_s1', nn.Conv2d(3, 96, kernel_size=11, stride=4, padding=0))
25 |         self.conv.add_module('relu1_s1', nn.ReLU(inplace=True))
26 |         # self.conv.add_module('bn1_s1',nn.BatchNorm2d(96))
27 |         self.conv.add_module('pool1_s1', nn.MaxPool2d(kernel_size=3, stride=2))
28 |         self.conv.add_module('lrn1_s1', LRN(size=5, alpha=0.0001, beta=0.75))
29 | 
30 |         self.conv.add_module('conv2_s1', nn.Conv2d(96, 256, kernel_size=5, padding=2, groups=groups))
31 |         self.conv.add_module('relu2_s1', nn.ReLU(inplace=True))
32 |         # self.conv.add_module('bn2_s1',nn.BatchNorm2d(256))
33 |         self.conv.add_module('pool2_s1', nn.MaxPool2d(kernel_size=3, stride=2))
34 |         self.conv.add_module('lrn2_s1', LRN(size=5, alpha=0.0001, beta=0.75))
35 | 
36 |         self.conv.add_module('conv3_s1', nn.Conv2d(256, 384, kernel_size=3, padding=1))
37 |         self.conv.add_module('relu3_s1', nn.ReLU(inplace=True))
38 |         # self.conv.add_module('bn3_s1',nn.BatchNorm2d(384))
39 | 
40 |         self.conv.add_module('conv4_s1', nn.Conv2d(384, 384, kernel_size=3, padding=1, groups=groups))
41 |         # self.conv.add_module('bn4_s1',nn.BatchNorm2d(384))
42 |         self.conv.add_module('relu4_s1', nn.ReLU(inplace=True))
43 | 
44 |         self.conv.add_module('conv5_s1', nn.Conv2d(384, 256, kernel_size=3, padding=1, groups=groups))
45 |         # self.conv.add_module('bn5_s1',nn.BatchNorm2d(256))
46 |         self.conv.add_module('relu5_s1', nn.ReLU(inplace=True))
47 |         self.conv.add_module('pool5_s1', nn.MaxPool2d(kernel_size=3, stride=2))
48 | 
49 |         self.fc6.add_module('fc6_s1', nn.Linear(256 * 6 * 6, 4096))
50 |         self.fc6.add_module('relu6_s1', nn.ReLU(inplace=True))
51 |         self.fc6.add_module('drop6_s1', nn.Dropout(p=0.5))
52 | 
53 |         self.fc7 = nn.Sequential()
54 |         self.fc7.add_module('fc7', nn.Linear(4096, 4096))
55 |         self.fc7.add_module('relu7', nn.ReLU(inplace=True))
56 |         self.fc7.add_module('drop7', nn.Dropout(p=0.5))
57 | 
58 |         self.classifier = nn.Sequential()
59 |         self.classifier.add_module('fc8', 
nn.Linear(4096, num_classes))
60 | 
61 |     def load(self, checkpoint, load_fc=False):
62 |         model_dict = self.state_dict()
63 |         layers = [k for k, v in model_dict.items()]
64 | 
65 |         pretrained_dict = torch.load(checkpoint)
66 |         keys = [k for k, v in pretrained_dict.items()]
67 |         keys.sort()
68 |         #keys = keys[2:-4] #load until conv5
69 | 
70 |         to_load = []
71 |         for k in keys:
72 |             if k not in model_dict:
73 |                 continue
74 |             # if 'conv5' in k or 'bn5' in k:
75 |             #     continue
76 |             if 'conv' in k:
77 |                 to_load.append(k)
78 |             if 'fc' in k and load_fc:
79 |                 to_load.append(k)
80 | 
81 |         pretrained_dict = {k: v for k, v in pretrained_dict.items() if k in to_load and k in model_dict}
82 |         model_dict.update(pretrained_dict)
83 |         self.load_state_dict(model_dict)
84 | 
85 |     def save(self, checkpointFold, epoch):
86 |         filename = '%s/jps_%03i.pth.tar' % (checkpointFold, epoch)
87 |         torch.save(self.state_dict(), filename)
88 | 
89 |     def forward(self, x):
90 |         b, c, h, w = x.size()
91 |         x = self.conv(x)
92 |         x = self.fc6(x.view(b, -1))
93 |         x = self.fc7(x)
94 |         x = self.classifier(x)
95 |         return x
96 | 
97 | 
--------------------------------------------------------------------------------
/reference/Loader.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on Fri Aug 18 11:58:07 2017
4 | 
5 | @author: Biagio Brattoli
6 | """
7 | import os
8 | import numpy as np
9 | import torch
10 | import torch.utils.data as data
11 | from scipy.misc import imread, imresize
12 | # from scipy.sparse import csr_matrix
13 | from PIL import Image
14 | # import xml.etree.ElementTree as ET
15 | 
16 | 
17 | class MyDataLoader(data.Dataset):
18 |     def __init__(self, transform, trainval='train', data_path='../VOC2012', random_crops=0):
19 |         """
20 |         Initialize the dataset.
21 |         VOC(Labels only) tree:
22 |         --root
23 |           |--train
24 |           |  |--JPEGImages(dir)
25 |           |  |--annotations.txt
26 |           |
27 |           |--test
28 |              |--JPEGImages(dir)
29 |              |--annotations.txt
30 |         :param transform: the transformation
31 |         :param data_path: the root of the datapath
32 |         :param random_crops: number of random crops (0 disables multi-crop)
33 |         """
34 |         self.data_path = data_path
35 |         self.transform = transform
36 |         self.random_crops = random_crops
37 |         self.train_or_test = trainval
38 | 
39 |         self.__init_classes()
40 |         self.names, self.labels = self.__dataset_info()
41 | 
42 |     def __getitem__(self, index):
43 |         """
44 |         This is the getitem func which enables the usage of the [] operator
45 |         :param index: the index of the picture
46 |         :return: tuple (picture, its label(s))
47 |         """
48 |         x = imread(os.path.join(self.data_path, self.train_or_test, 'JPEGImages', self.names[index] + '.jpg'),
49 |                    mode='RGB')
50 |         x = Image.fromarray(x)
51 | 
52 |         # Resize directly instead of the random-scale logic kept commented out below
53 |         x = x.resize((224, 224))
54 | 
55 |         # scale = np.random.rand() * 2 + 0.25
56 |         # w = int(x.size[0] * scale)
57 |         # h = int(x.size[1] * scale)
58 |         # if min(w, h) < 227:
59 |         #     scale = 227 / min(w, h)
60 |         #     w = int(x.size[0] * scale)
61 |         #     h = int(x.size[1] * scale)
62 | 
63 |         # x = x.resize((w,h), Image.BILINEAR) # Random scale
64 | 
65 |         if self.random_crops == 0:
66 |             x = self.transform(x)
67 |         else:
68 |             crops = []
69 |             for i in range(self.random_crops):
70 |                 crops.append(self.transform(x))
71 |             x = torch.stack(crops)
72 | 
73 |         y = self.labels[index]
74 |         return x, y
75 | 
76 |     def __len__(self):
77 |         return len(self.names)
78 | 
79 |     def __dataset_info(self):
80 |         """
81 |         Generate names(np.array, with string elements) and labels(np.array, with array(number) elements).
82 |         :return: names, labels
83 |         """
84 |         annotation_file = os.path.join(self.data_path, self.train_or_test, 'annotations.txt')  # annotations live under train/ or test/
85 |         with open(annotation_file, 'r') as fp:
86 |             lines = fp.readlines()
87 | 
88 |         names = []
89 |         labels = []
90 |         for line in lines:
91 |             names.append(line.split()[0])          # whitespace-separated: name label0 label1 ...
92 |             labels.append(np.array(line.split()[1:]))
93 | 
94 |         return np.array(names), np.array(labels).astype(np.float32)
95 | 
96 | 
97 |     # annotation_files = os.listdir(self.data_path+'/Annotations')
98 |     # with open(self.data_path + '/ImageSets/Main/' + self.trainval + '.txt') as f:
99 |     #     annotations = f.readlines()
100 |     #
101 |     # annotations = [n[:-1] for n in annotations]
102 |     #
103 |     # names = []
104 |     # labels = []
105 |     # for af in annotations:
106 |     #     if len(af) != 6:
107 |     #         continue
108 |     #     filename = os.path.join(self.data_path, 'Annotations', af)
109 |     #     tree = ET.parse(filename + '.xml')
110 |     #     objs = tree.findall('object')
111 |     #     num_objs = len(objs)
112 |     #
113 |     #     boxes = np.zeros((num_objs, 4), dtype=np.uint16)
114 |     #     boxes_cl = np.zeros((num_objs), dtype=np.int32)
115 |     #
116 |     #     for ix, obj in enumerate(objs):
117 |     #         bbox = obj.find('bndbox')
118 |     #         # Make pixel indexes 0-based
119 |     #         x1 = float(bbox.find('xmin').text) - 1
120 |     #         y1 = float(bbox.find('ymin').text) - 1
121 |     #         x2 = float(bbox.find('xmax').text) - 1
122 |     #         y2 = float(bbox.find('ymax').text) - 1
123 |     #
124 |     #         cls = self.class_to_ind[obj.find('name').text.lower().strip()]
125 |     #         boxes[ix, :] = [x1, y1, x2, y2]
126 |     #         boxes_cl[ix] = cls
127 |     #
128 |     #     lbl = np.zeros(self.num_classes)
129 |     #     lbl[boxes_cl] = 1
130 |     #     labels.append(lbl)
131 |     #     names.append(af)
132 |     #
133 |     #     return np.array(names), np.array(labels).astype(np.float32)
134 | 
135 | 
136 |     def __init_classes(self):
137 |         self.classes = ('__background__', 'aeroplane', 'bicycle', 'bird', 'boat',
138 |                         'bottle', 'bus', 'car', 'cat', 'chair',
139 |                         'cow', 'diningtable', 'dog', 'horse',
140 |                         'motorbike', 'person', 'pottedplant',
141 |                         'sheep', 'sofa', 'train', 'tvmonitor')
142 |         self.num_classes = len(self.classes)
143 |         self.class_to_ind = dict(zip(self.classes, range(self.num_classes)))
--------------------------------------------------------------------------------
/src/Train.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | import os
3 | import time
4 | import numpy as np
5 | import argparse
6 | 
7 | # sys.path.append('../Utils')
8 | # TODO: Finish the Logger class in Utils.py (currently a minimal stub)
9 | 
10 | import torch
11 | import torchvision.transforms as transforms
12 | 
13 | CORES = 4  # int(float(multiprocessing.cpu_count())*0.25)
14 | 
15 | from Network import resnet18, resnet34, resnet50, resnet101, resnet152
16 
| from Utils import MyDataLoader, adjust_learning_rate, load_model_from_file, compute_mAP, eval_map 17 | 18 | parser = argparse.ArgumentParser(description='Train network on Pascal VOC 2012') 19 | parser.add_argument('pascal_path', type=str, help='Path to Pascal VOC 2012 folder') 20 | parser.add_argument('--finetune', default=None, type=int, help='whether to use pytorch pretrained model and finetune') 21 | parser.add_argument('--model', default='resnet18', type=str, help='which backbone network to use', 22 | choices=['resnet18', 'resnet34', 'resnet50', 'resnet101', 'resnet152']) 23 | parser.add_argument('--modelpath', default=None, type=str, help='pretrained model path') 24 | parser.add_argument('--fc', default=None, type=int, help='load fc6 and fc7 from model') 25 | parser.add_argument('--gpu', default=None, type=int, help='gpu id') 26 | parser.add_argument('--epochs', default=160, type=int, help='max training epochs') 27 | parser.add_argument('--iter_start', default=0, type=int, help='Starting iteration count') 28 | parser.add_argument('--batch', default=10, type=int, help='batch size') 29 | parser.add_argument('--checkpoint', default='../checkpoints/', type=str, help='checkpoint folder') 30 | parser.add_argument('--lr', default=0.001, type=float, help='learning rate for SGD optimizer') 31 | parser.add_argument('--crops', default=10, type=int, help='number of random crops during testing') 32 | args = parser.parse_args() 33 | # args = parser.parse_args([ 34 | # '../dataset', 35 | # '--gpu','0', 36 | # '--finetune','1' 37 | # '--model','resnet18' 38 | # ]) 39 | prefix = time.strftime("%y%m%d_%H%M", time.localtime()) 40 | models = { 41 | 'resnet18': resnet18, 42 | 'resnet34': resnet34, 43 | 'resnet50': resnet50, 44 | 'resnet101': resnet101, 45 | 'resnet152': resnet152 46 | } 47 | 48 | 49 | def main(): 50 | # Training devices 51 | if args.gpu is not None: 52 | print('Using GPU %d' % args.gpu) 53 | os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID" 54 | os.environ["CUDA_VISIBLE_DEVICES"] = str(args.gpu) 55 | else: 56 | print('CPU mode') 57 | 58 | normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], 59 | std=[0.229, 0.224, 0.225]) 60 | 61 | train_transform = transforms.Compose([ 62 | transforms.RandomResizedCrop(224), 63 | transforms.RandomHorizontalFlip(), 64 | transforms.ToTensor(), 65 | normalize, 66 | ]) 67 | 68 | val_transform = transforms.Compose([ 69 | transforms.RandomResizedCrop(224), 70 | transforms.RandomHorizontalFlip(), 71 | transforms.ToTensor(), 72 | normalize, 73 | ]) 74 | 75 | # Load the dataset. When training, disable the random crop. 76 | train_data = MyDataLoader(transform=train_transform, trainval='train', data_path=args.pascal_path, random_crops=0) 77 | train_loader = torch.utils.data.DataLoader(dataset=train_data, 78 | batch_size=args.batch, 79 | shuffle=True, 80 | num_workers=CORES) 81 | 82 | val_data = MyDataLoader(transform=val_transform, trainval='test', data_path=args.pascal_path, 83 | random_crops=args.crops) 84 | val_loader = torch.utils.data.DataLoader(dataset=val_data, 85 | batch_size=args.batch, 86 | shuffle=False, 87 | num_workers=CORES) 88 | 89 | N = len(train_data.names) 90 | iter_per_epoch = int(N/args.batch) 91 | 92 | # Network initialize 93 | # finetune: freeze some layers and modify the fc layer. 
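    # Three initialization paths below:
    #   (1) --finetune: load torchvision's ImageNet weights, freeze the conv
    #       layers and replace fc with a fresh 20-class head;
    #   (2) --modelpath: resume from a checkpoint saved by this project;
    #   (3) otherwise: train the chosen backbone from scratch.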
94 |     if args.finetune is not None:
95 |         # Initialize the network
96 |         net = models[args.model](pretrained=True)
97 |         # Freeze conv layers
98 |         for i, (name, param) in enumerate(net.named_parameters()):
99 |             if 'conv' in name:
100 |                 param.requires_grad = False
101 |         # Modify the fc layer
102 |         in_channel = net.fc.in_features
103 |         net.fc = torch.nn.Linear(in_features=in_channel, out_features=20)
104 | 
105 |     elif args.modelpath is not None:
106 |         net = load_model_from_file(args.modelpath, model=args.model, load_fc=args.fc)
107 | 
108 |     else:
109 |         net = models[args.model](pretrained=False, num_classes=20)
110 | 
111 |     if args.gpu is not None:
112 |         net.cuda()
113 | 
114 |     criterion = torch.nn.MultiLabelSoftMarginLoss()
115 |     optimizer = torch.optim.SGD(filter(lambda p: p.requires_grad, net.parameters()),
116 |                                 lr=args.lr, momentum=0.9, weight_decay=0.0001)
117 | 
118 | 
119 |     ############## TRAINING ###############
120 |     print("Start training, lr: %f, batch-size: %d" % (args.lr, args.batch))
121 |     print("Model: " + args.model)
122 |     print("Checkpoint Path: " + args.checkpoint)
123 |     print("Time: " + prefix)
124 |     if args.modelpath is not None:
125 |         print("Training from past model: " + args.modelpath)
126 | 
127 |     # Train the Model
128 |     steps = args.iter_start
129 |     for epoch in range(args.iter_start // iter_per_epoch, args.epochs):  # convert the starting iteration count to a starting epoch
130 |         adjust_learning_rate(optimizer, epoch, init_lr=args.lr, step=20, decay=0.1)
131 | 
132 |         mAP = []
133 |         for i, (images, labels) in enumerate(train_loader):
134 |             if args.gpu is not None:
135 |                 images = images.cuda()
136 |                 labels = labels.cuda()
137 | 
138 |             # Forward + Backward + Optimize
139 |             optimizer.zero_grad()
140 |             outputs = net(images)
141 | 
142 |             mAP.append(compute_mAP(labels.data, outputs.data))
143 | 
144 |             loss = criterion(outputs, labels)
145 |             loss.backward()
146 |             optimizer.step()
147 |             loss = loss.cpu().data.numpy()
148 | 
149 |             if steps % 100 == 0:
150 |                 print('[%d/%d] %d), Loss: %.3f, mAP %.2f%%' % (epoch + 1, args.epochs, steps, loss, 100 * np.mean(mAP[-20:])))
151 | 
152 |             steps += 1
153 | 
154 |         if epoch % 5 == 0:
155 |             filename = '%s/%s_%s_%03i.pth' % (args.checkpoint, args.model, prefix, epoch + 1)
156 |             torch.save(net.state_dict(), filename)
157 |             print('Saved: ' + filename)
158 | 
159 |         eval_map(net, None, val_loader, steps, args.gpu, args.crops)
160 | 
161 |         if os.path.exists(args.checkpoint + '/stop.txt'):
162 |             # break without using CTRL+C
163 |             break
164 | 
165 | 
166 | if __name__ == "__main__":
167 |     main()
--------------------------------------------------------------------------------
/src/Network.py:
--------------------------------------------------------------------------------
1 | import torch.nn as nn
2 | import torch.utils.model_zoo as model_zoo
3 | 
4 | 
5 | __all__ = ['ResNet', 'resnet18', 'resnet34', 'resnet50', 'resnet101',
6 |            'resnet152']
7 | 
8 | 
9 | model_urls = {
10 |     'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth',
11 |     'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth',
12 |     'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth',
13 |     'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth',
14 |     'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth',
15 | }
16 | 
17 | 
18 | def conv3x3(in_planes, out_planes, stride=1):
19 |     """3x3 convolution with padding"""
20 |     return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
21 |                      padding=1, bias=False)
22 | 
23 | 
24 | def conv1x1(in_planes, out_planes, stride=1):
25 |     """1x1 
convolution""" 26 | return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False) 27 | 28 | 29 | class BasicBlock(nn.Module): 30 | expansion = 1 31 | 32 | def __init__(self, inplanes, planes, stride=1, downsample=None): 33 | super(BasicBlock, self).__init__() 34 | self.conv1 = conv3x3(inplanes, planes, stride) 35 | self.bn1 = nn.BatchNorm2d(planes) 36 | self.relu = nn.ReLU(inplace=True) 37 | self.conv2 = conv3x3(planes, planes) 38 | self.bn2 = nn.BatchNorm2d(planes) 39 | self.downsample = downsample 40 | self.stride = stride 41 | 42 | def forward(self, x): 43 | identity = x 44 | 45 | out = self.conv1(x) 46 | out = self.bn1(out) 47 | out = self.relu(out) 48 | 49 | out = self.conv2(out) 50 | out = self.bn2(out) 51 | 52 | if self.downsample is not None: 53 | identity = self.downsample(x) 54 | 55 | out += identity 56 | out = self.relu(out) 57 | 58 | return out 59 | 60 | 61 | class Bottleneck(nn.Module): 62 | expansion = 4 63 | 64 | def __init__(self, inplanes, planes, stride=1, downsample=None): 65 | super(Bottleneck, self).__init__() 66 | self.conv1 = conv1x1(inplanes, planes) 67 | self.bn1 = nn.BatchNorm2d(planes) 68 | self.conv2 = conv3x3(planes, planes, stride) 69 | self.bn2 = nn.BatchNorm2d(planes) 70 | self.conv3 = conv1x1(planes, planes * self.expansion) 71 | self.bn3 = nn.BatchNorm2d(planes * self.expansion) 72 | self.relu = nn.ReLU(inplace=True) 73 | self.downsample = downsample 74 | self.stride = stride 75 | 76 | def forward(self, x): 77 | identity = x 78 | 79 | out = self.conv1(x) 80 | out = self.bn1(out) 81 | out = self.relu(out) 82 | 83 | out = self.conv2(out) 84 | out = self.bn2(out) 85 | out = self.relu(out) 86 | 87 | out = self.conv3(out) 88 | out = self.bn3(out) 89 | 90 | if self.downsample is not None: 91 | identity = self.downsample(x) 92 | 93 | out += identity 94 | out = self.relu(out) 95 | 96 | return out 97 | 98 | 99 | class ResNet(nn.Module): 100 | 101 | def __init__(self, block, layers, num_classes=1000, zero_init_residual=False): 102 | super(ResNet, self).__init__() 103 | self.inplanes = 64 104 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, 105 | bias=False) 106 | self.bn1 = nn.BatchNorm2d(64) 107 | self.relu = nn.ReLU(inplace=True) 108 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 109 | self.layer1 = self._make_layer(block, 64, layers[0]) 110 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2) 111 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2) 112 | self.layer4 = self._make_layer(block, 512, layers[3], stride=2) 113 | self.avgpool = nn.AdaptiveAvgPool2d((1, 1)) 114 | self.fc = nn.Linear(512 * block.expansion, num_classes) 115 | 116 | for m in self.modules(): 117 | if isinstance(m, nn.Conv2d): 118 | nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') 119 | elif isinstance(m, nn.BatchNorm2d): 120 | nn.init.constant_(m.weight, 1) 121 | nn.init.constant_(m.bias, 0) 122 | 123 | # Zero-initialize the last BN in each residual branch, 124 | # so that the residual branch starts with zeros, and each residual block behaves like an identity. 
125 | # This improves the model by 0.2~0.3% according to https://arxiv.org/abs/1706.02677 126 | if zero_init_residual: 127 | for m in self.modules(): 128 | if isinstance(m, Bottleneck): 129 | nn.init.constant_(m.bn3.weight, 0) 130 | elif isinstance(m, BasicBlock): 131 | nn.init.constant_(m.bn2.weight, 0) 132 | 133 | def _make_layer(self, block, planes, blocks, stride=1): 134 | downsample = None 135 | if stride != 1 or self.inplanes != planes * block.expansion: 136 | downsample = nn.Sequential( 137 | conv1x1(self.inplanes, planes * block.expansion, stride), 138 | nn.BatchNorm2d(planes * block.expansion), 139 | ) 140 | 141 | layers = [] 142 | layers.append(block(self.inplanes, planes, stride, downsample)) 143 | self.inplanes = planes * block.expansion 144 | for _ in range(1, blocks): 145 | layers.append(block(self.inplanes, planes)) 146 | 147 | return nn.Sequential(*layers) 148 | 149 | def forward(self, x): 150 | x = self.conv1(x) 151 | x = self.bn1(x) 152 | x = self.relu(x) 153 | x = self.maxpool(x) 154 | 155 | x = self.layer1(x) 156 | x = self.layer2(x) 157 | x = self.layer3(x) 158 | x = self.layer4(x) 159 | 160 | x = self.avgpool(x) 161 | x = x.view(x.size(0), -1) 162 | x = self.fc(x) 163 | 164 | return x 165 | 166 | 167 | def resnet18(pretrained=False, **kwargs): 168 | """Constructs a ResNet-18 model. 169 | 170 | Args: 171 | pretrained (bool): If True, returns a model pre-trained on ImageNet 172 | """ 173 | model = ResNet(BasicBlock, [2, 2, 2, 2], **kwargs) 174 | if pretrained: 175 | model.load_state_dict(model_zoo.load_url(model_urls['resnet18'])) 176 | return model 177 | 178 | 179 | def resnet34(pretrained=False, **kwargs): 180 | """Constructs a ResNet-34 model. 181 | 182 | Args: 183 | pretrained (bool): If True, returns a model pre-trained on ImageNet 184 | """ 185 | model = ResNet(BasicBlock, [3, 4, 6, 3], **kwargs) 186 | if pretrained: 187 | model.load_state_dict(model_zoo.load_url(model_urls['resnet34'])) 188 | return model 189 | 190 | 191 | def resnet50(pretrained=False, **kwargs): 192 | """Constructs a ResNet-50 model. 193 | 194 | Args: 195 | pretrained (bool): If True, returns a model pre-trained on ImageNet 196 | """ 197 | model = ResNet(Bottleneck, [3, 4, 6, 3], **kwargs) 198 | if pretrained: 199 | model.load_state_dict(model_zoo.load_url(model_urls['resnet50'])) 200 | return model 201 | 202 | 203 | def resnet101(pretrained=False, **kwargs): 204 | """Constructs a ResNet-101 model. 205 | 206 | Args: 207 | pretrained (bool): If True, returns a model pre-trained on ImageNet 208 | """ 209 | model = ResNet(Bottleneck, [3, 4, 23, 3], **kwargs) 210 | if pretrained: 211 | model.load_state_dict(model_zoo.load_url(model_urls['resnet101'])) 212 | return model 213 | 214 | 215 | def resnet152(pretrained=False, **kwargs): 216 | """Constructs a ResNet-152 model. 
217 | 
218 |     Args:
219 |         pretrained (bool): If True, returns a model pre-trained on ImageNet
220 |     """
221 |     model = ResNet(Bottleneck, [3, 8, 36, 3], **kwargs)
222 |     if pretrained:
223 |         model.load_state_dict(model_zoo.load_url(model_urls['resnet152']))
224 |     return model
225 | 
--------------------------------------------------------------------------------
/src/Utils.py:
--------------------------------------------------------------------------------
1 | import os
2 | import numpy as np
3 | import torch
4 | import torch.utils.data as data
5 | from scipy.misc import imread
6 | from PIL import Image
7 | from Network import *
8 | from sklearn.metrics import average_precision_score
9 | 
10 | models = {
11 |     'resnet18': resnet18,
12 |     'resnet34': resnet34,
13 |     'resnet50': resnet50,
14 |     'resnet101': resnet101,
15 |     'resnet152': resnet152
16 | }
17 | 
18 | 
19 | def compute_mAP(labels, outputs):
20 |     y_true = labels.cpu().numpy()
21 |     y_pred = outputs.cpu().numpy()
22 |     AP = []
23 |     for i in range(y_true.shape[0]):
24 |         AP.append(average_precision_score(y_true[i], y_pred[i]))
25 |     return np.mean(AP)
26 | 
27 | 
28 | def eval_map(net, logger, val_loader, steps, gpu, crops):
29 |     if gpu is not None:
30 |         net.cuda()
31 |         os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
32 |         os.environ["CUDA_VISIBLE_DEVICES"] = str(gpu)
33 |     mAP = []
34 |     net.eval()
35 |     for i, (images, labels) in enumerate(val_loader):
36 |         images = images.view((-1, 3, 224, 224))
37 |         if gpu is not None:
38 |             images = images.cuda()
39 | 
40 |         # Forward pass only (no backward/optimize during evaluation)
41 |         outputs = net(images)
42 |         outputs = outputs.cpu().data
43 |         if crops != 0:
44 |             outputs = outputs.view((-1, crops, 20))
45 |             outputs = outputs.mean(dim=1).view((-1, 20))
46 |         else:
47 |             outputs = outputs.view((-1, 20))
48 | 
49 |         # score = tnt.meter.mAPMeter(outputs, labels)
50 |         mAP.append(compute_mAP(labels, outputs))
51 | 
52 |     if logger is not None:
53 |         logger.scalar_summary('mAP', np.mean(mAP), steps)
54 |     print('TESTING: %d), mAP %.2f%%' % (steps, 100 * np.mean(mAP)))
55 |     net.train()
56 | 
57 | 
58 | def eval_macc(val_loader, model_path="../checkpoints/resnet18_190515_2049_001.pth",
59 |               model="resnet18", gpu=None, crops=0):
60 |     """
61 |     Evaluate a model on a dataset, using mAcc as index
62 |     :param val_loader: the dataloader (torch.utils.data.DataLoader) object
63 |     :param model_path: the path to the model
64 |     :param model: which kind the model is
65 |     :param gpu: which gpu to use
66 |     :param crops: how many random crops
67 |     :return: mAcc on the dataset
68 |     """
69 |     net = load_model_from_file(model_path, model=model, load_fc=True)
70 |     if gpu is not None:
71 |         net.cuda()
72 |         os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
73 |         os.environ["CUDA_VISIBLE_DEVICES"] = str(gpu)
74 | 
75 |     acc = []
76 |     net.eval()
77 |     for idx, (images, labels) in enumerate(val_loader):
78 |         images = images.view((-1, 3, 224, 224))
79 |         if gpu is not None:
80 |             images = images.cuda()
81 | 
82 |         outputs = net(images)
83 |         outputs = outputs.cpu().data
84 |         if crops != 0:
85 |             outputs = outputs.view((-1, crops, 20))
86 |             outputs = outputs.max(dim=1)[0].view((-1, 20))
87 |         else:
88 |             outputs = outputs.view((-1, 20))
89 | 
90 |         # outputs: shape [batchsize, num_classes]
91 |         outputs = (outputs > 0)
92 |         acc.append(np.sum((outputs.numpy() == labels.numpy()).astype(float)) / labels.numpy().size)  # size == batch * 20, also correct for a smaller last batch
93 | 
94 |         print("Evaluating mAcc, batch %d" % idx, end='\r')
95 | 
96 |     macc = sum(acc) / len(acc)
97 |     print("\nFinal mAcc: %f" % macc)
98 |     return macc
99 | 
100 | 
101 | def 
eval_wacc(val_loader, model_path="../checkpoints/resnet18_190515_2049_001.pth",
102 |               model="resnet18", gpu=None, crops=0):
103 |     """
104 |     Evaluate a model on a dataset, using wAcc as index
105 |     :param val_loader: the dataloader (torch.utils.data.DataLoader) object
106 |     :param model_path: the path to the model
107 |     :param model: which kind the model is
108 |     :param gpu: which gpu to use
109 |     :param crops: how many random crops
110 |     :return: wAcc on the dataset
111 |     """
112 |     net = load_model_from_file(model_path, model=model, load_fc=True)
113 |     if gpu is not None:
114 |         net.cuda()
115 |         os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
116 |         os.environ["CUDA_VISIBLE_DEVICES"] = str(gpu)
117 | 
118 |     acc = np.zeros(20)
119 |     net.eval()
120 |     freq = np.zeros(20)
121 | 
122 |     for idx, (images, labels) in enumerate(val_loader):
123 |         # Frequency of the labels
124 |         freq += np.sum(labels.numpy(), axis=0)
125 |         images = images.view((-1, 3, 224, 224))
126 | 
127 |         if gpu is not None:
128 |             images = images.cuda()
129 |         outputs = net(images)
130 |         outputs = outputs.cpu().data
131 |         if crops != 0:
132 |             outputs = outputs.view((-1, crops, 20))
133 |             outputs = outputs.max(dim=1)[0].view((-1, 20))
134 |         else:
135 |             outputs = outputs.view((-1, 20))
136 |         outputs = (outputs > 0)
137 |         acc += np.sum((outputs.numpy() == labels.numpy()), axis=0).astype(float)
138 | 
139 |         print("Evaluating wAcc, batch %d" % idx, end="\r")
140 | 
141 |     freq = freq / np.sum(freq)
142 |     acc = acc / len(val_loader.dataset)
143 | 
144 |     wacc = np.dot(freq, acc)
145 |     print("\nFinal wAcc: %f" % wacc)
146 |     return wacc
147 | 
148 | 
149 | def eval_f1(val_loader, model_path="../checkpoints/resnet18_190515_2049_001.pth",
150 |             model="resnet18", gpu=None, crops=0):
151 |     """
152 |     Evaluate a model on a dataset, using F1 as index
153 |     :param val_loader: the dataloader (torch.utils.data.DataLoader) object
154 |     :param model_path: the path to the model
155 |     :param model: which kind the model is
156 |     :param gpu: which gpu to use
157 |     :param crops: how many random crops
158 |     :return: F1 score on the dataset
159 |     """
160 |     net = load_model_from_file(model_path, model=model, load_fc=True)
161 |     if gpu is not None:
162 |         net.cuda()
163 |         os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
164 |         os.environ["CUDA_VISIBLE_DEVICES"] = str(gpu)
165 | 
166 |     f1 = []
167 |     precision = []
168 |     recall = []
169 |     net.eval()
170 |     for idx, (images, labels) in enumerate(val_loader):
171 |         images = images.view((-1, 3, 224, 224))
172 |         if gpu is not None:
173 |             images = images.cuda()
174 | 
175 |         outputs = net(images)
176 |         outputs = outputs.cpu().data
177 |         if crops != 0:
178 |             outputs = outputs.view((-1, crops, 20))
179 |             outputs = outputs.max(dim=1)[0].view((-1, 20))
180 |         else:
181 |             outputs = outputs.view((-1, 20))
182 | 
183 |         # outputs: shape [batchsize, num_classes]
184 |         outputs = (outputs > 0)
185 |         TP = np.sum((outputs.numpy() == 1) & (labels.numpy() == 1))
186 |         # TN = np.sum((outputs.numpy() == 0) & (labels.numpy() == 0))
187 |         FN = np.sum((outputs.numpy() == 0) & (labels.numpy() == 1))
188 |         FP = np.sum((outputs.numpy() == 1) & (labels.numpy() == 0))
189 |         precision.append(TP / max(TP + FP, 1))  # guard against division by zero
190 |         recall.append(TP / max(TP + FN, 1))
191 |         f1.append((2 * precision[-1] * recall[-1]) / max(precision[-1] + recall[-1], 1e-12))
192 | 
193 |         print("Evaluating f1, batch %d" % idx, end='\r')
194 | 
195 |     mf1 = sum(f1) / len(f1)
196 |     mprecision = sum(precision) / len(precision)
197 |     mrecall = sum(recall) / len(recall)
198 |     print("\nFinal f1-score: %f" % mf1)
199 |     print("precision: %f" % 
mprecision)
200 |     print("recall: %f" % mrecall)
201 |     return mf1
202 | 
203 | 
204 | def predict(transform, model_path='../checkpoints/190513.2359_011_0.917.pth', img_path='../test.jpg', model="resnet18",
205 |             crops=0, gpu=None):
206 |     """
207 |     Predict an image with the model
208 |     :param transform: the torchvision.transforms object. A proper transform may help prediction
209 |     :param model_path: the path to the model
210 |     :param img_path: the path to the image
211 |     :param model: the kind of the model
212 |     :param gpu: which gpu to use
213 |     :return: None. The result will be shown on the screen directly
214 |     """
215 |     net = load_model_from_file(model_path, model, True)
216 |     if gpu is not None:
217 |         net.cuda()
218 |         os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
219 |         os.environ["CUDA_VISIBLE_DEVICES"] = str(gpu)
220 | 
221 |     net.eval()
222 |     img = imread(img_path, mode='RGB')
223 |     img = Image.fromarray(img)
224 |     if crops == 0:
225 |         img = transform(img)
226 |     else:
227 |         img_crop = []
228 |         for i in range(0, crops):
229 |             img_crop.append(transform(img))
230 |         img = torch.stack(img_crop)
231 |     img = img.view((-1, 3, 224, 224))
232 |     if gpu is not None:
233 |         img = img.cuda()
234 | 
235 |     outputs = net(img)
236 |     outputs = outputs.cpu().data
237 |     if crops != 0:
238 |         outputs = outputs.view((-1, crops, 20))
239 |         outputs = outputs.max(dim=1)[0].view((-1, 20))
240 |     else:
241 |         outputs = outputs.view((-1, 20))
242 |     print("output tensor:", outputs)
243 |     print("Results:", (outputs > 0) * 1)
244 |     Categories = np.array(['person', 'bird', 'cat', 'cow',
245 |                            'dog', 'horse', 'sheep', 'aeroplane', 'bicycle',
246 |                            'boat', 'bus', 'car', 'motorbike',
247 |                            'train', 'bottle', 'chair',
248 |                            'diningtable', 'pottedplant', 'sofa', 'tvmonitor'])
249 |     print("Categories:", Categories[np.where(outputs[0].numpy() > 0)])
250 | 
251 | 
252 | def adjust_learning_rate(optimizer, epoch, init_lr, step=80, decay=0.1):
253 |     """
254 |     This function adjusts the learning rate automatically during training.
255 |     https://www.pytorchtutorial.com/pytorch-learning-rate-decay/
256 |     :param optimizer: the optimizer
257 |     :param epoch: current epoch
258 |     :param init_lr: initial learning rate
259 |     :param step: number of epochs between two decays
260 |     :param decay: multiplicative decay factor
261 |     :return: Nothing
262 |     """
263 |     lr = init_lr * (decay ** (epoch // step))
264 |     for param_group in optimizer.param_groups:
265 |         param_group['lr'] = lr
266 | 
267 | 
268 | def load_model_from_file(filepath, model="resnet18", load_fc=None):
269 |     """
270 |     Load the trained model from .pth file. 
Only for the same model trained before 271 | :param filepath: the path to .pth file 272 | :param model: the backbone network 273 | :param load_fc: whether to load fc layer 274 | :return: loaded model 275 | """ 276 | # Get the initial network 277 | dict_init = torch.load(filepath) 278 | keys = [k for k, v in dict_init.items()] 279 | keys.sort() 280 | # Generate a new network 281 | net = models[model](pretrained=False, num_classes=20) 282 | model_dict = net.state_dict() 283 | # load the layers 284 | to_load = [] 285 | for k in keys: 286 | if k not in model_dict: 287 | continue 288 | if load_fc is not None or 'fc' not in k: 289 | to_load.append(k) 290 | # load the dict 291 | dict_init = {k: v for k, v in dict_init.items() if k in to_load and k in model_dict} 292 | model_dict.update(dict_init) 293 | net.load_state_dict(model_dict) 294 | 295 | return net 296 | 297 | 298 | class Logger: 299 | def __init__(self, path): 300 | self.path = path 301 | 302 | def scalar_summary(self, name, value, steps): 303 | self.__dict__[name] = (steps, value) 304 | 305 | 306 | class MyDataLoader(data.Dataset): 307 | def __init__(self, transform, trainval='train', data_path='../dataset', random_crops=0): 308 | """ 309 | Initialize the dataset. Inherited from torch.data.Dataset, __len__ and __getitem__ need to be implemented. 310 | VOC(Labels only) tree: 311 | --dataset root 312 | |--train 313 | | |--JPEGImages(dir) 314 | | |--annotations.txt 315 | | 316 | |--test 317 | |--JPEGImages(dir) 318 | |--annotations.txt 319 | :param transform: the transformation 320 | :param data_path: the root of the datapath 321 | :param random_crops: 322 | """ 323 | self.data_path = data_path 324 | self.transform = transform 325 | self.random_crops = random_crops 326 | self.train_or_test = trainval 327 | 328 | self.__init_classes() 329 | self.names, self.labels = self.__dataset_info() 330 | 331 | def __getitem__(self, index): 332 | """ 333 | This is the getitem func which enables enumerator. Implemented. 334 | :param index: the index of the picture 335 | :return: tuple (picture, its label(s)) 336 | """ 337 | x = imread(os.path.join(self.data_path, self.train_or_test, 'JPEGImages', self.names[index] + '.jpg'), 338 | mode='RGB') 339 | x = Image.fromarray(x) 340 | 341 | # Resize directly 342 | x = x.resize((224, 224), Image.BILINEAR) 343 | 344 | if self.random_crops == 0: 345 | x = self.transform(x) 346 | else: 347 | crops = [] 348 | for i in range(self.random_crops): 349 | crops.append(self.transform(x)) 350 | x = torch.stack(crops) 351 | 352 | y = self.labels[index] 353 | return x, y 354 | 355 | def __len__(self): 356 | """ 357 | How many images are there. Implemented. 358 | :return: length 359 | """ 360 | return len(self.names) 361 | 362 | def __dataset_info(self): 363 | """ 364 | Generate names(np.array, with string elements) and labels(np.array, with array(number) elements). 
365 |         A label looks like this: [0 0 0 0 0 1 0 0 0 0 0 0 1 0 0 1 0 0 0 0]
366 |         A value of 1 means the corresponding object appears in this image.
367 |         :return: names, labels
368 |         """
369 |         annotation_file = os.path.join(self.data_path, self.train_or_test, 'annotations.txt')
370 |         with open(annotation_file, 'r') as fp:
371 |             lines = fp.readlines()
372 | 
373 |         names = []
374 |         labels = []
375 |         for line in lines:
376 |             # Name
377 |             names.append(line.strip('\n').split(' ')[0])
378 | 
379 |             # Label: the annotation stores class indices; turn them into a 20-dim 0/1 vector
380 |             str_label = line.strip('\n').split(' ')[1:]
381 |             num_label = [int(x) for x in str_label]
382 |             flag_label = np.zeros(self.num_classes)
383 |             flag_label[num_label] = 1
384 | 
385 |             labels.append(np.array(flag_label))
386 | 
387 |         return np.array(names), np.array(labels).astype(np.float32)
388 | 
389 |     def __init_classes(self):
390 |         self.classes = ('person', 'bird', 'cat', 'cow',
391 |                         'dog', 'horse', 'sheep', 'aeroplane', 'bicycle',
392 |                         'boat', 'bus', 'car', 'motorbike',
393 |                         'train', 'bottle', 'chair',
394 |                         'diningtable', 'pottedplant', 'sofa', 'tvmonitor')
395 |         self.num_classes = len(self.classes)
396 |         self.class_to_ind = dict(zip(self.classes, range(self.num_classes)))
397 | 
--------------------------------------------------------------------------------