├── .gitignore
├── gen_dataset.py
├── threshold.py
├── src
│   ├── Test.py
│   ├── Train.py
│   ├── Network.py
│   └── Utils.py
├── README.md
└── reference
    ├── PascalLoader.py
    ├── PascalNetwork.py
    └── Loader.py
/.gitignore:
--------------------------------------------------------------------------------
1 | */**/*checkpoint*
2 | .idea
3 | __pycache__
4 | dataset/
5 | checkpoints/
6 | *.jpg
7 | *.out
--------------------------------------------------------------------------------
/gen_dataset.py:
--------------------------------------------------------------------------------
1 | import os
2 | import shutil
3 | import argparse
4 | 
5 | 
6 | def gen_dataset(dataset_root="/home/tsinghuaee13/dataset"):
7 |     origin_path = os.path.join(dataset_root, "origin")
8 |     train_path = os.path.join(dataset_root, "train")
9 |     test_path = os.path.join(dataset_root, "test")
10 | 
11 |     if not os.path.exists(train_path):
12 |         os.mkdir(train_path)
13 |         os.mkdir(os.path.join(train_path, "JPEGImages"))
14 | 
15 |     if not os.path.exists(test_path):
16 |         os.mkdir(test_path)
17 |         os.mkdir(os.path.join(test_path, "JPEGImages"))
18 |     # copy images: 2007/2008 pictures go to the test set, the rest to the train set
19 |     for (idx, img_name) in enumerate(os.listdir(os.path.join(origin_path, "JPEGImages"))):
20 |         if img_name[0:4] == "2007" or img_name[0:4] == "2008":
21 |             shutil.copy(os.path.join(origin_path, "JPEGImages", img_name),
22 |                         os.path.join(test_path, "JPEGImages", img_name))
23 |             print("image written, index %d" % idx, end='\r')
24 |         else:
25 |             shutil.copy(os.path.join(origin_path, "JPEGImages", img_name),
26 |                         os.path.join(train_path, "JPEGImages", img_name))
27 |             print("image written, index %d" % idx, end='\r')
28 |     # split the annotation file the same way
29 |     with open(os.path.join(origin_path, "annotations.txt"), "r") as fin:
30 |         with open(os.path.join(test_path, "annotations.txt"), "w") as test_out:
31 |             with open(os.path.join(train_path, 'annotations.txt'), 'w') as train_out:
32 |                 for line in fin.readlines():
33 |                     if line[0:4] == "2007" or line[0:4] == "2008":
34 |                         test_out.write(line)
35 |                     else:
36 |                         train_out.write(line)
37 | 
38 | 
39 | parser = argparse.ArgumentParser(description="A tool used to generate the train set and the test set")
40 | parser.add_argument("datasetpath", nargs='?', default="/home/tsinghuaee13/dataset")  # nargs='?' so the default actually applies
41 | args = parser.parse_args()
42 | 
43 | if __name__ == "__main__":
44 |     gen_dataset(args.datasetpath)
45 | 
--------------------------------------------------------------------------------
/threshold.py:
--------------------------------------------------------------------------------
1 | """
2 | This is added to the project for manually figuring out the best threshold in prediction.
3 | No other file in this project depends on it directly, so it sits at the top level along with the preprocessing script.
4 | """
5 | import torch
6 | import os
7 | import torchvision.transforms as transforms
8 | import matplotlib.pyplot as plt
9 | import numpy as np
10 | import argparse
11 | 
12 | from Utils import predict, eval_macc, MyDataLoader, eval_wacc, eval_map, eval_f1, load_model_from_file
13 | 
14 | if __name__ == '__main__':
15 |     normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
16 |                                      std=[0.229, 0.224, 0.225])
17 | 
18 |     val_transform = transforms.Compose([
19 |         transforms.Resize((224, 224)),
20 |         # transforms.RandomHorizontalFlip(),
21 |         transforms.ToTensor(),
22 |         normalize,
23 |     ])
24 |     testpath = "../dataset"
25 |     batch = 10
26 |     modelpath = "../checkpoints/resnet18_190515_2049_001.pth"
27 |     gpu = "0"
28 |     val_data = MyDataLoader(transform=val_transform, trainval='test', data_path=testpath,
29 |                             random_crops=0)
30 |     val_loader = torch.utils.data.DataLoader(dataset=val_data, batch_size=batch, shuffle=False, num_workers=4)
31 |     net = load_model_from_file(modelpath, "resnet18", True)
32 |     if gpu is not None:
33 |         net.cuda()
34 |         os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
35 |         os.environ["CUDA_VISIBLE_DEVICES"] = str(gpu)
36 | 
37 |     pos = []
38 |     neg = []
39 |     net.eval()
40 |     for idx, (images, labels) in enumerate(val_loader):
41 |         images = images.view((-1, 3, 224, 224))
42 |         if gpu is not None:
43 |             images = images.cuda()
44 | 
45 |         outputs = net(images)
46 |         outputs = outputs.cpu().data
47 |         outputs = outputs.view((-1, 20))
48 | 
49 |         # outputs: shape [batchsize, num_classes]; split the class-0 scores by ground truth
50 |         for i, lbl in enumerate(labels):
51 |             # print(lbl, outputs)
52 |             if lbl[0] == 1:
53 |                 pos.append(outputs[i].numpy())
54 |             else:
55 |                 neg.append(outputs[i].numpy())
56 | 
57 |         print("Evaluating threshold, batch %d" % idx, end='\r')
58 | 
59 |     pos = np.array(pos)
60 |     neg = np.array(neg)
61 |     plt.hist(pos[:, 0])
62 |     plt.savefig("figure1.jpg")
63 |     plt.close()
64 |     plt.hist(neg[:, 0])
65 |     plt.savefig("figure2.jpg")
66 |     plt.close()
67 | 
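# --- Added sketch (not part of the original script) ---
# The histograms above only look at class 0, because pos/neg are split on
# lbl[0]. A hypothetical per-class variant: gather every score and label
# first (as [N, 20] arrays), then redo the positive/negative split per class.
def plot_all_classes(all_scores, all_labels, num_classes=20):
    """all_scores, all_labels: np.ndarray of shape [N, num_classes]."""
    for c in range(num_classes):
        plt.hist(all_scores[all_labels[:, c] == 1, c], alpha=0.5, label='positive')
        plt.hist(all_scores[all_labels[:, c] == 0, c], alpha=0.5, label='negative')
        plt.legend()
        plt.savefig("class_%02d.jpg" % c)
        plt.close()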
--------------------------------------------------------------------------------
/src/Test.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torchvision.transforms as transforms
3 | import argparse
4 | 
5 | from Utils import predict, eval_macc, MyDataLoader, eval_wacc, eval_map, eval_f1, load_model_from_file
6 | 
7 | parser = argparse.ArgumentParser(description='Predict a picture or evaluate the model on a test dataset')
8 | parser.add_argument("modelpath", type=str, help="The model for prediction or evaluation")
9 | parser.add_argument("--mode", type=str, default="evaluate",
10 |                     choices=["predict", "evaluate", "evalmacc", "evalwacc", "evalmap", "evalf1"],
11 |                     help="Whether to predict a single image or evaluate a model on a dataset")
12 | parser.add_argument("--testpath", type=str, required=True, help="The path to the test image or dataset")
13 | parser.add_argument("--gpu", type=int, default=None, help="Which gpu to use (leave unset for CPU)")
14 | parser.add_argument("--model", type=str, required=True, help="Which model architecture the checkpoint was trained with")
15 | parser.add_argument("--crops", type=int, default=0, help="How many random crops to use while testing (0 disables multi-crop)")
16 | parser.add_argument("--batch", type=int, default=8, help="Batch size while evaluating")
17 | args = parser.parse_args()
18 | 
19 | if __name__ == '__main__':
20 |     normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
21 |                                      std=[0.229, 0.224, 0.225])
22 |     if args.crops == 0:
23 |         val_transform = transforms.Compose([
24 |             transforms.Resize((224, 224)),
25 |             # transforms.RandomHorizontalFlip(),
26 |             transforms.ToTensor(),
27 |             normalize,
28 |         ])
29 |     else:
30 |         val_transform = transforms.Compose([
31 |             transforms.RandomResizedCrop((224, 224)),
32 |             # transforms.RandomHorizontalFlip(),
33 |             transforms.ToTensor(),
34 |             normalize,
35 |         ])
36 | 
37 |     if args.mode == "predict":
38 |         predict(val_transform, model_path=args.modelpath, img_path=args.testpath, model=args.model, gpu=args.gpu,
39 |                 crops=args.crops)
40 |     else:
41 |         val_data = MyDataLoader(transform=val_transform, trainval='test', data_path=args.testpath,
42 |                                 random_crops=args.crops)
43 |         val_loader = torch.utils.data.DataLoader(dataset=val_data, batch_size=args.batch, shuffle=False, num_workers=4)
44 |         if args.mode == "evalmacc":
45 |             eval_macc(val_loader, model_path=args.modelpath, model=args.model, gpu=args.gpu, crops=args.crops)
46 |         if args.mode == "evalwacc":
47 |             eval_wacc(val_loader, model_path=args.modelpath, model=args.model, gpu=args.gpu, crops=args.crops)
48 |         if args.mode == "evalmap":
49 |             eval_map(load_model_from_file(args.modelpath, model=args.model, load_fc=1), logger=None,
50 |                      val_loader=val_loader, steps=0, gpu=args.gpu, crops=args.crops)
51 |         if args.mode == "evalf1":
52 |             eval_f1(val_loader, model_path=args.modelpath, model=args.model, gpu=args.gpu, crops=args.crops)
53 |         if args.mode == "evaluate":
54 |             eval_macc(val_loader, model_path=args.modelpath, model=args.model, gpu=args.gpu, crops=args.crops)
55 |             eval_wacc(val_loader, model_path=args.modelpath, model=args.model, gpu=args.gpu, crops=args.crops)
56 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # MultiLabelClassification
2 | This is a project for the Media & Recognition course.
3 | 
4 | Reference: https://github.com/bbrattoli/PascalClassificationPytorch
5 | 
6 | ## Preprocess
7 | To train on the 2009-2012 pictures of VOC2012 and test on the 2007-2008 ones (our homework requirement), run:
8 | ```bash
9 | python ./gen_dataset.py {$YOUR_DATASET_PATH}
10 | ```
11 | Note: every dataset path expected by the scripts is the root of a dataset directory with the required structure.
12 | Namely, the structure should look like this:
13 | 
14 |     --dataset_root
15 |       |--origin
16 |       |  |--JPEGImages(dir)
17 |       |  |--annotations.txt
18 |       |
19 |       |--train
20 |       |  |--JPEGImages(dir)
21 |       |  |--annotations.txt
22 |       |
23 |       |--test
24 |          |--JPEGImages(dir)
25 |          |--annotations.txt
26 | 
27 | And YOUR_DATASET_PATH is the dataset_root above.
28 | 
29 | 'origin' is the original dataset, which is PascalVOC2012 in our homework. It differs from the standard VOC2012
30 | dataset in that it has a single annotation file with one line per image, in the format `$FILENAME(without suffix) label0 label1 ...`, where the labels are class indices.
31 | 
32 | If you want to train on other datasets, you should do the preprocessing yourself; for
33 | training you only need the file structure above ('origin' is not needed).
34 | 
35 | ## Training Guide
36 | 
37 | ### Training from scratch
38 | The working directory is src/, so first enter the directory:
39 | ```bash
40 | cd src
41 | ```
42 | Then run:
43 | ```bash
44 | python ./Train.py\
45 |     {$YOUR_DATASET_PATH}\
46 |     --gpu={$YOUR_GPU_INDEX}
47 | ```
48 | This will automatically create a checkpoint folder, and the .pth checkpoint files will be written there.
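Each checkpoint file name encodes the backbone, a timestamp taken when training starts, and the epoch, and one is written every 5 epochs. A minimal sketch of the naming scheme used in Train.py (values are illustrative):
```python
import time

# Train.py pattern: <checkpoint_dir>/<model>_<yymmdd_HHMM>_<epoch+1>.pth
prefix = time.strftime("%y%m%d_%H%M", time.localtime())   # e.g. "190515_2049"
filename = '%s/%s_%s_%03i.pth' % ('../checkpoints', 'resnet18', prefix, 1)
# -> ../checkpoints/resnet18_190515_2049_001.pth
```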
49 | 
50 | You can also use --epochs, --batch, --lr and --model to set the training parameters explicitly:
51 | ```bash
52 | python ./Train.py\
53 |     {$YOUR_DATASET_PATH}\
54 |     --gpu={$YOUR_GPU_INDEX}\
55 |     --epochs={$MAX_TRAINING_EPOCH}\
56 |     --batch={$BATCH_SIZE}\
57 |     --lr={$LEARNING_RATE}\
58 |     --model={$YOUR_MODEL}
59 | ```
60 | Note: the model can be chosen from resnet18, resnet34, resnet50, resnet101 and resnet152.
61 | 
62 | ### Finetuning (may need Internet access)
63 | Passing the parameter '--finetune' enables the PyTorch-provided model pretrained on ImageNet (1000 classes).
64 | The layers close to the input are then frozen, and the fc layer is replaced with a fresh 20-class head for finetuning.
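Under the hood, the finetuning branch of Train.py amounts to the following (a minimal sketch of the actual logic):
```python
import torch
from Network import resnet18

net = resnet18(pretrained=True)             # ImageNet weights via torchvision's model zoo
for name, param in net.named_parameters():
    if 'conv' in name:                      # freeze the layers close to the input
        param.requires_grad = False
net.fc = torch.nn.Linear(net.fc.in_features, 20)  # fresh 20-class head
```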
65 | For example:
66 | ```bash
67 | python ./Train.py\
68 |     ../dataset\
69 |     --gpu=0\
70 |     --finetune=1
71 | ```
72 | ### Restart
73 | Use the parameter '--modelpath' to continue your training after a stop. If you want to load the fc layer as well, add the parameter --fc:
74 | ```bash
75 | python ./Train.py\
76 |     ../dataset\
77 |     --gpu=0\
78 |     --modelpath=../checkpoints/example.pth\
79 |     --fc=1
80 | ```
81 | ## Evaluating Guide
82 | Run Test.py to evaluate your model on a dataset (outputs mAcc and wAcc):
83 | ```bash
84 | python ./Test.py\
85 |     {$YOUR_MODEL_PATH}\
86 |     --model={$YOUR_MODEL}\
87 |     --mode=evaluate\
88 |     --testpath={$DATASET_ROOT}\
89 |     --gpu=0
90 | ```
91 | mAP is not a criterion of our homework, but Test.py still supports it: pass --mode=evalmap, which calls the
92 | eval_map function in Utils.py. --mode=evalmacc, --mode=evalwacc and --mode=evalf1 likewise report the individual metrics.
93 | 
94 | If you want to predict the labels of a single image, run:
95 | ```bash
96 | python ./Test.py\
97 |     {$YOUR_MODEL_PATH}\
98 |     --model={$YOUR_MODEL}\
99 |     --mode=predict\
100 |     --testpath={$IMAGE_PATH}\
101 |     --gpu=0
102 | ```
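The same evaluation can also be driven from Python. A minimal sketch, assuming a checkpoint at ../checkpoints/example.pth (hypothetical path) and the dataset layout above:
```python
import torch
import torchvision.transforms as transforms
from Utils import MyDataLoader, load_model_from_file, eval_map

normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])
transform = transforms.Compose([transforms.Resize((224, 224)),
                                transforms.ToTensor(), normalize])
val_data = MyDataLoader(transform=transform, trainval='test',
                        data_path='../dataset', random_crops=0)
val_loader = torch.utils.data.DataLoader(dataset=val_data, batch_size=8, shuffle=False)
net = load_model_from_file('../checkpoints/example.pth', model='resnet18', load_fc=1)
eval_map(net, logger=None, val_loader=val_loader, steps=0, gpu=None, crops=0)
```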
--------------------------------------------------------------------------------
/reference/PascalLoader.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on Fri Aug 18 11:58:07 2017
4 | 
5 | @author: Biagio Brattoli
6 | """
7 | import os, numpy as np
8 | import torch
9 | import torch.utils.data as data
10 | from scipy.misc import imread, imresize
11 | # from scipy.sparse import csr_matrix
12 | from PIL import Image
13 | import xml.etree.ElementTree as ET
14 | 
15 | class DataLoader(data.Dataset):
16 |     def __init__(self, data_path, trainval, transform, random_crops=0):
17 |         self.data_path = data_path
18 |         self.transform = transform
19 |         self.random_crops = random_crops
20 |         self.trainval = trainval
21 | 
22 |         self.__init_classes()
23 |         self.names, self.labels = self.__dataset_info()
24 | 
25 |     def __getitem__(self, index):
26 |         x = imread(self.data_path+'/JPEGImages/'+self.names[index]+'.jpg', mode='RGB')
27 |         x = Image.fromarray(x)
28 | 
29 |         scale = np.random.rand()*2+0.25
30 |         w = int(x.size[0]*scale)
31 |         h = int(x.size[1]*scale)
32 |         if min(w, h) < 227:
33 |             scale = 227/min(w, h)
34 |             w = int(x.size[0]*scale)
35 |             h = int(x.size[1]*scale)
36 | 
37 |         #x = x.resize((w,h), Image.BILINEAR) # Random scale
38 | 
39 |         if self.random_crops == 0:
40 |             x = self.transform(x)
41 |         else:
42 |             crops = []
43 |             for i in range(self.random_crops):
44 |                 crops.append(self.transform(x))
45 |             x = torch.stack(crops)
46 | 
47 |         y = self.labels[index]
48 |         return x, y
49 | 
50 |     def __len__(self):
51 |         return len(self.names)
52 | 
53 |     def __dataset_info(self):
54 |         #annotation_files = os.listdir(self.data_path+'/Annotations')
55 |         with open(self.data_path+'/ImageSets/Main/'+self.trainval+'.txt') as f:
56 |             annotations = f.readlines()
57 | 
58 |         annotations = [n[:-1] for n in annotations]
59 | 
60 |         names = []
61 |         labels = []
62 |         for af in annotations:
63 |             if len(af) != 6:
64 |                 continue
65 |             filename = os.path.join(self.data_path, 'Annotations', af)
66 |             tree = ET.parse(filename+'.xml')
67 |             objs = tree.findall('object')
68 |             num_objs = len(objs)
69 | 
70 |             boxes = np.zeros((num_objs, 4), dtype=np.uint16)
71 |             boxes_cl = np.zeros((num_objs), dtype=np.int32)
72 | 
73 |             for ix, obj in enumerate(objs):
74 |                 bbox = obj.find('bndbox')
75 |                 # Make pixel indexes 0-based
76 |                 x1 = float(bbox.find('xmin').text) - 1
77 |                 y1 = float(bbox.find('ymin').text) - 1
78 |                 x2 = float(bbox.find('xmax').text) - 1
79 |                 y2 = float(bbox.find('ymax').text) - 1
80 | 
81 |                 cls = self.class_to_ind[obj.find('name').text.lower().strip()]
82 |                 boxes[ix, :] = [x1, y1, x2, y2]
83 |                 boxes_cl[ix] = cls
84 | 
85 |             lbl = np.zeros(self.num_classes)
86 |             lbl[boxes_cl] = 1
87 |             labels.append(lbl)
88 |             names.append(af)
89 | 
90 |         return np.array(names), np.array(labels).astype(np.float32)
91 | 
92 |     def __init_classes(self):
93 |         self.classes = ('__background__', 'aeroplane', 'bicycle', 'bird', 'boat',
94 |                         'bottle', 'bus', 'car', 'cat', 'chair',
95 |                         'cow', 'diningtable', 'dog', 'horse',
96 |                         'motorbike', 'person', 'pottedplant',
97 |                         'sheep', 'sofa', 'train', 'tvmonitor')
98 |         self.num_classes = len(self.classes)
99 |         self.class_to_ind = dict(zip(self.classes, range(self.num_classes)))
100 | 
101 | 
102 | 
--------------------------------------------------------------------------------
/reference/PascalNetwork.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on Wed Sep 13 15:57:01 2017
4 | 
5 | @author: bbrattol
6 | """
7 | import torch
8 | import torch.nn as nn
9 | 
10 | import sys
11 | sys.path.append('../Utils')
12 | from torch.nn import LocalResponseNorm as LRN  # the 'lrn' modules below are local response norms, not BatchNorm2d
13 | from collections import OrderedDict
14 | 
15 | 
16 | class Network(nn.Module):
17 | 
18 |     def __init__(self, num_classes=21, groups=2):
19 |         super(Network, self).__init__()
20 |         self.conv = nn.Sequential()
21 |         self.fc6 = nn.Sequential()
22 | 
23 |     def vggnet(self, num_classes=21, groups=2):
24 |         self.conv.add_module('conv1_s1', nn.Conv2d(3, 96, kernel_size=11, stride=4, padding=0))
25 |         self.conv.add_module('relu1_s1', nn.ReLU(inplace=True))
26 |         # self.conv.add_module('bn1_s1',nn.BatchNorm2d(96))
27 |         self.conv.add_module('pool1_s1', nn.MaxPool2d(kernel_size=3, stride=2))
28 |         self.conv.add_module('lrn1_s1', LRN(size=5, alpha=0.0001, beta=0.75))
29 | 
30 |         self.conv.add_module('conv2_s1', nn.Conv2d(96, 256, kernel_size=5, padding=2, groups=groups))
31 |         self.conv.add_module('relu2_s1', nn.ReLU(inplace=True))
32 |         # self.conv.add_module('bn2_s1',nn.BatchNorm2d(256))
33 |         self.conv.add_module('pool2_s1', nn.MaxPool2d(kernel_size=3, stride=2))
34 |         self.conv.add_module('lrn2_s1', LRN(size=5, alpha=0.0001, beta=0.75))
35 | 
36 |         self.conv.add_module('conv3_s1', nn.Conv2d(256, 384, kernel_size=3, padding=1))
37 |         self.conv.add_module('relu3_s1', nn.ReLU(inplace=True))
38 |         # self.conv.add_module('bn3_s1',nn.BatchNorm2d(384))
39 | 
40 |         self.conv.add_module('conv4_s1', nn.Conv2d(384, 384, kernel_size=3, padding=1, groups=groups))
41 |         # self.conv.add_module('bn4_s1',nn.BatchNorm2d(384))
42 |         self.conv.add_module('relu4_s1', nn.ReLU(inplace=True))
43 | 
44 |         self.conv.add_module('conv5_s1', nn.Conv2d(384, 256, kernel_size=3, padding=1, groups=groups))
45 |         # self.conv.add_module('bn5_s1',nn.BatchNorm2d(256))
46 |         self.conv.add_module('relu5_s1', nn.ReLU(inplace=True))
47 |         self.conv.add_module('pool5_s1', nn.MaxPool2d(kernel_size=3, stride=2))
48 | 
49 |         self.fc6.add_module('fc6_s1', nn.Linear(256 * 6 * 6, 4096))
50 |         self.fc6.add_module('relu6_s1', nn.ReLU(inplace=True))
51 |         self.fc6.add_module('drop6_s1', nn.Dropout(p=0.5))
52 | 
53 |         self.fc7 = nn.Sequential()
54 |         self.fc7.add_module('fc7', nn.Linear(4096, 4096))
55 |         self.fc7.add_module('relu7', nn.ReLU(inplace=True))
56 |         self.fc7.add_module('drop7', nn.Dropout(p=0.5))
57 | 
58 |         self.classifier = nn.Sequential()
59 |         self.classifier.add_module('fc8', 
nn.Linear(4096, num_classes))
60 | 
61 |     def load(self, checkpoint, load_fc=False):
62 |         model_dict = self.state_dict()
63 |         layers = [k for k, v in model_dict.items()]
64 | 
65 |         pretrained_dict = torch.load(checkpoint)
66 |         keys = [k for k, v in pretrained_dict.items()]
67 |         keys.sort()
68 |         #keys = keys[2:-4] #load until conv5
69 | 
70 |         to_load = []
71 |         for k in keys:
72 |             if k not in model_dict:
73 |                 continue
74 |             # if 'conv5' in k or 'bn5' in k:
75 |             #     continue
76 |             if 'conv' in k:
77 |                 to_load.append(k)
78 |             if 'fc' in k and load_fc:
79 |                 to_load.append(k)
80 | 
81 |         pretrained_dict = {k: v for k, v in pretrained_dict.items() if k in to_load and k in model_dict}
82 |         model_dict.update(pretrained_dict)
83 |         self.load_state_dict(model_dict)
84 | 
85 |     def save(self, checkpointFold, epoch):
86 |         filename = '%s/jps_%03i.pth.tar' % (checkpointFold, epoch)
87 |         torch.save(self.state_dict(), filename)
88 | 
89 |     def forward(self, x):
90 |         b, c, h, w = x.size()
91 |         x = self.conv(x)
92 |         x = self.fc6(x.view(b, -1))
93 |         x = self.fc7(x)
94 |         x = self.classifier(x)
95 |         return x
96 | 
97 | 
--------------------------------------------------------------------------------
/reference/Loader.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on Fri Aug 18 11:58:07 2017
4 | 
5 | @author: Biagio Brattoli
6 | """
7 | import os
8 | import numpy as np
9 | import torch
10 | import torch.utils.data as data
11 | from scipy.misc import imread, imresize
12 | # from scipy.sparse import csr_matrix
13 | from PIL import Image
14 | # import xml.etree.ElementTree as ET
15 | 
16 | 
17 | class MyDataLoader(data.Dataset):
18 |     def __init__(self, transform, trainval='train', data_path='../VOC2012', random_crops=0):
19 |         """
20 |         Initialize the dataset.
21 |         VOC(Labels only) tree:
22 |         --root
23 |           |--train
24 |           |  |--JPEGImages(dir)
25 |           |  |--annotations.txt
26 |           |
27 |           |--test
28 |              |--JPEGImages(dir)
29 |              |--annotations.txt
30 |         :param transform: the transformation
31 |         :param data_path: the root of the datapath
32 |         :param random_crops: number of random crops (0 disables multi-crop)
33 |         """
34 |         self.data_path = data_path
35 |         self.transform = transform
36 |         self.random_crops = random_crops
37 |         self.train_or_test = trainval
38 | 
39 |         self.__init_classes()
40 |         self.names, self.labels = self.__dataset_info()
41 | 
42 |     def __getitem__(self, index):
43 |         """
44 |         This is the getitem func which enables the usage of the [] operator
45 |         :param index: the index of the picture
46 |         :return: tuple (picture, its label(s))
47 |         """
48 |         x = imread(os.path.join(self.data_path, self.train_or_test, 'JPEGImages', self.names[index] + '.jpg'),
49 |                    mode='RGB')
50 |         x = Image.fromarray(x)
51 | 
52 |         # Resize directly instead of the random-scale logic kept commented out below
53 |         x = x.resize((224, 224))
54 | 
55 |         # scale = np.random.rand() * 2 + 0.25
56 |         # w = int(x.size[0] * scale)
57 |         # h = int(x.size[1] * scale)
58 |         # if min(w, h) < 227:
59 |         #     scale = 227 / min(w, h)
60 |         #     w = int(x.size[0] * scale)
61 |         #     h = int(x.size[1] * scale)
62 | 
63 |         # x = x.resize((w,h), Image.BILINEAR) # Random scale
64 | 
65 |         if self.random_crops == 0:
66 |             x = self.transform(x)
67 |         else:
68 |             crops = []
69 |             for i in range(self.random_crops):
70 |                 crops.append(self.transform(x))
71 |             x = torch.stack(crops)
72 | 
73 |         y = self.labels[index]
74 |         return x, y
75 | 
76 |     def __len__(self):
77 |         return len(self.names)
78 | 
79 |     def __dataset_info(self):
80 |         """
81 |         Generate names(np.array, with string elements) and labels(np.array, with array(number) elements).
82 |         :return: names, labels
83 |         """
84 |         annotation_file = os.path.join(self.data_path, self.train_or_test, 'annotations.txt')  # annotations live under train/ or test/
85 |         with open(annotation_file, 'r') as fp:
86 |             lines = fp.readlines()
87 | 
88 |         names = []
89 |         labels = []
90 |         for line in lines:
91 |             names.append(line.split()[0])          # whitespace-separated: name label0 label1 ...
92 |             labels.append(np.array(line.split()[1:]))
93 | 
94 |         return np.array(names), np.array(labels).astype(np.float32)
95 | 
96 | 
97 |     # annotation_files = os.listdir(self.data_path+'/Annotations')
98 |     # with open(self.data_path + '/ImageSets/Main/' + self.trainval + '.txt') as f:
99 |     #     annotations = f.readlines()
100 |     #
101 |     # annotations = [n[:-1] for n in annotations]
102 |     #
103 |     # names = []
104 |     # labels = []
105 |     # for af in annotations:
106 |     #     if len(af) != 6:
107 |     #         continue
108 |     #     filename = os.path.join(self.data_path, 'Annotations', af)
109 |     #     tree = ET.parse(filename + '.xml')
110 |     #     objs = tree.findall('object')
111 |     #     num_objs = len(objs)
112 |     #
113 |     #     boxes = np.zeros((num_objs, 4), dtype=np.uint16)
114 |     #     boxes_cl = np.zeros((num_objs), dtype=np.int32)
115 |     #
116 |     #     for ix, obj in enumerate(objs):
117 |     #         bbox = obj.find('bndbox')
118 |     #         # Make pixel indexes 0-based
119 |     #         x1 = float(bbox.find('xmin').text) - 1
120 |     #         y1 = float(bbox.find('ymin').text) - 1
121 |     #         x2 = float(bbox.find('xmax').text) - 1
122 |     #         y2 = float(bbox.find('ymax').text) - 1
123 |     #
124 |     #         cls = self.class_to_ind[obj.find('name').text.lower().strip()]
125 |     #         boxes[ix, :] = [x1, y1, x2, y2]
126 |     #         boxes_cl[ix] = cls
127 |     #
128 |     #     lbl = np.zeros(self.num_classes)
129 |     #     lbl[boxes_cl] = 1
130 |     #     labels.append(lbl)
131 |     #     names.append(af)
132 |     #
133 |     #     return np.array(names), np.array(labels).astype(np.float32)
134 | 
135 | 
136 |     def __init_classes(self):
137 |         self.classes = ('__background__', 'aeroplane', 'bicycle', 'bird', 'boat',
138 |                         'bottle', 'bus', 'car', 'cat', 'chair',
139 |                         'cow', 'diningtable', 'dog', 'horse',
140 |                         'motorbike', 'person', 'pottedplant',
141 |                         'sheep', 'sofa', 'train', 'tvmonitor')
142 |         self.num_classes = len(self.classes)
143 |         self.class_to_ind = dict(zip(self.classes, range(self.num_classes)))
--------------------------------------------------------------------------------
/src/Train.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | import os
3 | import time
4 | import numpy as np
5 | import argparse
6 | 
7 | # sys.path.append('../Utils')
8 | # TODO: Finish the Logger class in Utils.py (currently a minimal stub)
9 | 
10 | import torch
11 | import torchvision.transforms as transforms
12 | 
13 | CORES = 4  # int(float(multiprocessing.cpu_count())*0.25)
14 | 
15 | from Network import resnet18, resnet34, resnet50, resnet101, resnet152
16 
| from Utils import MyDataLoader, adjust_learning_rate, load_model_from_file, compute_mAP, eval_map 17 | 18 | parser = argparse.ArgumentParser(description='Train network on Pascal VOC 2012') 19 | parser.add_argument('pascal_path', type=str, help='Path to Pascal VOC 2012 folder') 20 | parser.add_argument('--finetune', default=None, type=int, help='whether to use pytorch pretrained model and finetune') 21 | parser.add_argument('--model', default='resnet18', type=str, help='which backbone network to use', 22 | choices=['resnet18', 'resnet34', 'resnet50', 'resnet101', 'resnet152']) 23 | parser.add_argument('--modelpath', default=None, type=str, help='pretrained model path') 24 | parser.add_argument('--fc', default=None, type=int, help='load fc6 and fc7 from model') 25 | parser.add_argument('--gpu', default=None, type=int, help='gpu id') 26 | parser.add_argument('--epochs', default=160, type=int, help='max training epochs') 27 | parser.add_argument('--iter_start', default=0, type=int, help='Starting iteration count') 28 | parser.add_argument('--batch', default=10, type=int, help='batch size') 29 | parser.add_argument('--checkpoint', default='../checkpoints/', type=str, help='checkpoint folder') 30 | parser.add_argument('--lr', default=0.001, type=float, help='learning rate for SGD optimizer') 31 | parser.add_argument('--crops', default=10, type=int, help='number of random crops during testing') 32 | args = parser.parse_args() 33 | # args = parser.parse_args([ 34 | # '../dataset', 35 | # '--gpu','0', 36 | # '--finetune','1' 37 | # '--model','resnet18' 38 | # ]) 39 | prefix = time.strftime("%y%m%d_%H%M", time.localtime()) 40 | models = { 41 | 'resnet18': resnet18, 42 | 'resnet34': resnet34, 43 | 'resnet50': resnet50, 44 | 'resnet101': resnet101, 45 | 'resnet152': resnet152 46 | } 47 | 48 | 49 | def main(): 50 | # Training devices 51 | if args.gpu is not None: 52 | print('Using GPU %d' % args.gpu) 53 | os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID" 54 | os.environ["CUDA_VISIBLE_DEVICES"] = str(args.gpu) 55 | else: 56 | print('CPU mode') 57 | 58 | normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], 59 | std=[0.229, 0.224, 0.225]) 60 | 61 | train_transform = transforms.Compose([ 62 | transforms.RandomResizedCrop(224), 63 | transforms.RandomHorizontalFlip(), 64 | transforms.ToTensor(), 65 | normalize, 66 | ]) 67 | 68 | val_transform = transforms.Compose([ 69 | transforms.RandomResizedCrop(224), 70 | transforms.RandomHorizontalFlip(), 71 | transforms.ToTensor(), 72 | normalize, 73 | ]) 74 | 75 | # Load the dataset. When training, disable the random crop. 76 | train_data = MyDataLoader(transform=train_transform, trainval='train', data_path=args.pascal_path, random_crops=0) 77 | train_loader = torch.utils.data.DataLoader(dataset=train_data, 78 | batch_size=args.batch, 79 | shuffle=True, 80 | num_workers=CORES) 81 | 82 | val_data = MyDataLoader(transform=val_transform, trainval='test', data_path=args.pascal_path, 83 | random_crops=args.crops) 84 | val_loader = torch.utils.data.DataLoader(dataset=val_data, 85 | batch_size=args.batch, 86 | shuffle=False, 87 | num_workers=CORES) 88 | 89 | N = len(train_data.names) 90 | iter_per_epoch = int(N/args.batch) 91 | 92 | # Network initialize 93 | # finetune: freeze some layers and modify the fc layer. 
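    # Three initialization paths below:
    #   (1) --finetune: load torchvision's ImageNet weights, freeze the conv
    #       layers and replace fc with a fresh 20-class head;
    #   (2) --modelpath: resume from a checkpoint saved by this project;
    #   (3) otherwise: train the chosen backbone from scratch.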
94 |     if args.finetune is not None:
95 |         # Initialize the network
96 |         net = models[args.model](pretrained=True)
97 |         # Freeze conv layers
98 |         for i, (name, param) in enumerate(net.named_parameters()):
99 |             if 'conv' in name:
100 |                 param.requires_grad = False
101 |         # Modify the fc layer
102 |         in_channel = net.fc.in_features
103 |         net.fc = torch.nn.Linear(in_features=in_channel, out_features=20)
104 | 
105 |     elif args.modelpath is not None:
106 |         net = load_model_from_file(args.modelpath, model=args.model, load_fc=args.fc)
107 | 
108 |     else:
109 |         net = models[args.model](pretrained=False, num_classes=20)
110 | 
111 |     if args.gpu is not None:
112 |         net.cuda()
113 | 
114 |     criterion = torch.nn.MultiLabelSoftMarginLoss()
115 |     optimizer = torch.optim.SGD(filter(lambda p: p.requires_grad, net.parameters()),
116 |                                 lr=args.lr, momentum=0.9, weight_decay=0.0001)
117 | 
118 | 
119 |     ############## TRAINING ###############
120 |     print("Start training, lr: %f, batch-size: %d" % (args.lr, args.batch))
121 |     print("Model: " + args.model)
122 |     print("Checkpoint Path: " + args.checkpoint)
123 |     print("Time: " + prefix)
124 |     if args.modelpath is not None:
125 |         print("Training from past model: " + args.modelpath)
126 | 
127 |     # Train the Model
128 |     steps = args.iter_start
129 |     for epoch in range(args.iter_start // iter_per_epoch, args.epochs):  # convert the starting iteration count to a starting epoch
130 |         adjust_learning_rate(optimizer, epoch, init_lr=args.lr, step=20, decay=0.1)
131 | 
132 |         mAP = []
133 |         for i, (images, labels) in enumerate(train_loader):
134 |             if args.gpu is not None:
135 |                 images = images.cuda()
136 |                 labels = labels.cuda()
137 | 
138 |             # Forward + Backward + Optimize
139 |             optimizer.zero_grad()
140 |             outputs = net(images)
141 | 
142 |             mAP.append(compute_mAP(labels.data, outputs.data))
143 | 
144 |             loss = criterion(outputs, labels)
145 |             loss.backward()
146 |             optimizer.step()
147 |             loss = loss.cpu().data.numpy()
148 | 
149 |             if steps % 100 == 0:
150 |                 print('[%d/%d] %d), Loss: %.3f, mAP %.2f%%' % (epoch + 1, args.epochs, steps, loss, 100 * np.mean(mAP[-20:])))
151 | 
152 |             steps += 1
153 | 
154 |         if epoch % 5 == 0:
155 |             filename = '%s/%s_%s_%03i.pth' % (args.checkpoint, args.model, prefix, epoch + 1)
156 |             torch.save(net.state_dict(), filename)
157 |             print('Saved: ' + filename)
158 | 
159 |         eval_map(net, None, val_loader, steps, args.gpu, args.crops)
160 | 
161 |         if os.path.exists(args.checkpoint + '/stop.txt'):
162 |             # break without using CTRL+C
163 |             break
164 | 
165 | 
166 | if __name__ == "__main__":
167 |     main()
--------------------------------------------------------------------------------
/src/Network.py:
--------------------------------------------------------------------------------
1 | import torch.nn as nn
2 | import torch.utils.model_zoo as model_zoo
3 | 
4 | 
5 | __all__ = ['ResNet', 'resnet18', 'resnet34', 'resnet50', 'resnet101',
6 |            'resnet152']
7 | 
8 | 
9 | model_urls = {
10 |     'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth',
11 |     'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth',
12 |     'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth',
13 |     'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth',
14 |     'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth',
15 | }
16 | 
17 | 
18 | def conv3x3(in_planes, out_planes, stride=1):
19 |     """3x3 convolution with padding"""
20 |     return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
21 |                      padding=1, bias=False)
22 | 
23 | 
24 | def conv1x1(in_planes, out_planes, stride=1):
25 |     """1x1 
convolution""" 26 | return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False) 27 | 28 | 29 | class BasicBlock(nn.Module): 30 | expansion = 1 31 | 32 | def __init__(self, inplanes, planes, stride=1, downsample=None): 33 | super(BasicBlock, self).__init__() 34 | self.conv1 = conv3x3(inplanes, planes, stride) 35 | self.bn1 = nn.BatchNorm2d(planes) 36 | self.relu = nn.ReLU(inplace=True) 37 | self.conv2 = conv3x3(planes, planes) 38 | self.bn2 = nn.BatchNorm2d(planes) 39 | self.downsample = downsample 40 | self.stride = stride 41 | 42 | def forward(self, x): 43 | identity = x 44 | 45 | out = self.conv1(x) 46 | out = self.bn1(out) 47 | out = self.relu(out) 48 | 49 | out = self.conv2(out) 50 | out = self.bn2(out) 51 | 52 | if self.downsample is not None: 53 | identity = self.downsample(x) 54 | 55 | out += identity 56 | out = self.relu(out) 57 | 58 | return out 59 | 60 | 61 | class Bottleneck(nn.Module): 62 | expansion = 4 63 | 64 | def __init__(self, inplanes, planes, stride=1, downsample=None): 65 | super(Bottleneck, self).__init__() 66 | self.conv1 = conv1x1(inplanes, planes) 67 | self.bn1 = nn.BatchNorm2d(planes) 68 | self.conv2 = conv3x3(planes, planes, stride) 69 | self.bn2 = nn.BatchNorm2d(planes) 70 | self.conv3 = conv1x1(planes, planes * self.expansion) 71 | self.bn3 = nn.BatchNorm2d(planes * self.expansion) 72 | self.relu = nn.ReLU(inplace=True) 73 | self.downsample = downsample 74 | self.stride = stride 75 | 76 | def forward(self, x): 77 | identity = x 78 | 79 | out = self.conv1(x) 80 | out = self.bn1(out) 81 | out = self.relu(out) 82 | 83 | out = self.conv2(out) 84 | out = self.bn2(out) 85 | out = self.relu(out) 86 | 87 | out = self.conv3(out) 88 | out = self.bn3(out) 89 | 90 | if self.downsample is not None: 91 | identity = self.downsample(x) 92 | 93 | out += identity 94 | out = self.relu(out) 95 | 96 | return out 97 | 98 | 99 | class ResNet(nn.Module): 100 | 101 | def __init__(self, block, layers, num_classes=1000, zero_init_residual=False): 102 | super(ResNet, self).__init__() 103 | self.inplanes = 64 104 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, 105 | bias=False) 106 | self.bn1 = nn.BatchNorm2d(64) 107 | self.relu = nn.ReLU(inplace=True) 108 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 109 | self.layer1 = self._make_layer(block, 64, layers[0]) 110 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2) 111 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2) 112 | self.layer4 = self._make_layer(block, 512, layers[3], stride=2) 113 | self.avgpool = nn.AdaptiveAvgPool2d((1, 1)) 114 | self.fc = nn.Linear(512 * block.expansion, num_classes) 115 | 116 | for m in self.modules(): 117 | if isinstance(m, nn.Conv2d): 118 | nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') 119 | elif isinstance(m, nn.BatchNorm2d): 120 | nn.init.constant_(m.weight, 1) 121 | nn.init.constant_(m.bias, 0) 122 | 123 | # Zero-initialize the last BN in each residual branch, 124 | # so that the residual branch starts with zeros, and each residual block behaves like an identity. 
125 | # This improves the model by 0.2~0.3% according to https://arxiv.org/abs/1706.02677 126 | if zero_init_residual: 127 | for m in self.modules(): 128 | if isinstance(m, Bottleneck): 129 | nn.init.constant_(m.bn3.weight, 0) 130 | elif isinstance(m, BasicBlock): 131 | nn.init.constant_(m.bn2.weight, 0) 132 | 133 | def _make_layer(self, block, planes, blocks, stride=1): 134 | downsample = None 135 | if stride != 1 or self.inplanes != planes * block.expansion: 136 | downsample = nn.Sequential( 137 | conv1x1(self.inplanes, planes * block.expansion, stride), 138 | nn.BatchNorm2d(planes * block.expansion), 139 | ) 140 | 141 | layers = [] 142 | layers.append(block(self.inplanes, planes, stride, downsample)) 143 | self.inplanes = planes * block.expansion 144 | for _ in range(1, blocks): 145 | layers.append(block(self.inplanes, planes)) 146 | 147 | return nn.Sequential(*layers) 148 | 149 | def forward(self, x): 150 | x = self.conv1(x) 151 | x = self.bn1(x) 152 | x = self.relu(x) 153 | x = self.maxpool(x) 154 | 155 | x = self.layer1(x) 156 | x = self.layer2(x) 157 | x = self.layer3(x) 158 | x = self.layer4(x) 159 | 160 | x = self.avgpool(x) 161 | x = x.view(x.size(0), -1) 162 | x = self.fc(x) 163 | 164 | return x 165 | 166 | 167 | def resnet18(pretrained=False, **kwargs): 168 | """Constructs a ResNet-18 model. 169 | 170 | Args: 171 | pretrained (bool): If True, returns a model pre-trained on ImageNet 172 | """ 173 | model = ResNet(BasicBlock, [2, 2, 2, 2], **kwargs) 174 | if pretrained: 175 | model.load_state_dict(model_zoo.load_url(model_urls['resnet18'])) 176 | return model 177 | 178 | 179 | def resnet34(pretrained=False, **kwargs): 180 | """Constructs a ResNet-34 model. 181 | 182 | Args: 183 | pretrained (bool): If True, returns a model pre-trained on ImageNet 184 | """ 185 | model = ResNet(BasicBlock, [3, 4, 6, 3], **kwargs) 186 | if pretrained: 187 | model.load_state_dict(model_zoo.load_url(model_urls['resnet34'])) 188 | return model 189 | 190 | 191 | def resnet50(pretrained=False, **kwargs): 192 | """Constructs a ResNet-50 model. 193 | 194 | Args: 195 | pretrained (bool): If True, returns a model pre-trained on ImageNet 196 | """ 197 | model = ResNet(Bottleneck, [3, 4, 6, 3], **kwargs) 198 | if pretrained: 199 | model.load_state_dict(model_zoo.load_url(model_urls['resnet50'])) 200 | return model 201 | 202 | 203 | def resnet101(pretrained=False, **kwargs): 204 | """Constructs a ResNet-101 model. 205 | 206 | Args: 207 | pretrained (bool): If True, returns a model pre-trained on ImageNet 208 | """ 209 | model = ResNet(Bottleneck, [3, 4, 23, 3], **kwargs) 210 | if pretrained: 211 | model.load_state_dict(model_zoo.load_url(model_urls['resnet101'])) 212 | return model 213 | 214 | 215 | def resnet152(pretrained=False, **kwargs): 216 | """Constructs a ResNet-152 model. 
217 | 
218 |     Args:
219 |         pretrained (bool): If True, returns a model pre-trained on ImageNet
220 |     """
221 |     model = ResNet(Bottleneck, [3, 8, 36, 3], **kwargs)
222 |     if pretrained:
223 |         model.load_state_dict(model_zoo.load_url(model_urls['resnet152']))
224 |     return model
225 | 
--------------------------------------------------------------------------------
/src/Utils.py:
--------------------------------------------------------------------------------
1 | import os
2 | import numpy as np
3 | import torch
4 | import torch.utils.data as data
5 | from scipy.misc import imread
6 | from PIL import Image
7 | from Network import *
8 | from sklearn.metrics import average_precision_score
9 | 
10 | models = {
11 |     'resnet18': resnet18,
12 |     'resnet34': resnet34,
13 |     'resnet50': resnet50,
14 |     'resnet101': resnet101,
15 |     'resnet152': resnet152
16 | }
17 | 
18 | 
19 | def compute_mAP(labels, outputs):
20 |     y_true = labels.cpu().numpy()
21 |     y_pred = outputs.cpu().numpy()
22 |     AP = []
23 |     for i in range(y_true.shape[0]):
24 |         AP.append(average_precision_score(y_true[i], y_pred[i]))
25 |     return np.mean(AP)
26 | 
27 | 
28 | def eval_map(net, logger, val_loader, steps, gpu, crops):
29 |     if gpu is not None:
30 |         net.cuda()
31 |         os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
32 |         os.environ["CUDA_VISIBLE_DEVICES"] = str(gpu)
33 |     mAP = []
34 |     net.eval()
35 |     for i, (images, labels) in enumerate(val_loader):
36 |         images = images.view((-1, 3, 224, 224))
37 |         if gpu is not None:
38 |             images = images.cuda()
39 | 
40 |         # Forward pass only (no backward/optimize during evaluation)
41 |         outputs = net(images)
42 |         outputs = outputs.cpu().data
43 |         if crops != 0:
44 |             outputs = outputs.view((-1, crops, 20))
45 |             outputs = outputs.mean(dim=1).view((-1, 20))
46 |         else:
47 |             outputs = outputs.view((-1, 20))
48 | 
49 |         # score = tnt.meter.mAPMeter(outputs, labels)
50 |         mAP.append(compute_mAP(labels, outputs))
51 | 
52 |     if logger is not None:
53 |         logger.scalar_summary('mAP', np.mean(mAP), steps)
54 |     print('TESTING: %d), mAP %.2f%%' % (steps, 100 * np.mean(mAP)))
55 |     net.train()
56 | 
57 | 
58 | def eval_macc(val_loader, model_path="../checkpoints/resnet18_190515_2049_001.pth",
59 |               model="resnet18", gpu=None, crops=0):
60 |     """
61 |     Evaluate a model on a dataset, using mAcc as index
62 |     :param val_loader: the dataloader (torch.utils.data.DataLoader) object
63 |     :param model_path: the path to the model
64 |     :param model: which kind the model is
65 |     :param gpu: which gpu to use
66 |     :param crops: how many random crops
67 |     :return: mAcc on the dataset
68 |     """
69 |     net = load_model_from_file(model_path, model=model, load_fc=True)
70 |     if gpu is not None:
71 |         net.cuda()
72 |         os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
73 |         os.environ["CUDA_VISIBLE_DEVICES"] = str(gpu)
74 | 
75 |     acc = []
76 |     net.eval()
77 |     for idx, (images, labels) in enumerate(val_loader):
78 |         images = images.view((-1, 3, 224, 224))
79 |         if gpu is not None:
80 |             images = images.cuda()
81 | 
82 |         outputs = net(images)
83 |         outputs = outputs.cpu().data
84 |         if crops != 0:
85 |             outputs = outputs.view((-1, crops, 20))
86 |             outputs = outputs.max(dim=1)[0].view((-1, 20))
87 |         else:
88 |             outputs = outputs.view((-1, 20))
89 | 
90 |         # outputs: shape [batchsize, num_classes]
91 |         outputs = (outputs > 0)
92 |         acc.append(np.sum((outputs.numpy() == labels.numpy()).astype(float)) / labels.numpy().size)  # size == batch * 20, also correct for a smaller last batch
93 | 
94 |         print("Evaluating mAcc, batch %d" % idx, end='\r')
95 | 
96 |     macc = sum(acc) / len(acc)
97 |     print("\nFinal mAcc: %f" % macc)
98 |     return macc
99 | 
100 | 
101 | def 
eval_wacc(val_loader, model_path="../checkpoints/resnet18_190515_2049_001.pth",
102 |               model="resnet18", gpu=None, crops=0):
103 |     """
104 |     Evaluate a model on a dataset, using wAcc as index
105 |     :param val_loader: the dataloader (torch.utils.data.DataLoader) object
106 |     :param model_path: the path to the model
107 |     :param model: which kind the model is
108 |     :param gpu: which gpu to use
109 |     :param crops: how many random crops
110 |     :return: wAcc on the dataset
111 |     """
112 |     net = load_model_from_file(model_path, model=model, load_fc=True)
113 |     if gpu is not None:
114 |         net.cuda()
115 |         os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
116 |         os.environ["CUDA_VISIBLE_DEVICES"] = str(gpu)
117 | 
118 |     acc = np.zeros(20)
119 |     net.eval()
120 |     freq = np.zeros(20)
121 | 
122 |     for idx, (images, labels) in enumerate(val_loader):
123 |         # Frequency of the labels
124 |         freq += np.sum(labels.numpy(), axis=0)
125 |         images = images.view((-1, 3, 224, 224))
126 | 
127 |         if gpu is not None:
128 |             images = images.cuda()
129 |         outputs = net(images)
130 |         outputs = outputs.cpu().data
131 |         if crops != 0:
132 |             outputs = outputs.view((-1, crops, 20))
133 |             outputs = outputs.max(dim=1)[0].view((-1, 20))
134 |         else:
135 |             outputs = outputs.view((-1, 20))
136 |         outputs = (outputs > 0)
137 |         acc += np.sum((outputs.numpy() == labels.numpy()), axis=0).astype(float)
138 | 
139 |         print("Evaluating wAcc, batch %d" % idx, end="\r")
140 | 
141 |     freq = freq / np.sum(freq)
142 |     acc = acc / len(val_loader.dataset)
143 | 
144 |     wacc = np.dot(freq, acc)
145 |     print("\nFinal wAcc: %f" % wacc)
146 |     return wacc
147 | 
148 | 
149 | def eval_f1(val_loader, model_path="../checkpoints/resnet18_190515_2049_001.pth",
150 |             model="resnet18", gpu=None, crops=0):
151 |     """
152 |     Evaluate a model on a dataset, using F1 as index
153 |     :param val_loader: the dataloader (torch.utils.data.DataLoader) object
154 |     :param model_path: the path to the model
155 |     :param model: which kind the model is
156 |     :param gpu: which gpu to use
157 |     :param crops: how many random crops
158 |     :return: F1 score on the dataset
159 |     """
160 |     net = load_model_from_file(model_path, model=model, load_fc=True)
161 |     if gpu is not None:
162 |         net.cuda()
163 |         os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
164 |         os.environ["CUDA_VISIBLE_DEVICES"] = str(gpu)
165 | 
166 |     f1 = []
167 |     precision = []
168 |     recall = []
169 |     net.eval()
170 |     for idx, (images, labels) in enumerate(val_loader):
171 |         images = images.view((-1, 3, 224, 224))
172 |         if gpu is not None:
173 |             images = images.cuda()
174 | 
175 |         outputs = net(images)
176 |         outputs = outputs.cpu().data
177 |         if crops != 0:
178 |             outputs = outputs.view((-1, crops, 20))
179 |             outputs = outputs.max(dim=1)[0].view((-1, 20))
180 |         else:
181 |             outputs = outputs.view((-1, 20))
182 | 
183 |         # outputs: shape [batchsize, num_classes]
184 |         outputs = (outputs > 0)
185 |         TP = np.sum((outputs.numpy() == 1) & (labels.numpy() == 1))
186 |         # TN = np.sum((outputs.numpy() == 0) & (labels.numpy() == 0))
187 |         FN = np.sum((outputs.numpy() == 0) & (labels.numpy() == 1))
188 |         FP = np.sum((outputs.numpy() == 1) & (labels.numpy() == 0))
189 |         precision.append(TP / max(TP + FP, 1))  # guard against division by zero
190 |         recall.append(TP / max(TP + FN, 1))
191 |         f1.append((2 * precision[-1] * recall[-1]) / max(precision[-1] + recall[-1], 1e-12))
192 | 
193 |         print("Evaluating f1, batch %d" % idx, end='\r')
194 | 
195 |     mf1 = sum(f1) / len(f1)
196 |     mprecision = sum(precision) / len(precision)
197 |     mrecall = sum(recall) / len(recall)
198 |     print("\nFinal f1-score: %f" % mf1)
199 |     print("precision: %f" % 
mprecision)
200 |     print("recall: %f" % mrecall)
201 |     return mf1
202 | 
203 | 
204 | def predict(transform, model_path='../checkpoints/190513.2359_011_0.917.pth', img_path='../test.jpg', model="resnet18",
205 |             crops=0, gpu=None):
206 |     """
207 |     Predict an image with the model
208 |     :param transform: the torchvision.transforms object. A proper transform may help prediction
209 |     :param model_path: the path to the model
210 |     :param img_path: the path to the image
211 |     :param model: the kind of the model
212 |     :param gpu: which gpu to use
213 |     :return: None. The result will be shown on the screen directly
214 |     """
215 |     net = load_model_from_file(model_path, model, True)
216 |     if gpu is not None:
217 |         net.cuda()
218 |         os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
219 |         os.environ["CUDA_VISIBLE_DEVICES"] = str(gpu)
220 | 
221 |     net.eval()
222 |     img = imread(img_path, mode='RGB')
223 |     img = Image.fromarray(img)
224 |     if crops == 0:
225 |         img = transform(img)
226 |     else:
227 |         img_crop = []
228 |         for i in range(0, crops):
229 |             img_crop.append(transform(img))
230 |         img = torch.stack(img_crop)
231 |     img = img.view((-1, 3, 224, 224))
232 |     if gpu is not None:
233 |         img = img.cuda()
234 | 
235 |     outputs = net(img)
236 |     outputs = outputs.cpu().data
237 |     if crops != 0:
238 |         outputs = outputs.view((-1, crops, 20))
239 |         outputs = outputs.max(dim=1)[0].view((-1, 20))
240 |     else:
241 |         outputs = outputs.view((-1, 20))
242 |     print("output tensor:", outputs)
243 |     print("Results:", (outputs > 0) * 1)
244 |     Categories = np.array(['person', 'bird', 'cat', 'cow',
245 |                            'dog', 'horse', 'sheep', 'aeroplane', 'bicycle',
246 |                            'boat', 'bus', 'car', 'motorbike',
247 |                            'train', 'bottle', 'chair',
248 |                            'diningtable', 'pottedplant', 'sofa', 'tvmonitor'])
249 |     print("Categories:", Categories[np.where(outputs[0].numpy() > 0)])
250 | 
251 | 
252 | def adjust_learning_rate(optimizer, epoch, init_lr, step=80, decay=0.1):
253 |     """
254 |     This function adjusts the learning rate automatically during training.
255 |     https://www.pytorchtutorial.com/pytorch-learning-rate-decay/
256 |     :param optimizer: the optimizer
257 |     :param epoch: current epoch
258 |     :param init_lr: initial learning rate
259 |     :param step: number of epochs between two decays
260 |     :param decay: multiplicative decay factor
261 |     :return: Nothing
262 |     """
263 |     lr = init_lr * (decay ** (epoch // step))
264 |     for param_group in optimizer.param_groups:
265 |         param_group['lr'] = lr
266 | 
267 | 
268 | def load_model_from_file(filepath, model="resnet18", load_fc=None):
269 |     """
270 |     Load the trained model from .pth file. 
Only for the same model trained before 271 | :param filepath: the path to .pth file 272 | :param model: the backbone network 273 | :param load_fc: whether to load fc layer 274 | :return: loaded model 275 | """ 276 | # Get the initial network 277 | dict_init = torch.load(filepath) 278 | keys = [k for k, v in dict_init.items()] 279 | keys.sort() 280 | # Generate a new network 281 | net = models[model](pretrained=False, num_classes=20) 282 | model_dict = net.state_dict() 283 | # load the layers 284 | to_load = [] 285 | for k in keys: 286 | if k not in model_dict: 287 | continue 288 | if load_fc is not None or 'fc' not in k: 289 | to_load.append(k) 290 | # load the dict 291 | dict_init = {k: v for k, v in dict_init.items() if k in to_load and k in model_dict} 292 | model_dict.update(dict_init) 293 | net.load_state_dict(model_dict) 294 | 295 | return net 296 | 297 | 298 | class Logger: 299 | def __init__(self, path): 300 | self.path = path 301 | 302 | def scalar_summary(self, name, value, steps): 303 | self.__dict__[name] = (steps, value) 304 | 305 | 306 | class MyDataLoader(data.Dataset): 307 | def __init__(self, transform, trainval='train', data_path='../dataset', random_crops=0): 308 | """ 309 | Initialize the dataset. Inherited from torch.data.Dataset, __len__ and __getitem__ need to be implemented. 310 | VOC(Labels only) tree: 311 | --dataset root 312 | |--train 313 | | |--JPEGImages(dir) 314 | | |--annotations.txt 315 | | 316 | |--test 317 | |--JPEGImages(dir) 318 | |--annotations.txt 319 | :param transform: the transformation 320 | :param data_path: the root of the datapath 321 | :param random_crops: 322 | """ 323 | self.data_path = data_path 324 | self.transform = transform 325 | self.random_crops = random_crops 326 | self.train_or_test = trainval 327 | 328 | self.__init_classes() 329 | self.names, self.labels = self.__dataset_info() 330 | 331 | def __getitem__(self, index): 332 | """ 333 | This is the getitem func which enables enumerator. Implemented. 334 | :param index: the index of the picture 335 | :return: tuple (picture, its label(s)) 336 | """ 337 | x = imread(os.path.join(self.data_path, self.train_or_test, 'JPEGImages', self.names[index] + '.jpg'), 338 | mode='RGB') 339 | x = Image.fromarray(x) 340 | 341 | # Resize directly 342 | x = x.resize((224, 224), Image.BILINEAR) 343 | 344 | if self.random_crops == 0: 345 | x = self.transform(x) 346 | else: 347 | crops = [] 348 | for i in range(self.random_crops): 349 | crops.append(self.transform(x)) 350 | x = torch.stack(crops) 351 | 352 | y = self.labels[index] 353 | return x, y 354 | 355 | def __len__(self): 356 | """ 357 | How many images are there. Implemented. 358 | :return: length 359 | """ 360 | return len(self.names) 361 | 362 | def __dataset_info(self): 363 | """ 364 | Generate names(np.array, with string elements) and labels(np.array, with array(number) elements). 
365 |         A label looks like this: [0 0 0 0 0 1 0 0 0 0 0 0 1 0 0 1 0 0 0 0]
366 |         A value of 1 means the corresponding object appears in this image.
367 |         :return: names, labels
368 |         """
369 |         annotation_file = os.path.join(self.data_path, self.train_or_test, 'annotations.txt')
370 |         with open(annotation_file, 'r') as fp:
371 |             lines = fp.readlines()
372 | 
373 |         names = []
374 |         labels = []
375 |         for line in lines:
376 |             # Name
377 |             names.append(line.strip('\n').split(' ')[0])
378 | 
379 |             # Label: the annotation stores class indices; turn them into a 20-dim 0/1 vector
380 |             str_label = line.strip('\n').split(' ')[1:]
381 |             num_label = [int(x) for x in str_label]
382 |             flag_label = np.zeros(self.num_classes)
383 |             flag_label[num_label] = 1
384 | 
385 |             labels.append(np.array(flag_label))
386 | 
387 |         return np.array(names), np.array(labels).astype(np.float32)
388 | 
389 |     def __init_classes(self):
390 |         self.classes = ('person', 'bird', 'cat', 'cow',
391 |                         'dog', 'horse', 'sheep', 'aeroplane', 'bicycle',
392 |                         'boat', 'bus', 'car', 'motorbike',
393 |                         'train', 'bottle', 'chair',
394 |                         'diningtable', 'pottedplant', 'sofa', 'tvmonitor')
395 |         self.num_classes = len(self.classes)
396 |         self.class_to_ind = dict(zip(self.classes, range(self.num_classes)))
397 | 
--------------------------------------------------------------------------------