├── README.md
├── __pycache__
│   ├── dataset.cpython-36.pyc
│   ├── dataset.cpython-37.pyc
│   ├── draw.cpython-37.pyc
│   ├── model.cpython-36.pyc
│   ├── model.cpython-37.pyc
│   ├── model_parallel.cpython-37.pyc
│   ├── train.cpython-36.pyc
│   ├── train.cpython-37.pyc
│   ├── utils.cpython-36.pyc
│   └── utils.cpython-37.pyc
├── dataset.py
├── det_2008_000008_ep=100.png
├── det_2008_000008_ep=150.png
├── det_2008_000008_ep=25.png
├── det_2008_000008_ep=50.png
├── draw.py
├── loss_history_lr=1e-05_ep=150_wo.png
├── mAP_history_lr=1e-05_ep=150_wo.png
├── main_model_parallel.py
├── main_test_voc.py
├── main_train_voc.py
├── main_voc.py
├── model.py
├── model_parallel.py
├── pallete
├── readme-pic
├── train.py
├── utils.py
└── voc_label.py

/README.md:
--------------------------------------------------------------------------------
1 | # README
2 | This is my PyTorch implementation of
3 | [YOLO v1](https://pjreddie.com/media/files/papers/yolo.pdf) from scratch, which includes scripts for **train/val** and **test**.
4 | 
5 | It not only helped me gain hands-on experience with PyTorch, but also serves as a framework for
6 | one-stage detectors that facilitates future development.
7 | Please see [Descriptions](#descriptions).
8 | 
9 | 
10 | This implementation passes the [sanity check](#sanity-check).
11 | 
12 | ## Requirements
13 | **Packages**
14 | - Python 3.7
15 | 
16 | - CUDA 10.0
17 | 
18 | - PyTorch 1.1
19 | 
20 | - Numpy >= 1.15
21 | 
22 | - Scikit-image >= 0.14
23 | 
24 | - Matplotlib >= 2.2.3
25 | 
26 | **Hardware**
27 | - **2 GPUs**, each with at least **11 GB** of memory.
28 | 
29 | ## Descriptions
30 | 
31 | **Modules**
32 | 
33 | - `utils.py` -- data format transformation and performance evaluation
34 | 
35 | - `draw.py` -- output visualization
36 | 
37 | - `dataset.py` -- dataset and dataloader
38 | 
39 | - `model.py` -- network architecture definition
40 | 
41 | - `model_parallel.py` -- model-parallel construction of the network
42 | (**placing 2 different sub-networks of the model onto 2 GPUs**)
43 | 
44 | - `train.py` -- loss calculation
45 | 
46 | **Scripts**
47 | 
48 | - `main_model_parallel.py` -- **train** the model on VOC
49 | 
50 | - `main_test_voc.py` -- **test** the model on VOC
51 | 
52 | 
53 | 
54 | 
55 | 
56 | ## Usage
57 | **Warning:** Since PyTorch does not come with a `same` padding option, a minor modification is required:
58 | 
59 | 
60 | ### Step 1: Modify the `conv` module
61 | 
62 | Go to the PyTorch site-packages folder (e.g.
63 | `/venv/lib/python3.7/site-packages/torch/nn/modules/conv.py`).
64 | 
65 | 
66 | 
67 | 
68 | Define `conv2d_same_padding` as follows.
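The snippet below mimics TensorFlow-style `same` padding: it works out how much padding a `same` convolution would need and pads the input with `F.pad` before calling `F.conv2d`. Note that it derives the padding from the row dimension only and reuses it for the columns, so it implicitly assumes square inputs (such as the 448×448 images used in this project).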
69 | 
70 |     def conv2d_same_padding(input, weight, bias=None, stride=1, padding=1, dilation=1, groups=1):
71 | 
72 |         input_rows = input.size(2)
73 |         filter_rows = weight.size(2)
74 |         effective_filter_size_rows = (filter_rows - 1) * dilation[0] + 1
75 |         out_rows = (input_rows + stride[0] - 1) // stride[0]
76 |         padding_needed = max(0, (out_rows - 1) * stride[0] + effective_filter_size_rows -
77 |                              input_rows)  # note: not used below
78 |         padding_rows = max(0, (out_rows - 1) * stride[0] +
79 |                            (filter_rows - 1) * dilation[0] + 1 - input_rows)
80 |         rows_odd = (padding_rows % 2 != 0)
81 |         padding_cols = max(0, (out_rows - 1) * stride[0] +
82 |                            (filter_rows - 1) * dilation[0] + 1 - input_rows)
83 |         cols_odd = (padding_cols % 2 != 0)
84 | 
85 |         if rows_odd or cols_odd:
86 |             input = F.pad(input, [0, int(cols_odd), 0, int(rows_odd)])
87 | 
88 |         return F.conv2d(input, weight, bias, stride,
89 |                         padding=(padding_rows // 2, padding_cols // 2),
90 |                         dilation=dilation, groups=groups)
91 | 
92 | 
93 | 
94 | Modify the `forward` function in `class Conv2d(_ConvNd)` by replacing `F.conv2d` with `conv2d_same_padding`.
95 | 
96 |     class Conv2d(_ConvNd):
97 | 
98 |         @weak_script_method
99 |         def forward(self, input):
100 |             # return F.conv2d(input, self.weight, self.bias, self.stride,
101 |             #                 self.padding, self.dilation, self.groups)
102 |             return conv2d_same_padding(input, self.weight, self.bias, self.stride,
103 |                                        self.padding, self.dilation, self.groups)  # same padding, like TensorFlow
104 | 
105 | 
106 | ### Step 2: Download data
107 | Please follow the instructions **Get The Pascal VOC Data** and **Generate Labels for VOC** at
108 | https://pjreddie.com/darknet/yolo/.
109 | 
110 | **Warning**: Make sure you see the following in the dataset directory (e.g. the folder `VOC_yolo_format`):
111 | 
112 |     2007_test.txt    VOCdevkit
113 |     2007_train.txt   voc_label.py
114 |     2007_val.txt     VOCtest_06-Nov-2007.tar
115 |     2012_train.txt   VOCtrainval_06-Nov-2007.tar
116 |     2012_val.txt     VOCtrainval_11-May-2012.tar
117 | 
118 | 
119 | ### Step 3: Train model
120 | 
121 | **Default settings**
122 | 
123 |     batch_size = 32
124 |     use_float64 = False
125 |     use_scheduler = True
126 |     use_bn = True
127 |     learning_rate = 1e-5
128 |     model_weights = None
129 |     phases = ['train', 'val']
130 | 
131 | *Note*: For the sanity check, set `use_bn = False` and `phases = ['train']` instead.
132 | 
133 | **How to run?**
134 | 
135 | `$ python main_model_parallel.py -n [num_epoch] -t [train_txt]`
136 | ## Outputs
137 | The training log, plots, checkpoints and the **best** weights will be automatically saved in these folders:
138 | 
139 |     ./log
140 |     ./plot
141 |     ./checkpoints
142 |     ./weights
143 | 
144 | 
145 | 
146 | ## Experiments
147 | ### Sanity Check
148 | Overfit the model on two samples.
149 | 
150 | Turn off regularization with `use_bn = False`, and use **train** mode ONLY via `phases = ['train']`.
151 | 
152 | **Default settings**
153 | 
154 |     num_epoch = 150
155 |     use_float64 = False
156 |     use_scheduler = False
157 |     use_bn = False
158 |     phases = ['train']
159 |     learning_rate = 1e-5
160 |     model_weights = None
161 | 
162 | #### Detections
163 | 
164 | The following shows the detection outputs at epochs `25`, `50`, `100` and `150`, respectively.
165 | The model converges after `100` epochs, with `loss` close to `0` and `mAP` equal to `1.0`.
166 | 
167 | Notes:
168 | 
169 | * By VOC convention, a detection with `IOU >= 0.5` is considered a True Positive.
170 | 
171 | * Bounding boxes with `confidence_score <= 0.1` will be filtered out.
172 | To customize, go to `utils.py`, and declare your preferred value for `conf_threshold` in `prediction2detection()`. 173 | 174 | ![image 1](./det_2008_000008_ep=25.png) *epoch = 25* 175 | ![image 2](./det_2008_000008_ep=50.png) *epoch = 50* 176 | 177 | ![image 3](./det_2008_000008_ep=100.png) *epoch = 100* 178 | ![image 4](./det_2008_000008_ep=150.png) *epoch = 150* 179 | ![image gt](./det_2008_000008_gt.png) *Ground Truth* 180 | 181 | #### Loss 182 | 183 | ![image loss](./loss_history_lr=1e-05_ep=150_wo.png) 184 | 185 | #### mAP 186 | 187 | ![image mAP](./mAP_history_lr=1e-05_ep=150_wo.png) 188 | 189 | 190 | 191 | ## Reference 192 | [1] You Only Look Once: Unified, Real-Time Object Detection. https://pjreddie.com/media/files/papers/yolo.pdf 193 | 194 | 195 | 196 | 197 | -------------------------------------------------------------------------------- /__pycache__/dataset.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzzheng/pytorch-yolo-v1/524dd63b8d3029c1815b89de0b32014c8b0d0def/__pycache__/dataset.cpython-36.pyc -------------------------------------------------------------------------------- /__pycache__/dataset.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzzheng/pytorch-yolo-v1/524dd63b8d3029c1815b89de0b32014c8b0d0def/__pycache__/dataset.cpython-37.pyc -------------------------------------------------------------------------------- /__pycache__/draw.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzzheng/pytorch-yolo-v1/524dd63b8d3029c1815b89de0b32014c8b0d0def/__pycache__/draw.cpython-37.pyc -------------------------------------------------------------------------------- /__pycache__/model.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzzheng/pytorch-yolo-v1/524dd63b8d3029c1815b89de0b32014c8b0d0def/__pycache__/model.cpython-36.pyc -------------------------------------------------------------------------------- /__pycache__/model.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzzheng/pytorch-yolo-v1/524dd63b8d3029c1815b89de0b32014c8b0d0def/__pycache__/model.cpython-37.pyc -------------------------------------------------------------------------------- /__pycache__/model_parallel.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzzheng/pytorch-yolo-v1/524dd63b8d3029c1815b89de0b32014c8b0d0def/__pycache__/model_parallel.cpython-37.pyc -------------------------------------------------------------------------------- /__pycache__/train.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzzheng/pytorch-yolo-v1/524dd63b8d3029c1815b89de0b32014c8b0d0def/__pycache__/train.cpython-36.pyc -------------------------------------------------------------------------------- /__pycache__/train.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzzheng/pytorch-yolo-v1/524dd63b8d3029c1815b89de0b32014c8b0d0def/__pycache__/train.cpython-37.pyc -------------------------------------------------------------------------------- 
/__pycache__/utils.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzzheng/pytorch-yolo-v1/524dd63b8d3029c1815b89de0b32014c8b0d0def/__pycache__/utils.cpython-36.pyc -------------------------------------------------------------------------------- /__pycache__/utils.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzzheng/pytorch-yolo-v1/524dd63b8d3029c1815b89de0b32014c8b0d0def/__pycache__/utils.cpython-37.pyc -------------------------------------------------------------------------------- /dataset.py: -------------------------------------------------------------------------------- 1 | from torch.utils.data import Dataset, DataLoader 2 | from skimage import io 3 | from skimage.transform import rescale, resize, downscale_local_mean 4 | import matplotlib.pyplot as plt 5 | from train import * 6 | from torchvision import transforms 7 | from torchvision.transforms import Normalize 8 | 9 | 10 | data_transform = transforms.Compose([ 11 | # transforms.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)) # for imageNet 12 | transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)) # good 13 | ]) 14 | 15 | class VOC(Dataset): 16 | """ 17 | Pascal VOC dataset. 18 | Note: 19 | using YOLO label format 20 | https://github.com/pjreddie/darknet 21 | Example: 22 | voc2012 = VOC('2012_train_short.txt', 448, 448) 23 | dataloader = DataLoader(voc2012, batch_size=4) 24 | I = voc2012[0][0] 25 | I = I.permute(1, 2, 0) 26 | plt.imshow(I) 27 | plt.show() 28 | """ 29 | 30 | def __init__(self, txt_file, img_width=None, img_height=None, transform=None): 31 | """ 32 | 33 | :param txt_file: all image directories 34 | """ 35 | with open(txt_file, 'r') as f: 36 | lines = f.readlines() 37 | self.image_list = [i.rstrip('\n') for i in lines] 38 | self.label_list = [str.replace('JPEGImages', 'labels').replace('.jpg', '.txt') 39 | for str in self.image_list] 40 | 41 | self.img_width = img_width 42 | self.img_height = img_height 43 | self.transform = transform 44 | 45 | def __len__(self): 46 | return len(self.image_list) 47 | 48 | def __getitem__(self, idx): 49 | # get image 50 | image = io.imread(self.image_list[idx]) 51 | 52 | if self.img_width and self.img_height: 53 | image = resize(image, (self.img_width, self.img_height)) 54 | image = torch.Tensor(image).permute(2, 0, 1) # pytorch format: C W H 55 | 56 | if self.transform: 57 | image = self.transform(image) 58 | 59 | # get label 60 | label = read_labels(self.label_list[idx]) 61 | # convert to S*S*5 Tensor with format 62 | label = labels2tensor(label) 63 | 64 | # get filename 65 | filename = self.image_list[idx].split('/')[-1] 66 | 67 | return image, label, filename 68 | 69 | 70 | if __name__ == "__main__": 71 | 72 | 73 | 74 | """ 75 | # Train on VOC 76 | """ 77 | voc2012 = VOC('/home/bizon/Dataset/VOC_yolo_format/2012_train_short.txt', IMG_WIDTH, IMG_HEIGHT, data_transform) 78 | dataloader = DataLoader(voc2012, batch_size=4) 79 | 80 | # Model 81 | yolo_model = build_darknet() 82 | yolo_model.train() 83 | 84 | # Optimize 85 | learning_rate = 1e-4 86 | optimizer = torch.optim.Adam(yolo_model.parameters(), lr=learning_rate) 87 | 88 | num_epoch = 1 89 | y_out_epoch = torch.Tensor() # record all output in a single epoch 90 | img_name_epoch = [] 91 | for epoch in range(num_epoch): 92 | for i, (image_batch, label_batch, img_name_batch) in enumerate(dataloader): 93 | 94 | print('batch = ', i) 95 | 
print('image = ', image_batch.size()) 96 | print('label =', label_batch.size()) 97 | 98 | # foward pass 99 | y_out = yolo_model(image_batch) 100 | y_out_epoch = torch.cat((y_out_epoch, y_out), 0) 101 | img_name_epoch += img_name_batch 102 | # compute loss 103 | loss = calc_loss(y_out.clone(), label_batch.clone()) 104 | optimizer.zero_grad() 105 | loss.backward() 106 | optimizer.step() 107 | print('\nEpoch = ', epoch, 'Batch = ', i, 'Loss = ', loss.item()) 108 | 109 | # evaluation 110 | det = prediction2detection(y_out_epoch, img_name_epoch) 111 | ground_truth = ground_truth_detection(voc2012.label_list) 112 | res = evaluate_IOU(det, ground_truth) 113 | res_tp_fp = evaluate_TP_FP(res, 0.5) 114 | results, acc_tps, acc_fps = evaluate_precision_recall(res_tp_fp, 0.5, ground_truth) 115 | 116 | print('Epoch {} done.'.format(epoch)) 117 | print('Acc TP for all classes = {} \n, Acc FP for all classes = {}\n'.format(acc_tps, acc_fps)) 118 | print('Done.') 119 | 120 | 121 | 122 | 123 | 124 | 125 | -------------------------------------------------------------------------------- /det_2008_000008_ep=100.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzzheng/pytorch-yolo-v1/524dd63b8d3029c1815b89de0b32014c8b0d0def/det_2008_000008_ep=100.png -------------------------------------------------------------------------------- /det_2008_000008_ep=150.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzzheng/pytorch-yolo-v1/524dd63b8d3029c1815b89de0b32014c8b0d0def/det_2008_000008_ep=150.png -------------------------------------------------------------------------------- /det_2008_000008_ep=25.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzzheng/pytorch-yolo-v1/524dd63b8d3029c1815b89de0b32014c8b0d0def/det_2008_000008_ep=25.png -------------------------------------------------------------------------------- /det_2008_000008_ep=50.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzzheng/pytorch-yolo-v1/524dd63b8d3029c1815b89de0b32014c8b0d0def/det_2008_000008_ep=50.png -------------------------------------------------------------------------------- /draw.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import cv2 4 | import pickle as pkl 5 | import random 6 | from utils import * 7 | 8 | classes = ["aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", 9 | "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"] 10 | 11 | 12 | def draw_single(img_name, label_dir='./', out_dir='./', show_flag=False): 13 | """ 14 | Draw bounding boxes of a SINGLE image. 15 | Automatically find labels based on image names. 16 | 17 | Note: Labels share the same name as images, using YOLO format. 18 | e.g. Image = 000001.jpg 19 | Label = 000001.txt 20 | format = 21 | 11 0.344192634561 0.611 0.416430594901 0.262 22 | 14 0.509915014164 0.51 0.974504249292 0.972 23 | 24 | 25 | :param img_name: single image name / path + name 26 | :param label_dir: the corresponding label directory 27 | :param out_dir: declare output directory, which will be created if not exist. 28 | :param show_flag: display if True. 
29 | :return: 30 | """ 31 | # Read image 32 | file_name = img_name.split('/')[-1].split('.')[0] 33 | 34 | img = cv2.imread(img_name) 35 | height, width = img.shape[:2] 36 | 37 | # Read label 38 | labels = read_labels(os.path.join(label_dir, file_name + '.txt')) 39 | 40 | # Color 41 | colors = pkl.load(open('pallete', 'rb')) 42 | font = cv2.FONT_HERSHEY_SIMPLEX 43 | m = 10 44 | 45 | # Draw box + class 46 | for l in labels: 47 | cls = classes[int(l[0])] 48 | upper_left_x = int((l[1] - l[3] / 2) * width) 49 | upper_left_y = int((l[2] - l[4] / 2) * height) 50 | bottom_right_x = int((l[1] + l[3] / 2) * width) 51 | bottom_right_y = int((l[2] + l[4] / 2) * height) 52 | 53 | color = random.choice(colors) 54 | cv2.rectangle(img, (upper_left_x, upper_left_y), (bottom_right_x, bottom_right_y), color, 3) 55 | 56 | if len(l) > 5: 57 | # has confidence score 58 | cv2.putText(img, cls + ' ' + str(l[5]), (upper_left_x - m, upper_left_y - m), font, 0.8, color, 2) 59 | else: 60 | # no confidence score 61 | cv2.putText(img, cls, (upper_left_x - m, upper_left_y - m), font, 0.8, color, 2) 62 | 63 | cv2.imwrite(os.path.join(out_dir, 'det_' + file_name + '.png'), img) 64 | 65 | if show_flag: 66 | cv2.imshow(file_name, img) 67 | cv2.waitKey(0) 68 | cv2.destroyAllWindows() 69 | 70 | 71 | def draw(img_dir, label_dir, out_dir, show_flag=False): 72 | """ 73 | Draw bounding boxes of MULTIPLE images. 74 | 75 | Note: Labels share the same name as images, using YOLO format. 76 | e.g. Image = 000001.jpg 77 | Label = 000001.txt 78 | format = 79 | 11 0.344192634561 0.611 0.416430594901 0.262 80 | 14 0.509915014164 0.51 0.974504249292 0.972 81 | 82 | :param img_dir: directory of images OR 83 | list of image names 84 | :param label_dir: directory of labels 85 | :param out_dir: declare output directory, which will be created if not exist. 86 | :param show_flag: display if True. 87 | :return: 88 | """ 89 | if not os.path.exists(out_dir): 90 | os.makedirs(out_dir) 91 | print('"{}" is created.'.format(out_dir)) 92 | else: 93 | print('"{}" exists.'.format(out_dir)) 94 | 95 | 96 | # Image sources 97 | if isinstance(img_dir, list): # from list of image names 98 | img_list = img_dir 99 | else: # from directory of images 100 | img_list = os.listdir(img_dir) 101 | img_list = [os.path.join(img_dir, elem) for elem in img_list] 102 | 103 | for img_name in img_list: 104 | draw_single(img_name, label_dir, out_dir, show_flag) # core 105 | 106 | 107 | def visualize(y_out_epoch, img_name_epoch, image_list, out_dir, conf_threshold=0.1): 108 | """ 109 | Visualize bbox a batch/epoch of images 110 | :param y_out_epoch: N * S * S * (B * 5+C) Tensor 111 | :param img_name_epoch: list of image name 112 | :param image_list: list of path + image_name 113 | :param out_dir: output to be stored here 114 | :param conf_threshold: filter out bbox with small confidence 115 | :return: 116 | """ 117 | assert y_out_epoch.size(0) == len(img_name_epoch) 118 | 119 | # convert to image coordinate [0,1] 120 | # #### Do ONLY once !!! 
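    # (added note, inferred from the function name and the comment above) convert_coord_cell2img
    # appears to map each prediction's cell-relative (x, y) offsets back to image
    # coordinates in [0, 1]; applying it a second time to the same tensor would shift
    # every box again, hence the "do ONLY once" warning.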
121 | Tensors = [convert_coord_cell2img(y_out_epoch[i]) for i in range(y_out_epoch.size(0))] 122 | 123 | # loop over each image 124 | for k in range(y_out_epoch.size(0)): 125 | T = y_out_epoch[k] 126 | img_name = img_name_epoch[k] 127 | res = [] # results to be write to .txt 128 | 129 | # loop over each grid cell 130 | for i in range(S): 131 | for j in range(S): 132 | _, cls = torch.max(T[i, j, :][-C:], 0) 133 | 134 | best_conf = 0 135 | for b in range(B): 136 | bbox = [cls.item()] 137 | bbox = bbox + T[i, j, 5*b: 5*b+5].tolist() 138 | 139 | if b == 0: 140 | best_bbox = bbox 141 | 142 | # for each grid cell, select the box with highest confidence score 143 | if T[i, j, 5*b+4] > best_conf: 144 | best_bbox = bbox 145 | 146 | # filter out bbox with small confidence 147 | if best_bbox[-1] > conf_threshold: 148 | res.append(best_bbox) 149 | 150 | # write to file 151 | with open(os.path.join(out_dir, img_name.split('.')[0] + '.txt'), 'w') as f: 152 | for r in res: 153 | for index in range(len(r)): 154 | if index == 0: 155 | f.write("%d " % r[index]) 156 | else: 157 | f.write("%.4f " % r[index]) 158 | f.write("\n") 159 | 160 | # draw box 161 | draw(image_list, out_dir, out_dir) 162 | 163 | 164 | if __name__ == "__main__": 165 | # # Single 166 | # img_name = '000001.jpg' 167 | img_name = '2008_000008.jpg' 168 | draw_single(img_name, show_flag=True) # automatically find label based on image name 169 | 170 | # # Multiple 171 | 172 | img_dir = '/Users/erica/Workspace/my-yolo-implementation/data/image' 173 | label_dir = '/Users/erica/Workspace/my-yolo-implementation/data/label' 174 | out_dir = '/Users/erica/Workspace/my-yolo-implementation/det' 175 | 176 | draw(img_dir, label_dir, out_dir, show_flag=True) 177 | 178 | 179 | print('Done.') -------------------------------------------------------------------------------- /loss_history_lr=1e-05_ep=150_wo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzzheng/pytorch-yolo-v1/524dd63b8d3029c1815b89de0b32014c8b0d0def/loss_history_lr=1e-05_ep=150_wo.png -------------------------------------------------------------------------------- /mAP_history_lr=1e-05_ep=150_wo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzzheng/pytorch-yolo-v1/524dd63b8d3029c1815b89de0b32014c8b0d0def/mAP_history_lr=1e-05_ep=150_wo.png -------------------------------------------------------------------------------- /main_model_parallel.py: -------------------------------------------------------------------------------- 1 | ''' 2 | This script implement Model Parallel. 
3 | 4 | Output to folders: 5 | ./log 6 | ./plot 7 | ./checkpoints 8 | ./weights 9 | ''' 10 | 11 | import time 12 | import copy 13 | import os.path 14 | import matplotlib.pyplot as plt 15 | from dataset import * 16 | from model_parallel import * 17 | from draw import * 18 | from torch.optim.lr_scheduler import ReduceLROnPlateau 19 | from torch.optim.lr_scheduler import StepLR 20 | 21 | 22 | # def train(num_epoch, dataloader, model, optimizer, learning_rate=1e-4, scheduler=None, 23 | # phases=['train'], use_float64=True, checkpoint_interval=10): 24 | def train(num_epoch, train_txt, val_txt, model, optimizer, learning_rate=1e-4, scheduler=None, 25 | phases=['train'], use_float64=True, use_visualization=False, checkpoint_interval=10): 26 | # suffix 27 | if scheduler is not None: 28 | suffix = '_lr={}_ep={}_w'.format(learning_rate, num_epoch) 29 | else: 30 | suffix = '_lr={}_ep={}_wo'.format(learning_rate, num_epoch) 31 | 32 | # Outputs 33 | fd_log = './log' 34 | if not os.path.exists(fd_log): 35 | os.mkdir(fd_log) 36 | print('{} not exists, it has been created.'.format(fd_log)) 37 | 38 | log_file = 'log' + suffix + '.txt' 39 | if os.path.exists(log_file): 40 | os.remove(log_file) 41 | print('"{}" exists, it has been removed.'.format(log_file)) 42 | 43 | fd_plot = './plot' 44 | if not os.path.exists(fd_plot): 45 | os.mkdir(fd_plot) 46 | print('{} not exists, it has been created.'.format(fd_plot)) 47 | 48 | fd_checkpoints = './checkpoints' 49 | if not os.path.exists(fd_checkpoints): 50 | os.mkdir(fd_checkpoints) 51 | print('{} not exists, it has been created.'.format(fd_checkpoints)) 52 | 53 | fd_weights = './weights' 54 | if not os.path.exists(fd_weights): 55 | os.mkdir(fd_weights) 56 | print('{} not exists, it has been created.'.format(fd_weights)) 57 | 58 | 59 | # Dataset & Dataloader 60 | voc2012_train = VOC(train_txt, IMG_WIDTH, IMG_HEIGHT, data_transform) 61 | voc2012_val = VOC(val_txt, IMG_WIDTH, IMG_HEIGHT, data_transform) 62 | 63 | dataloader = dict() 64 | dataloader['train'] = DataLoader(voc2012_train, batch_size=batch_size, shuffle=True) 65 | dataloader['val'] = DataLoader(voc2012_val, batch_size=batch_size, shuffle=True) 66 | 67 | # Train 68 | epoch_loss_hist = dict() 69 | epoch_mAP_hist = dict() 70 | for elem in phases: 71 | epoch_loss_hist[elem] = [] 72 | epoch_mAP_hist[elem] = [] 73 | 74 | best_mAP = -1000.0 75 | min_loss = 1e10 76 | 77 | for epoch in range(num_epoch): 78 | since = time.time() 79 | print('\n-----------------------------------------') 80 | print('Epoch {}/{}'.format(epoch, num_epoch - 1)) 81 | y_out_epoch = torch.Tensor().to("cuda:1") # record all output in a single epoch 82 | if use_float64: 83 | y_out_epoch = y_out_epoch.double() 84 | img_name_epoch = [] 85 | 86 | for phase in phases: 87 | if phase == 'train': 88 | model.train() 89 | else: 90 | model.eval() 91 | 92 | running_loss = 0 93 | 94 | # Step 1: forward for all, and backward if in train phase 95 | for i, (image_batch, label_batch, img_name_batch) in enumerate(dataloader[phase]): 96 | 97 | image_batch = image_batch.to('cuda:0') 98 | label_batch = label_batch.to('cuda:1') 99 | 100 | if use_float64: 101 | image_batch = image_batch.double() 102 | label_batch = label_batch.double() 103 | 104 | # zero the parameter gradients 105 | optimizer.zero_grad() 106 | 107 | with torch.set_grad_enabled(phase == 'train'): 108 | # 1.1 forward 109 | y_out = model(image_batch) 110 | print('y_out mean = ', torch.mean(y_out)) 111 | print('y_out std = ', torch.std(y_out)) 112 | 113 | y_out_epoch = torch.cat((y_out_epoch, y_out), 
0) 114 | img_name_epoch += img_name_batch 115 | 116 | # loss 117 | loss = calc_loss(y_out.clone(), label_batch.clone(), device, use_float64) 118 | running_loss += loss.item() * image_batch.size(0) 119 | 120 | if scheduler is not None: 121 | scheduler.step(loss) # lr scheduler 122 | 123 | # 1.2 backward 124 | if phase == 'train': 125 | loss.backward() 126 | torch.nn.utils.clip_grad_value_(model.parameters(), clip_value=1) # gradient clip 127 | optimizer.step() 128 | 129 | print('{}\tEpoch = {}\tBatch = {}\tLoss = {:.4f}'.format(phase, epoch, i, loss.item())) 130 | 131 | # Step 2: evaluation 132 | iou_threshold = 0.5 133 | det = prediction2detection(y_out_epoch, img_name_epoch) 134 | 135 | if phase == 'train': 136 | ground_truth = ground_truth_detection(voc2012_train.label_list) 137 | else: 138 | ground_truth = ground_truth_detection(voc2012_val.label_list) 139 | 140 | res = evaluate_IOU(det, ground_truth, device, use_float64=use_float64) 141 | res_tp_fp = evaluate_TP_FP(res, iou_threshold) 142 | 143 | results, acc_tps, acc_fps, precisions, recalls = evaluate_precision_recall(res_tp_fp, iou_threshold, ground_truth) 144 | mAP = calc_mean_average_precision(precisions, recalls) 145 | 146 | epoch_loss = running_loss / len(dataloader[phase].dataset) 147 | epoch_loss_hist[phase].append(epoch_loss) 148 | epoch_mAP_hist[phase].append(mAP) 149 | 150 | time_elapsed = time.time() - since 151 | 152 | # Step 3: log, save weights 153 | # log 154 | print('{}\tLoss = {:.4f}\tmAP = {:.4f}\ttime_elapsed = {:.2f} s\n'.format(phase, epoch_loss, mAP, time_elapsed)) 155 | f = open(os.path.join(fd_log, log_file), 'a') 156 | f.write('{}\tEpoch = {}\tLoss = {:.4f}\tmAP = {:.4f}\ttime_elapsed = {:.2f} s\n'.format( 157 | phase, epoch, epoch_loss, mAP, time_elapsed)) 158 | f.close() 159 | 160 | # save weights 161 | # - min loss 162 | if phase == 'val' and epoch_loss < min_loss: 163 | torch.save(copy.deepcopy(model.state_dict()), os.path.join(fd_weights, 'min_loss_weights' + suffix + '.pth')) 164 | torch.save(copy.deepcopy(model.state_dict()), os.path.join(fd_weights, 'min_loss_weights.pth')) 165 | min_loss = epoch_loss 166 | print('[val] A smaller loss is found. 
\nModel saved.') 167 | 168 | # - best mAP 169 | # if mAP > best_mAP: 170 | if phase == 'val' and mAP > best_mAP: 171 | best_mAP = mAP 172 | torch.save(copy.deepcopy(model.state_dict()), 173 | os.path.join(fd_weights, 'best_model_weights' + suffix + '.pth')) 174 | print('[val] Best model is saved.') 175 | 176 | # - checkpoint 177 | if (epoch + 1) % checkpoint_interval == 0: 178 | torch.save(copy.deepcopy(model.state_dict()), 179 | os.path.join(fd_checkpoints, 'checkpoint_weights_ep{}.pth'.format(epoch+1))) 180 | print('Checkpoint is saved at {}.'.format(fd_checkpoints)) 181 | 182 | # Step 4: visualization 183 | if use_visualization: 184 | visualize(y_out_epoch, img_name_epoch, voc2012_train.image_list, fd_plot) 185 | print('Visualization completed.') 186 | 187 | # plot history 188 | plt.figure(1) 189 | for p in phases: 190 | color = 'r' if p == 'train' else 'm' 191 | plt.plot(range(epoch+1), epoch_loss_hist[p], color) 192 | plt.title('Loss lr = {}'.format(learning_rate)) 193 | plt.xlabel('Epoch') 194 | plt.ylabel('Loss') 195 | plt.legend(phases) 196 | plt.savefig(os.path.join(fd_plot, 'loss_history' + suffix + '.png')) 197 | 198 | plt.figure(2) 199 | for p in phases: 200 | color = 'b' if p == 'train' else 'g' 201 | plt.plot(range(epoch+1), epoch_mAP_hist[p], color) 202 | plt.title('mAP lr = {}'.format(learning_rate)) 203 | plt.xlabel('Epoch') 204 | plt.ylabel('mAP') 205 | plt.legend(phases) 206 | plt.savefig(os.path.join(fd_plot, 'mAP_history' + suffix + '.png')) 207 | return min_loss, best_mAP 208 | 209 | 210 | if __name__ == "__main__": 211 | # Memory / Storage 212 | os.system('rm -rf checkpoints/ log/ weights/ plot/') 213 | torch.cuda.empty_cache() 214 | 215 | # Device 216 | device = torch.device("cuda:1" if torch.cuda.is_available() else "cpu") 217 | 218 | # Parameter 219 | num_epoch = 150 220 | batch_size = 32 221 | use_float64 = False 222 | use_scheduler = False 223 | use_bn = False 224 | learning_rate = 1e-5 225 | 226 | # Weights 227 | # model_weights = './results/1e-5_ep=1-10/checkpoint_weights_ep10.pth' 228 | model_weights = None 229 | 230 | # Dataset 231 | # phases = ['train', 'val'] 232 | phases = ['train'] 233 | # train_txt = '/home/bizon/Dataset/VOC_yolo_format/2007_train.txt' 234 | # val_txt = '/home/bizon/Dataset/VOC_yolo_format/2007_val.txt' 235 | train_txt = '/home/bizon/Dataset/VOC_yolo_format/2012_sanity.txt' 236 | val_txt = '/home/bizon/Dataset/VOC_yolo_format/2012_sanity.txt' 237 | 238 | # Model 239 | yolo_model = build_darknet_parallel(path=model_weights, use_bn=use_bn) 240 | if use_float64: 241 | yolo_model = yolo_model.double() 242 | # Optimizer 243 | optimizer = torch.optim.Adam(yolo_model.parameters(), lr=learning_rate, eps=1e-6) 244 | # scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=10, verbose=True) \ 245 | # if use_scheduler else None 246 | 247 | scheduler = StepLR(optimizer, step_size=5, gamma=0.5, last_epoch=-1) if use_scheduler else None 248 | 249 | ################### Train ################### 250 | print('\n\nlearning rate = ', learning_rate) 251 | 252 | min_loss, best_mAP = train(num_epoch=num_epoch, train_txt=train_txt, val_txt=val_txt, phases=phases, model=yolo_model, 253 | optimizer=optimizer, learning_rate=learning_rate, scheduler=scheduler, 254 | checkpoint_interval=50, use_float64=use_float64, use_visualization=True) 255 | 256 | print('=======================================') 257 | print('Training completed.') 258 | print('Best mAP = {:4f}'.format(best_mAP)) 259 | print('Min loss = {:4f}'.format(min_loss)) 260 | 
print('Done.') 261 | 262 | 263 | 264 | 265 | -------------------------------------------------------------------------------- /main_test_voc.py: -------------------------------------------------------------------------------- 1 | ''' 2 | This script implements the training procedure. 3 | ''' 4 | import time 5 | import os 6 | import copy 7 | from dataset import * 8 | from utils import * 9 | from draw import * 10 | 11 | 12 | if __name__ == "__main__": 13 | 14 | # Detect if GPU is available 15 | device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") 16 | 17 | # Parameters 18 | out_dir = './res' 19 | 20 | if not os.path.exists(out_dir): 21 | os.makedirs(out_dir) 22 | print('"{}" is created.'.format(out_dir)) 23 | else: 24 | print('"{}" exists.'.format(out_dir)) 25 | 26 | 27 | # Dataset 28 | """ 29 | Test on VOC 30 | """ 31 | # voc2012_train = VOC('/Users/erica/Dataset/Pascal/2012_train_short.txt', IMG_WIDTH, IMG_HEIGHT) 32 | voc2012_val = VOC('/Users/erica/Dataset/Pascal/2012_val_short.txt', IMG_WIDTH, IMG_HEIGHT) 33 | dataloader = DataLoader(voc2012_val, batch_size=1) 34 | 35 | # Model 36 | yolo_model = build_darknet() 37 | yolo_model = yolo_model.to(device) 38 | 39 | yolo_model.load_state_dict(torch.load('best_model_weight.pth')) # weights 40 | print('Weights loaded.') 41 | 42 | yolo_model.eval() 43 | print('Evaluation mode.') 44 | 45 | since = time.time() 46 | y_out_epoch = torch.Tensor() # record all output in a single epoch 47 | img_name_epoch = [] 48 | 49 | running_loss = 0 50 | 51 | for i, (image_batch, label_batch, img_name_batch) in enumerate(dataloader): 52 | 53 | image_batch = image_batch.to(device) 54 | label_batch = label_batch.to(device) 55 | 56 | # forward pass 57 | y_out = yolo_model(image_batch) 58 | y_out_epoch = torch.cat((y_out_epoch, y_out), 0) 59 | img_name_epoch += img_name_batch 60 | 61 | # compute loss 62 | loss = calc_loss(y_out.clone(), label_batch.clone()) 63 | running_loss += loss.item() * image_batch.size(0) 64 | 65 | print('Batch = {}\tLoss = {:.4f}'.format(i, loss.item())) 66 | 67 | # evaluation 68 | threshold = 0.5 69 | det = prediction2detection(y_out_epoch, img_name_epoch) 70 | ground_truth = ground_truth_detection(voc2012_val.label_list) 71 | res = evaluate_IOU(det, ground_truth) 72 | res_tp_fp = evaluate_TP_FP(res, threshold) 73 | results, acc_tps, acc_fps, precisions, recalls = evaluate_precision_recall(res_tp_fp, threshold, ground_truth) 74 | 75 | mAP = calc_mean_average_precision(precisions, recalls) 76 | epoch_loss = running_loss / len(dataloader.dataset) 77 | 78 | time_elapsed = time.time() - since 79 | 80 | print('\tLoss = {:.4f}\tmAP = {:.4f}\ttime_elapsed = {:.2f}\n'.format(epoch_loss, mAP, time_elapsed)) 81 | print('Testing completed.') 82 | print('mAP = {:4f}'.format(mAP)) 83 | 84 | # visualization 85 | visualize(y_out_epoch, img_name_epoch, voc2012_val.image_list, out_dir) 86 | print('Visualization completed.') 87 | print('done.') -------------------------------------------------------------------------------- /main_train_voc.py: -------------------------------------------------------------------------------- 1 | ''' 2 | This script implements the training procedure. 
3 | ''' 4 | import time 5 | import copy 6 | import os.path 7 | import matplotlib.pyplot as plt 8 | from dataset import * 9 | from torch.optim.lr_scheduler import ReduceLROnPlateau 10 | from torch.optim.lr_scheduler import StepLR 11 | 12 | 13 | # Memory 14 | torch.cuda.empty_cache() 15 | 16 | # Detect if GPU is available 17 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 18 | print('run in {}'.format(device)) 19 | 20 | 21 | 22 | if __name__ == "__main__": 23 | 24 | # Parameters 25 | num_epoch = 100 26 | learning_rate = 1e-2 27 | batch_size = 2 28 | 29 | # model_weights = 'min_loss_weights.pth' 30 | model_weights = None 31 | 32 | use_scheduler = True 33 | 34 | 35 | 36 | # Setting 37 | # phases = ['train', 'val'] 38 | phases = ['train'] 39 | 40 | # train_txt = '/home/bizon/Dataset/VOC_yolo_format/2012_train.txt' 41 | # val_txt = '/home/bizon/Dataset/VOC_yolo_format/2012_val.txt' 42 | 43 | # train_txt = '/home/bizon/Dataset/VOC_yolo_format/2012_train_short.txt' 44 | # val_txt = '/home/bizon/Dataset/VOC_yolo_format/2012_train_short.txt' 45 | 46 | train_txt = '/home/bizon/Dataset/VOC_yolo_format/2012_sanity.txt' 47 | val_txt = '/home/bizon/Dataset/VOC_yolo_format/2012_sanity.txt' 48 | 49 | # Memory 50 | torch.cuda.empty_cache() 51 | 52 | # Dataset 53 | voc2012_train = VOC(train_txt, IMG_WIDTH, IMG_HEIGHT, data_transform) 54 | voc2012_val = VOC(val_txt, IMG_WIDTH, IMG_HEIGHT, data_transform) 55 | 56 | dataloader = dict() 57 | dataloader['train'] = DataLoader(voc2012_train, batch_size=batch_size) 58 | dataloader['val'] = DataLoader(voc2012_val, batch_size=batch_size) 59 | 60 | # Model 61 | yolo_model = build_darknet(path=model_weights) 62 | yolo_model = nn.DataParallel(yolo_model, device_ids=[1]) 63 | 64 | # Optimizer 65 | optimizer = torch.optim.Adam(yolo_model.parameters(), lr=learning_rate) 66 | # optimizer = torch.optim.SGD(yolo_model.parameters(), lr=learning_rate, momentum=0.9, nesterov=True) 67 | 68 | scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=3, verbose=True) 69 | # scheduler = StepLR(optimizer, step_size=10, gamma=0.1) 70 | 71 | 72 | # Log 73 | log_file = 'log.txt' 74 | if os.path.exists(log_file): 75 | os.remove(log_file) 76 | print('"{}" exists, it has been removed.'.format(log_file)) 77 | 78 | epoch_loss_hist = dict() 79 | for elem in phases: 80 | epoch_loss_hist[elem] = [] 81 | 82 | # Train 83 | best_mAP = 0.0 84 | min_loss = 1e10 85 | 86 | for epoch in range(num_epoch): 87 | since = time.time() 88 | print('\n-----------------------------------------') 89 | print('Epoch {}/{}'.format(epoch, num_epoch - 1)) 90 | y_out_epoch = torch.Tensor().to(device) # record all output in a single epoch 91 | img_name_epoch = [] 92 | 93 | 94 | 95 | for phase in phases: 96 | if phase == 'train': 97 | yolo_model.train() 98 | else: 99 | yolo_model.eval() 100 | 101 | running_loss = 0 102 | 103 | # Step 1: forward / backward 104 | for i, (image_batch, label_batch, img_name_batch) in enumerate(dataloader[phase]): 105 | 106 | image_batch = image_batch.to(device) 107 | label_batch = label_batch.to(device) 108 | 109 | # zero the parameter gradients 110 | optimizer.zero_grad() 111 | 112 | with torch.set_grad_enabled(phase == 'train'): 113 | # foward pass 114 | y_out = yolo_model(image_batch) 115 | y_out_epoch = torch.cat((y_out_epoch, y_out), 0) 116 | y_out_epoch = y_out_epoch.to(device) # ZZ added 117 | img_name_epoch += img_name_batch 118 | 119 | # compute loss 120 | loss = calc_loss(y_out.clone(), label_batch.clone()) 121 | running_loss += loss.item() * 
image_batch.size(0) 122 | 123 | if use_scheduler: 124 | scheduler.step(loss) # lr scheduler 125 | 126 | # backward pass 127 | if phase == 'train': 128 | loss.backward() 129 | optimizer.step() 130 | 131 | print('{}\tEpoch = {}\tBatch = {}\tLoss = {:.4f}'.format(phase, epoch, i, loss.item())) 132 | 133 | 134 | 135 | # Step 2: evaluation 136 | threshold = 0.5 137 | det = prediction2detection(y_out_epoch, img_name_epoch) 138 | 139 | if phase == 'train': 140 | ground_truth = ground_truth_detection(voc2012_train.label_list) 141 | else: 142 | ground_truth = ground_truth_detection(voc2012_val.label_list) 143 | 144 | res = evaluate_IOU(det, ground_truth) 145 | res_tp_fp = evaluate_TP_FP(res, threshold) 146 | results, acc_tps, acc_fps, precisions, recalls = evaluate_precision_recall(res_tp_fp, threshold, ground_truth) 147 | 148 | mAP = calc_mean_average_precision(precisions, recalls) 149 | epoch_loss = running_loss / len(dataloader[phase].dataset) 150 | epoch_loss_hist[phase].append(epoch_loss) 151 | 152 | time_elapsed = time.time() - since 153 | 154 | # Step 3: log, save weights 155 | # log 156 | print('{}\tLoss = {:.4f}\tmAP = {:.4f}\ttime_elapsed = {:.2f} s\n'.format(phase, epoch_loss, mAP, time_elapsed)) 157 | f = open(log_file, 'a') 158 | f.write('{}\tEpoch = {}\tLoss = {:.4f}\tmAP = {:.4f}\ttime_elapsed = {:.2f} s\n'.format(phase, epoch, epoch_loss, mAP, time_elapsed)) 159 | f.close() 160 | 161 | # weights 162 | # - min loss 163 | if epoch_loss < min_loss: 164 | torch.save(copy.deepcopy(yolo_model.state_dict()), 'min_loss_weights.pth') 165 | min_loss = epoch_loss 166 | print('a smaller loss is found.') 167 | # - best mAP 168 | if phase == 'val' and mAP > best_mAP: 169 | best_mAP = mAP 170 | torch.save(copy.deepcopy(yolo_model.state_dict()), 'best_model_weights.pth') 171 | 172 | # plot history 173 | for p in phases: 174 | plt.plot(range(epoch+1), epoch_loss_hist[p]) 175 | plt.title('lr = {}'.format(learning_rate)) 176 | plt.xlabel('Epoch') 177 | plt.ylabel('Loss') 178 | plt.legend(phases) 179 | plt.savefig('loss_history.png') 180 | 181 | print('Training completed.') 182 | print('Best mAP = {:4f}'.format(best_mAP)) 183 | print('Done.') 184 | -------------------------------------------------------------------------------- /main_voc.py: -------------------------------------------------------------------------------- 1 | ''' 2 | This script implements the training procedure. 
3 | ''' 4 | from dataset import * 5 | 6 | 7 | # Detect if GPU is available 8 | device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") 9 | 10 | 11 | if __name__ == "__main__": 12 | 13 | # Parameters 14 | # optimizer 15 | num_epoch = 3 16 | learning_rate = 1e-4 17 | 18 | # Dataset 19 | """ 20 | Train on VOC 21 | """ 22 | voc2012_train = VOC('/Users/erica/Dataset/Pascal/2012_train_short.txt', IMG_WIDTH, IMG_HEIGHT) 23 | voc2012_val = VOC('/Users/erica/Dataset/Pascal/2012_val_short.txt', IMG_WIDTH, IMG_HEIGHT) 24 | 25 | dataloader = dict() 26 | dataloader['train'] = DataLoader(voc2012_train, batch_size=4) 27 | dataloader['val'] = DataLoader(voc2012_val, batch_size=4) 28 | 29 | # Model 30 | yolo_model = build_darknet() 31 | yolo_model = yolo_model.to(device) 32 | yolo_model.train() 33 | 34 | # Optimize 35 | optimizer = torch.optim.Adam(yolo_model.parameters(), lr=learning_rate) 36 | 37 | for epoch in range(num_epoch): 38 | print('\n-----------------------------------------') 39 | print('Epoch {}/{}'.format(epoch, num_epoch - 1)) 40 | y_out_epoch = torch.Tensor() # record all output in a single epoch 41 | img_name_epoch = [] 42 | 43 | for phase in ['train', 'val']: 44 | if phase == 'train': 45 | yolo_model.train() 46 | else: 47 | yolo_model.eval() 48 | 49 | running_loss = 0 50 | 51 | for i, (image_batch, label_batch, img_name_batch) in enumerate(dataloader[phase]): 52 | 53 | image_batch = image_batch.to(device) 54 | label_batch = label_batch.to(device) 55 | 56 | # zero the parameter gradients 57 | optimizer.zero_grad() 58 | 59 | with torch.set_grad_enabled(phase == 'train'): 60 | # foward pass 61 | y_out = yolo_model(image_batch) 62 | y_out_epoch = torch.cat((y_out_epoch, y_out), 0) 63 | img_name_epoch += img_name_batch 64 | 65 | # compute loss 66 | loss = calc_loss(y_out.clone(), label_batch.clone()) 67 | running_loss += loss.item() * image_batch.size(0) 68 | 69 | # backward pass 70 | if phase == 'train': 71 | loss.backward() 72 | optimizer.step() 73 | 74 | print('Epoch = ', epoch, '\tBatch = ', i, '\tLoss = ', loss.item()) 75 | 76 | # evaluation 77 | threshold = 0.5 78 | det = prediction2detection(y_out_epoch, img_name_epoch) 79 | ground_truth = ground_truth_detection(voc2012_train.label_list) 80 | res = evaluate_IOU(det, ground_truth) 81 | res_tp_fp = evaluate_TP_FP(res, threshold) 82 | results, acc_tps, acc_fps, precisions, recalls = evaluate_precision_recall(res_tp_fp, threshold, ground_truth) 83 | mAP = calc_mean_average_precision(precisions, recalls) 84 | 85 | epoch_loss = running_loss / len(dataloader[phase].dataset) 86 | 87 | print('{}\tLoss = {}\tmAP = {}\n'.format(phase, epoch_loss, mAP)) 88 | 89 | print('Done.') 90 | -------------------------------------------------------------------------------- /model.py: -------------------------------------------------------------------------------- 1 | ''' 2 | This script defines the network architecture. 
3 | ''' 4 | 5 | import numpy as np 6 | import torch 7 | import torch.nn as nn 8 | import torch.nn.functional as F 9 | from skimage import io 10 | from skimage.transform import rescale, resize, downscale_local_mean 11 | from utils import * 12 | 13 | 14 | # model_weights = 'curr_model_weights.pth' 15 | model_weights = None 16 | 17 | 18 | class Darknet(nn.Module): 19 | 20 | def __init__(self, features, path=None): 21 | super(Darknet, self).__init__() 22 | self.features = features 23 | self.classifier = nn.Sequential( 24 | nn.Linear(1024 * S * S, 4096), 25 | nn.LeakyReLU(0.1), 26 | nn.Linear(4096, S * S * (B * 5 + C)), 27 | ) 28 | 29 | if path is None: 30 | self._initialize_weights() 31 | print("Weights initialized.") 32 | else: 33 | # load checkpoints / pretrained weights 34 | self.load_state_dict({k.replace('module.', ''): v for k, v in torch.load(path).items()}) 35 | print('Weights loaded from "{}"'.format(path)) 36 | 37 | def forward(self, x): 38 | print('x') 39 | x = self.features(x) 40 | x = x.view(x.size(0), -1) 41 | x = self.classifier(x) 42 | x = x.view(x.size(0), S, S, B * 5 + C) 43 | return x 44 | 45 | def _initialize_weights(self): 46 | for m in self.modules(): 47 | if isinstance(m, nn.Conv2d): 48 | nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') 49 | if m.bias is not None: 50 | nn.init.constant_(m.bias, 0) 51 | elif isinstance(m, nn.BatchNorm2d): 52 | nn.init.constant_(m.weight, 1) 53 | nn.init.constant_(m.bias, 0) 54 | elif isinstance(m, nn.Linear): 55 | nn.init.normal_(m.weight, 0, 0.01) 56 | nn.init.constant_(m.bias, 0) 57 | 58 | 59 | def expand_cfg(cfg): 60 | cfg_expanded = [] 61 | for v in cfg: 62 | if isinstance(v, list): 63 | times = v[-1] 64 | for _ in range(times): 65 | cfg_expanded = cfg_expanded + v[:-1] 66 | else: 67 | cfg_expanded.append(v) 68 | return cfg_expanded 69 | 70 | 71 | def make_layers(cfg): 72 | ''' 73 | Make layers based on configuration. 
74 | :param cfg: expanded cfg, that is, no list as element 75 | :return: nn sequential module 76 | ''' 77 | layers = [] 78 | in_channels = 3 79 | for v in cfg: 80 | if v == 'M': # Max pool 81 | layers += [nn.MaxPool2d(kernel_size=2, stride=2)] 82 | elif isinstance(v, tuple): 83 | if len(v) == 3: 84 | # Conv (kernel_size, out_channels, stride) 85 | layers += [nn.Conv2d(in_channels, out_channels=v[1], kernel_size=v[0], stride=2)] 86 | else: 87 | # Conv (kernel_size, out_channels) 88 | layers += [nn.Conv2d(in_channels, out_channels=v[1], kernel_size=v[0])] 89 | layers += [nn.BatchNorm2d(num_features=v[1])] # BN 90 | print('[new] BN is added.') 91 | 92 | layers += [nn.LeakyReLU(0.1)] # Leaky rectified linear activation 93 | in_channels = v[1] 94 | print('Make layers done.') 95 | return nn.Sequential(*layers) 96 | 97 | 98 | ## Config format 99 | # M = Maxpool 100 | # tuple = Conv(kernel_size, out_channels, stride) 101 | 102 | cfg = [ 103 | (7, 64, 2), 'M', # 1 104 | (3, 192), 'M', # 2 105 | (1, 128), (3, 256), (1, 256), (3, 512), 'M', # 3 106 | [(1, 256), (3, 512), 4], (1, 512), (3, 1024), 'M', # 4 107 | [(1, 512), (3, 1024), 2], (3, 1024), (3, 1024, 2), # 5 108 | (3, 1024), (3, 1024) # 6 109 | ] 110 | 111 | 112 | def build_darknet(path=None, **kwargs): 113 | # define architecture 114 | extract_features = make_layers(cfg) 115 | model = Darknet(extract_features, path, **kwargs) 116 | ''' 117 | # load weights if using pre-trained 118 | if path is not None: 119 | model.load_state_dict(path) 120 | ''' 121 | return model 122 | 123 | 124 | if __name__ == "__main__": 125 | 126 | # model 127 | yolo_model = build_darknet(path=model_weights) 128 | 129 | # input 130 | ''' 131 | I = io.imread('000001.jpg') 132 | I = resize(I, (448, 448)) 133 | Imgs = I[np.newaxis, :] 134 | Imgs = torch.Tensor(Imgs).permute(0, 3, 1, 2) 135 | print('Imgs.size = ', Imgs.size()) 136 | ''' 137 | 138 | Imgs = torch.randn(20, 3, 448, 448) # test image batch 139 | print('Imgs.size = ', Imgs.size()) 140 | 141 | # output 142 | output = yolo_model(Imgs) 143 | print('Done.') 144 | 145 | 146 | 147 | -------------------------------------------------------------------------------- /model_parallel.py: -------------------------------------------------------------------------------- 1 | ''' 2 | This script defines the network architecture. 
3 | ''' 4 | 5 | import numpy as np 6 | import torch 7 | import torch.nn as nn 8 | import torch.nn.functional as F 9 | from skimage import io 10 | from skimage.transform import rescale, resize, downscale_local_mean 11 | from utils import * 12 | 13 | 14 | # model_weights = 'curr_model_weights.pth' 15 | model_weights = None 16 | 17 | 18 | class Darknet_Parallel(nn.Module): 19 | 20 | def __init__(self, features_1, features_2, path=None): 21 | super(Darknet_Parallel, self).__init__() 22 | self.features_1 = features_1.to('cuda:0') 23 | self.features_2 = features_2.to('cuda:1') 24 | self.classifier = nn.Sequential( 25 | nn.Linear(1024 * S * S, 4096), 26 | nn.LeakyReLU(0.1), 27 | nn.Linear(4096, S * S * (B * 5 + C)), 28 | ).to('cuda:1') 29 | 30 | if path is None: 31 | self._initialize_weights() 32 | print("Weights initialized.") 33 | else: 34 | # load checkpoints / pre-trained weights 35 | self.load_state_dict({k.replace('module.', ''): v for k, v in torch.load(path).items()}) 36 | print('Weights loaded from "{}"'.format(path)) 37 | 38 | def forward(self, x): 39 | print('x') 40 | x = self.features_1(x).to('cuda:1') 41 | x = self.features_2(x) 42 | x = x.view(x.size(0), -1) 43 | x = self.classifier(x) 44 | x = x.view(x.size(0), S, S, B * 5 + C) 45 | return x 46 | 47 | def _initialize_weights(self): 48 | for m in self.modules(): 49 | if isinstance(m, nn.Conv2d): 50 | nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') 51 | if m.bias is not None: 52 | nn.init.constant_(m.bias, 0) 53 | elif isinstance(m, nn.BatchNorm2d): 54 | nn.init.constant_(m.weight, 1) 55 | nn.init.constant_(m.bias, 0) 56 | elif isinstance(m, nn.Linear): 57 | nn.init.normal_(m.weight, 0, 0.01) 58 | nn.init.constant_(m.bias, 0) 59 | 60 | 61 | def expand_cfg(cfg): 62 | cfg_expanded = [] 63 | for v in cfg: 64 | if isinstance(v, list): 65 | times = v[-1] 66 | for _ in range(times): 67 | cfg_expanded = cfg_expanded + v[:-1] 68 | else: 69 | cfg_expanded.append(v) 70 | return cfg_expanded 71 | 72 | 73 | def make_layers_p(cfg, in_channels=3, use_bn=True): 74 | ''' 75 | Make layers based on configuration (Model parallel version) 76 | :param cfg: expanded cfg, that is, no list as element 77 | :return: nn sequential module 78 | ''' 79 | layers = [] 80 | for v in cfg: 81 | if v == 'M': # Max pool 82 | layers += [nn.MaxPool2d(kernel_size=2, stride=2)] 83 | elif isinstance(v, tuple): 84 | if len(v) == 3: 85 | # Conv (kernel_size, out_channels, stride) 86 | layers += [nn.Conv2d(in_channels, out_channels=v[1], kernel_size=v[0], stride=2)] 87 | else: 88 | # Conv (kernel_size, out_channels) 89 | layers += [nn.Conv2d(in_channels, out_channels=v[1], kernel_size=v[0])] 90 | 91 | if use_bn: 92 | layers += [nn.BatchNorm2d(num_features=v[1])] # BN 93 | print('[new] BN is added.') 94 | 95 | layers += [nn.LeakyReLU(0.1)] # Leaky rectified linear activation 96 | in_channels = v[1] 97 | print('Make layers done.') 98 | print('in_channels = ', in_channels) 99 | return nn.Sequential(*layers), in_channels 100 | 101 | 102 | ## Config format 103 | # M = Maxpool 104 | # tuple = Conv(kernel_size, out_channels, stride) 105 | 106 | cfg = [ 107 | (7, 64, 2), 'M', # 1 108 | (3, 192), 'M', # 2 109 | (1, 128), (3, 256), (1, 256), (3, 512), 'M', # 3 110 | [(1, 256), (3, 512), 4], (1, 512), (3, 1024), 'M', # 4 111 | [(1, 512), (3, 1024), 2], (3, 1024), (3, 1024, 2), # 5 112 | (3, 1024), (3, 1024) # 6 113 | ] 114 | 115 | 116 | cfg_1 = [ 117 | (7, 64, 2), 'M', # 1 118 | (3, 192), 'M', # 2 119 | (1, 128), (3, 256), (1, 256), (3, 512), 'M', # 3 120 | [(1, 
256), (3, 512), 4], (1, 512), (3, 1024), 'M', # 4 121 | [(1, 512), (3, 1024), 2], (3, 1024), (3, 1024, 2) # 5 122 | #, 123 | # (3, 1024), (3, 1024) # 6 124 | ] 125 | 126 | cfg_2 = [ 127 | # (7, 64, 2), 'M', # 1 128 | # (3, 192), 'M', # 2 129 | # (1, 128), (3, 256), (1, 256), (3, 512), 'M', # 3 130 | # [(1, 256), (3, 512), 4], (1, 512), (3, 1024), 'M', # 4 131 | # [(1, 512), (3, 1024), 2], (3, 1024), (3, 1024, 2), # 5 132 | (3, 1024), (3, 1024) # 6 133 | ] 134 | 135 | 136 | def build_darknet(path=None, **kwargs): 137 | # define architecture 138 | extract_features = make_layers(cfg) 139 | model = Darknet(extract_features, path, **kwargs) 140 | 141 | ''' 142 | # load weights if using pre-trained 143 | if path is not None: 144 | model.load_state_dict(path) 145 | print('Weights loaded from', path) 146 | ''' 147 | return model 148 | 149 | 150 | def build_darknet_parallel(path=None, use_bn=True, **kwargs): 151 | # define architecture 152 | # extract_features_1, in_channels = make_layers_p(cfg_1) # no expanded 153 | # extract_features_2, _ = make_layers_p(cfg_2, in_channels) # no expanded 154 | 155 | cfg_1_ex = expand_cfg(cfg_1) 156 | cfg_2_ex = expand_cfg(cfg_2) 157 | extract_features_1, in_channels = make_layers_p(cfg_1_ex, use_bn=use_bn) 158 | extract_features_2, _ = make_layers_p(cfg_2_ex, in_channels, use_bn=use_bn) 159 | 160 | model = Darknet_Parallel(extract_features_1, extract_features_2, path, **kwargs) 161 | 162 | ''' 163 | # load weights if using pre-trained 164 | if path is not None: 165 | model.load_state_dict(path) 166 | print('Weights loaded from', path) 167 | ''' 168 | return model 169 | 170 | 171 | if __name__ == "__main__": 172 | 173 | # model 174 | yolo_model = build_darknet_parallel(path=model_weights) 175 | 176 | # input 177 | ''' 178 | I = io.imread('000001.jpg') 179 | I = resize(I, (448, 448)) 180 | Imgs = I[np.newaxis, :] 181 | Imgs = torch.Tensor(Imgs).permute(0, 3, 1, 2) 182 | print('Imgs.size = ', Imgs.size()) 183 | ''' 184 | 185 | Imgs = torch.randn(20, 3, 448, 448).to('cuda:0') # test image batch 186 | print('Imgs.size = ', Imgs.size()) 187 | 188 | # output 189 | output = yolo_model(Imgs) 190 | print('Done.') 191 | 192 | 193 | 194 | -------------------------------------------------------------------------------- /pallete: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzzheng/pytorch-yolo-v1/524dd63b8d3029c1815b89de0b32014c8b0d0def/pallete -------------------------------------------------------------------------------- /readme-pic: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /train.py: -------------------------------------------------------------------------------- 1 | ''' 2 | This script implements the training procedure. 3 | ''' 4 | from model import * 5 | from utils import * 6 | # import math 7 | # import sys 8 | import torch 9 | from dataset import * 10 | 11 | LAMBDA_COORD = 5 12 | LAMBDA_NOOBJ = 0.5 13 | 14 | 15 | def predict_one_bbox(P, G, device=torch.device("cpu"), use_float64=False): 16 | """ 17 | Tensor version of predict_one_box 18 | Select ONE bounding box per grid cell. 19 | Note: 20 | YOLO predicts MULTIPLE bounding boxes per grid cell. 21 | At training time we only want one bounding box predictor to be responsible for each object. 
22 | We assign one predictor to be “responsible” for predicting an object 23 | based on which prediction has the highest current IOU with the ground truth. 24 | 25 | :param P: Prediction tensor S*S*(B*5+C) with MULTIPLE bounding boxes per grid cell. 26 | format < > * B + 27 | 28 | :param G: GroundTruth tensor S*S*5 29 | format 30 | 31 | :return: Q: Prediction tensor S*S*(5+C) with SINGLE bounding box per grid cell. 32 | format 33 | """ 34 | if P.size(0) != S or P.size(1) != S or P.size(2) != 5*B+C: 35 | raise Exception("Tensor size not match") 36 | 37 | # convert to image coordinate [0,1] 38 | # #### Do ONLY once !!! 39 | P = convert_coord_cell2img(P) # todo: not compatible 40 | Q = torch.zeros(S, S, 5+C) # init 41 | if use_float64: 42 | Q = Q.double() 43 | 44 | select = torch.tensor(0).to(device) # init 45 | 46 | for i in range(S): # loop over each grid cell 47 | for j in range(S): 48 | 49 | # localization loss 50 | # boxes = torch.tensor([], dtype=torch.float32) # store all boxes' position (x, y, w, h) 51 | # get all bbox assigned for this grid cell 52 | # format < 53 | for b in range(B): 54 | if b == 0: 55 | boxes = P[i, j, b*5: b*5+5].to(device) 56 | else: 57 | # boxes.append((P[i, j, b*5], P[i, j, b*5+1], P[i, j, b*5+2], P[i, j, b*5+3], P[i, j, b*5+4])) 58 | boxes = torch.stack((boxes, P[i, j, b*5: b*5+5])).to(device) 59 | 60 | # case 1: ground truth has bbox at this grid cell 61 | # Select one box has the highest IOU with ground truth 62 | if len(G[i, j, :].nonzero()) > 1: 63 | # max_iou = 0 # init 64 | max_iou = torch.tensor([0.]).to(device) # init 65 | if use_float64: 66 | max_iou = max_iou.double() 67 | 68 | groundtruth_box = torch.clone(G[i, j, :4]) 69 | 70 | for b in range(B): 71 | # iou = calc_IOU(groundtruth_box, boxes[b][:-1]) 72 | iou = calc_IOU(groundtruth_box, boxes[b][:-1], device, use_float64) # use Tensor version 73 | 74 | if iou > max_iou: 75 | max_iou = iou 76 | select = torch.tensor(b).to(device) 77 | 78 | # case 2: ground truth has NO bbox at this grid cell 79 | # Pick one box with highest confidence 80 | # todo: slightly different from the original paper 81 | else: 82 | max_confidence = torch.tensor(0.).to(device) # init 83 | if use_float64: 84 | max_confidence = max_confidence.double() 85 | 86 | for b in range(B): 87 | confidence = boxes[b][-1] 88 | 89 | if use_float64: 90 | confidence = confidence.double() 91 | 92 | if confidence > max_confidence: 93 | max_confidence = confidence 94 | select = torch.tensor(b).to(device) 95 | 96 | # classification loss 97 | # copy the selected box info to Q 98 | Q[i, j, :5] = boxes[select] # bbox (pos + confidence) 99 | Q[i, j, 5:] = P[i, j, -C:] # class probabilities 100 | return Q 101 | 102 | 103 | def calc_loss_single(P, G, use_float64=False): 104 | """ 105 | Compute multi-part loss function on a Single instance, for a Single bbox. 106 | :param P: Prediction tensor S*S*(5+C) with SINGLE bounding box per grid cell. 
107 | :param G: GroundTruth tensor S*S*5 108 | 109 | :return: loss 110 | """ 111 | if P.size(0) != S or P.size(1) != S or P.size(2) != 5+C: 112 | print(" Prediction tensor size is ", P.size()) 113 | raise Exception("Tensor size not match") 114 | 115 | if G.size(0) != S or G.size(1) != S or G.size(2) != 5: 116 | print(" GroundTruth tensor size is ", G.size()) 117 | raise Exception("Tensor size not match") 118 | 119 | loss = torch.zeros(1) # init 120 | if use_float64: 121 | loss = loss.double() 122 | 123 | for i in range(S): 124 | for j in range(S): 125 | # case 1: grid cell HAS object 126 | if len(G[i, j, :].nonzero()) > 1: 127 | # localization 128 | loss = loss + LAMBDA_COORD * (torch.pow(P[i, j, 0] - G[i, j, 0], 2) + torch.pow(P[i, j, 1] - G[i, j, 1], 2)) 129 | 130 | loss = loss + LAMBDA_COORD * (torch.pow(torch.sqrt(torch.abs(P[i, j, 2])) - torch.sqrt(torch.abs(G[i, j,2])), 2) \ 131 | + torch.pow(torch.sqrt(torch.abs(P[i, j, 3])) - torch.sqrt(torch.abs(G[i, j, 3])), 2)) # org 132 | # loss = loss + LAMBDA_COORD * (torch.sqrt(torch.abs(P[i, j, 2] - G[i, j, 2])) + 133 | # torch.sqrt(torch.abs(P[i, j, 3] - G[i, j, 3]))) # ZZ 134 | 135 | loss = loss + torch.pow(P[i, j, 4]-1, 2) # Ground truth confidence is constant 1 136 | 137 | # classification 138 | true_cls = G[i, j, -1].type(torch.int64) 139 | true_cls_vec = torch.zeros(C) 140 | true_cls_vec[true_cls] = torch.tensor(1) 141 | pred_cls_vec = P[i, j, -C:] 142 | 143 | if use_float64: 144 | pred_cls_vec = pred_cls_vec.double() 145 | true_cls_vec = true_cls_vec.double() 146 | loss = loss + torch.sum(torch.pow(pred_cls_vec - true_cls_vec, 2)) 147 | 148 | # case 2: grid cell NO object 149 | # classification 150 | else: 151 | loss = loss + LAMBDA_NOOBJ * torch.pow(P[i, j, 4]-0, 2) # Ground truth confidence is constant 0 152 | return loss 153 | 154 | 155 | def calc_loss(P_batch, G_batch, device=torch.device("cpu"), use_float64=False): 156 | """ 157 | Compute multi-part loss function on a Batch. 158 | :param P_batch: Model Output. 159 | Prediction tensor batch N*S*S*(5*B+C) with MULTIPLE bounding box per grid cell. 160 | :param G_batch: GroundTruth tensor batch N*S*S*5 161 | :return: 162 | """ 163 | if P_batch.size(0) != G_batch.size(0): 164 | raise Exception("Batch size does not match.") 165 | 166 | if len(P_batch.size()) != 4 or len(G_batch.size()) != 4: 167 | raise Exception("Input or Ground truth is not a Batch. 
") 168 | 169 | total_loss = torch.tensor(0.0) 170 | if use_float64: 171 | total_loss.double() 172 | 173 | for i in range(P_batch.size(0)): 174 | P = P_batch[i] 175 | G = G_batch[i] 176 | Q = predict_one_bbox(P, G, device, use_float64) # predict ONE bbox for each Grid cell 177 | total_loss = total_loss + calc_loss_single(Q, G, use_float64) # compute Loss for one instance 178 | 179 | total_loss = total_loss / P_batch.size(0) 180 | return total_loss 181 | 182 | 183 | if __name__ == "__main__": 184 | labels = read_labels('000001.txt') 185 | G = labels2tensor(labels) 186 | 187 | # Feed model 188 | yolo_model = build_darknet() 189 | 190 | # Data 191 | X = torch.randn(20, 3, 448, 448) # image batch (random) 192 | Y = torch.clamp(torch.randn(20, 7, 7, 5), 0, 1) # label batch (random) 193 | 194 | # X.requires_grad = True 195 | # Y.requires_grad = True 196 | 197 | # Prediction 198 | Y_out = yolo_model(X) 199 | # Y_pred = [predict_one_bbox(Y_out[i].clone(), Y[i].clone()) for i in range(20)] 200 | # Y_pred = torch.stack(Y_pred) 201 | 202 | # Loss 203 | total_loss = calc_loss(Y_out, Y) 204 | print('total loss = ', total_loss) 205 | 206 | # Optimizer 207 | learning_rate = 1e-4 208 | optimizer = torch.optim.Adam(yolo_model.parameters(), lr=learning_rate) 209 | 210 | # Training 211 | for t in range(30): 212 | # forward pass 213 | Y_out = yolo_model(X) 214 | 215 | # compute loss 216 | loss = calc_loss(Y_out.clone(), Y.clone()) 217 | print('\nEpoch = ', t, 'Loss = ', loss.item()) 218 | 219 | optimizer.zero_grad() 220 | loss.backward() 221 | optimizer.step() 222 | 223 | print('Done.') 224 | 225 | 226 | 227 | 228 | -------------------------------------------------------------------------------- /utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | from dataset import * 4 | 5 | IMG_WIDTH = 448 6 | IMG_HEIGHT = 448 7 | S = 7 # number of grid cell is S*S 8 | B = 2 # number of bbox for each grid cell 9 | C = 20 # number of classes 10 | # C = 1 # debug!!! 11 | 12 | # torch.set_default_tensor_type('torch.cuda.FloatTensor') 13 | # device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 14 | 15 | def read_labels(label_file): 16 | """ 17 | Read labels from files 18 | Note: 19 | use YOLO author's format 20 | :param label_file: a .txt file 21 | :return: labels: [list] 22 | """ 23 | with open(label_file, 'r') as f: 24 | lines = f.readlines() 25 | labels = [] 26 | for l in lines: 27 | l = l.split() 28 | l = [float(elem) for elem in l] 29 | labels.append(l) 30 | return labels 31 | 32 | 33 | def labels2tensor(labels): 34 | """ 35 | Build Groundtruth tensor S*S*5. 36 | :param labels: list of labels with bounding box classification and position for each image. 37 | :return: T: Groundtruth tensor S*S*5. 38 | format 39 | """ 40 | T = torch.zeros(S, S, 5) # init 41 | 42 | gcell_size = 1. 
/ S 43 | for label in labels: # mark labels 44 | cls = label[0] 45 | x = label[1] 46 | y = label[2] 47 | w = label[3] 48 | h = label[4] 49 | # Be aware: row are x-axis image coordinate, in 2nd dimension of Tensor 50 | 51 | T[int(y/gcell_size), int(x/gcell_size), 0] = x 52 | T[int(y/gcell_size), int(x/gcell_size), 1] = y 53 | T[int(y/gcell_size), int(x/gcell_size), 2] = w 54 | T[int(y/gcell_size), int(x/gcell_size), 3] = h 55 | T[int(y/gcell_size), int(x/gcell_size), 4] = cls 56 | 57 | ''' 58 | # w,h already related to whole image, no action required 59 | # normalize x,y to grid cell offset 60 | x = (x - int(x/gcell_size) * gcell_size) / gcell_size 61 | y = (y - int(y/gcell_size) * gcell_size) / gcell_size 62 | ''' 63 | T[int(y / gcell_size), int(x / gcell_size)] = torch.tensor([x, y, w, h, cls]) 64 | 65 | return T 66 | 67 | 68 | def convert_coord_cell2img(T): 69 | """ 70 | Convert x, y from grid cell offset to image coordinate [0, 1] 71 | :param T: Prediction tensor S*S*(B*5+C) 72 | format < > * B + 73 | :return: T: converted 74 | Note: 75 | Clone input argument!!! 76 | Example: 77 | >> T = torch.zeros(3,3,5) 78 | >> T2 = convert_coord_cell2img(T.clone()) 79 | """ 80 | # T.requires_grad = True # need backprop 81 | 82 | if T.size(0) != S or T.size(1) != S or T.size(2) != 5*B+C: 83 | raise Exception("Tensor size not match") 84 | 85 | # Be aware: row are x-axis image coordinate, in 2nd dimension of Tensor 86 | for b in range(B): 87 | # cells with object, that is, != 0 88 | cells_index = T[:, :, b*5-1].nonzero() 89 | for i in range(cells_index.size(0)): 90 | (m, n) = cells_index[i] 91 | m = int(m) 92 | n = int(n) 93 | # grid cell offset to normalized image coordinates 94 | T[m, n, b*5] = n*(1. / S) + T[m, n, b*5].clone() *(1. / S) # x 95 | T[m, n, b*5+1] = m*(1. / S) + T[m, n, b*5+1].clone() *(1. 
/ S) # y 96 | return T 97 | 98 | ''' 99 | def calc_IOU(box_1, box_2): 100 | """ 101 | compute IOU between two bounding boxes 102 | :param box_1: (x, y, w, h) image coordinates in [0, 1] 103 | :param box_2: (x, y, w, h) image coordinates in [0, 1] 104 | :return: 105 | """ 106 | x_min_1 = torch.clamp(torch.Tensor((box_1[0] - box_1[2] / 2,)), 0, 1) 107 | x_max_1 = torch.clamp(torch.Tensor((box_1[0] + box_1[2] / 2,)), 0, 1) 108 | y_min_1 = torch.clamp(torch.Tensor((box_1[1] - box_1[3] / 2,)), 0, 1) 109 | y_max_1 = torch.clamp(torch.Tensor((box_1[1] + box_1[3] / 2,)), 0, 1) 110 | 111 | x_min_2 = torch.clamp(torch.Tensor((box_2[0] - box_2[2] / 2,)), 0, 1) 112 | x_max_2 = torch.clamp(torch.Tensor((box_2[0] + box_2[2] / 2,)), 0, 1) 113 | y_min_2 = torch.clamp(torch.Tensor((box_2[1] - box_2[3] / 2,)), 0, 1) 114 | y_max_2 = torch.clamp(torch.Tensor((box_2[1] + box_2[3] / 2,)), 0, 1) 115 | 116 | overlap_width = max(min(x_max_1, x_max_2) - max(x_min_1, x_min_2), 0) 117 | overlap_height = max(min(y_max_1, y_max_2) - max(y_min_1, y_min_2), 0) 118 | 119 | overlap_area = overlap_width * overlap_height 120 | union_area = (x_max_1 - x_min_1) * (y_max_1 - y_min_1) \ 121 | + (x_max_2 - x_min_2) * (y_max_2 - y_min_2) \ 122 | - overlap_area 123 | intersection_over_union = overlap_area / union_area 124 | return intersection_over_union 125 | ''' 126 | 127 | # def calc_IOU(box_1, box_2, device=torch.device('cpu'), use_float64=False): 128 | def calc_IOU(box_1, box_2, device=torch.device('cpu'), use_float64=False): 129 | """ 130 | Tensor version of calc_IOU() 131 | compute IOU between two bounding boxes 132 | :param box_1: Detection x, y, w, h image coordinates in [0, 1] 133 | :param box_2: GroundTruth x, y, w, h image coordinates in [0, 1] 134 | :return: 135 | """ 136 | ''' 137 | x_min_1 = torch.clamp((box_1[0] - box_1[2] / 2), 0, 1).to(device) 138 | x_max_1 = torch.clamp((box_1[0] + box_1[2] / 2), 0, 1).to(device) 139 | y_min_1 = torch.clamp((box_1[1] - box_1[3] / 2), 0, 1).to(device) 140 | y_max_1 = torch.clamp((box_1[1] + box_1[3] / 2), 0, 1).to(device) 141 | ''' 142 | 143 | x_min_1 = torch.clamp((abs(box_1[0]) - abs(box_1[2]) / 2), 0, 1).to(device) 144 | x_max_1 = torch.clamp((abs(box_1[0]) + abs(box_1[2]) / 2), 0, 1).to(device) 145 | y_min_1 = torch.clamp((abs(box_1[1]) - abs(box_1[3]) / 2), 0, 1).to(device) 146 | y_max_1 = torch.clamp((abs(box_1[1]) + abs(box_1[3]) / 2), 0, 1).to(device) 147 | 148 | x_min_2 = torch.clamp((box_2[0] - box_2[2] / 2), 0, 1).to(device) 149 | x_max_2 = torch.clamp((box_2[0] + box_2[2] / 2), 0, 1).to(device) 150 | y_min_2 = torch.clamp((box_2[1] - box_2[3] / 2), 0, 1).to(device) 151 | y_max_2 = torch.clamp((box_2[1] + box_2[3] / 2), 0, 1).to(device) 152 | 153 | 154 | # z = torch.tensor(0, dtype=torch.float).to(device) 155 | z = torch.tensor(0.).to(device) 156 | if use_float64: 157 | z = z.double() 158 | 159 | a = torch.min(x_max_1, x_max_2) 160 | b = torch.max(x_min_1, x_min_2) 161 | c = torch.min(y_max_1, y_max_2) 162 | d = torch.max(y_min_1, y_min_2) 163 | 164 | overlap_width = torch.max(a-b, z) 165 | overlap_height = torch.max(c-d, z) 166 | overlap_area = overlap_width * overlap_height 167 | 168 | union_area = (x_max_1 - x_min_1) * (y_max_1 - y_min_1) \ 169 | + (x_max_2 - x_min_2) * (y_max_2 - y_min_2) \ 170 | - overlap_area 171 | intersection_over_union = overlap_area / union_area 172 | return intersection_over_union 173 | 174 | 175 | def prediction2detection(Tensors, Images, conf_threshold=0.1): 176 | """ 177 | In the Evaluation stage, summarize the detection results in terms 
of object class. 178 | :param Tensors: 179 | Prediction tensors N*S*S*(B*5+C) with multiple bounding boxes per grid cell. 180 | format <x, y, w, h, confidence> * B + <class probabilities> 181 | 182 | :return: Detections: A dictionary contains bounding boxes for each object class over all images. 183 | format - [key] class_id 184 | [value] 185 | list of bounding boxes 186 | format (image_name, x, y, w, h, confidence) 187 | Note: 188 | Model output x, y are in grid cell offset coordinates. 189 | Must convert to image coordinates before use. 190 | 191 | Update: 192 | 06/27/2019: For each grid cell, only the bbox with the highest confidence score will be used. 193 | 194 | """ 195 | if not isinstance(Images, list): 196 | raise Exception("Expect a list of Image Names.") 197 | if Tensors.size(0) != len(Images): 198 | raise Exception("Number of tensors does not match number of images.") 199 | if Tensors[0].size(0) != S or Tensors[0].size(1) != S: 200 | raise Exception("Tensor size not match") 201 | if Tensors[0].size(2) != 5*B+C and Tensors[0].size(2) != 5+C: 202 | raise Exception("Tensor size not match") 203 | 204 | # convert to image coordinate [0,1] 205 | # #### Do ONLY once !!! 206 | Tensors = [convert_coord_cell2img(Tensors[i]) for i in range(Tensors.size(0))] 207 | 208 | # init 209 | Detections = dict() 210 | for c in range(C): 211 | Detections[c] = [] 212 | 213 | for k in range(len(Tensors)): 214 | T = Tensors[k] 215 | img_name = Images[k] 216 | for i in range(S): 217 | for j in range(S): 218 | _, cls = torch.max(T[i, j, :][-C:], 0) 219 | 220 | best_conf = 0 # record the highest confidence 221 | for b in range(B): 222 | bbox = (img_name,) 223 | bbox = bbox + tuple(T[i, j, 5*b: 5*b + 5]) 224 | 225 | if b == 0: 226 | best_bbox = bbox 227 | 228 | if T[i, j, 5*b+4] > best_conf: 229 | best_conf, best_bbox = T[i, j, 5*b+4], bbox # update best_conf as well, so the box with the highest confidence wins 230 | # bbox with highest confidence score will be used. 231 | # Detections[cls.item()].append(best_bbox) 232 | if best_bbox[-1] > conf_threshold: 233 | Detections[cls.item()].append(best_bbox) 234 | return Detections 235 | 236 | 237 | def ground_truth_detection(label_list): 238 | """ 239 | In the Evaluation stage, summarize the Ground Truth in terms of object class. 240 | :param label_list: a list of label file names 241 | :return: Detections: A dictionary contains Ground Truth bounding boxes 242 | for each object class over all images. 243 | format - [key] class_id 244 | [value] 245 | list of bounding boxes 246 | format (image_name, x, y, w, h) 247 | """ 248 | Detections = dict() 249 | for c in range(C): 250 | Detections[c] = [] 251 | 252 | for k in range(len(label_list)): 253 | img_name = label_list[k].split('/')[-1].replace('.txt', '') 254 | labels = read_labels(label_list[k]) 255 | for label in labels: 256 | Detections[int(label[0])].append((img_name,) + tuple(label[1:])) 257 | return Detections 258 | 259 |
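# --- Editorial sketch (not part of the original utils.py) ----------------------
# What the two dictionaries built by the functions above are expected to hold,
# assuming a single hypothetical image '000001' containing one dog
# (class id 11 in the VOC class list defined in voc_label.py):
#
#   detections[11]   == [('000001', x, y, w, h, confidence)]    # prediction2detection
#   ground_truth[11] == [('000001', x, y, w, h)]                 # ground_truth_detection
#
# All coordinates are normalized image coordinates in [0, 1]; every other class id
# maps to an empty list. evaluate_IOU below consumes exactly this pair of dicts.
# --------------------------------------------------------------------------------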
260 | def evaluate_IOU(Detections, Ground_truth, device=torch.device('cpu'), use_float64=False): 261 | """ 262 | Compute IOU over all images. 263 | :param Detections: A dictionary contains bounding boxes 264 | for each object class over all images. 265 | format - [key] class_id 266 | [value] 267 | list of bounding boxes 268 | 269 | :param Ground_truth: A dictionary contains bounding boxes 270 | for each object class over all images. 271 | format - [key] class_id 272 | [value] 273 | list of bounding boxes 274 | 275 | :return: 276 | Results: A dictionary contains bounding boxes 277 | for each object class over all images. 278 | format - [key] class_id 279 | [value] 280 | list of bounding boxes, each with its best IOU appended 281 | 282 | """ 283 | Results = {} 284 | for c in range(C): 285 | Det = Detections[c] # predicted detection 286 | GT = Ground_truth[c] # ground truth 287 | Results[c] = [] 288 | for det in Det: 289 | img_ground_truth = list(filter(lambda x: x[0] == det[0].split('.')[0], GT)) 290 | if len(img_ground_truth) > 0: 291 | ''' 292 | if use_float64: 293 | inter_over_unions = [calc_IOU(det[1:5], torch.tensor(gt[1:5]).double(), device, use_float64) for gt in img_ground_truth] 294 | else: 295 | inter_over_unions = [calc_IOU(det[1:5], torch.tensor(gt[1:5]), device, use_float64) for gt in img_ground_truth] 296 | ''' 297 | inter_over_unions = [] 298 | for gt in img_ground_truth: 299 | if use_float64: 300 | curr_iou = calc_IOU(det[1:5], torch.tensor(gt[1:5]).double(), device, use_float64) 301 | else: 302 | curr_iou = calc_IOU(det[1:5], torch.tensor(gt[1:5]), device, use_float64) 303 | inter_over_unions.append(curr_iou.item()) 304 | 305 | iou = max(inter_over_unions) 306 | img_ground_truth.pop(np.argmax(inter_over_unions)) # remove matched ground truth 307 | else: 308 | iou = 0.0 309 | Results[c].append(list(det) + [iou]) 310 | return Results 311 | 312 | 313 | def evaluate_TP_FP(Results, threshold): 314 | 315 | """ 316 | Compute TP or FP based on threshold. 317 | :param Results: A dictionary contains bounding boxes 318 | for each object class over all images. 319 | format - [key] class_id 320 | [value] 321 | list of bounding boxes 322 | 323 | :param threshold: 324 | detection is TP if IOU > threshold, otherwise FP 325 | for each object class over all images. 326 | 327 | :return: Results: A dictionary contains bounding boxes 328 | for each object class over all images. 329 | format - [key] class_id 330 | [value] 331 | list of bounding boxes with [TP, FP] flags appended 332 | 333 | """ 334 | if not 0 <= threshold <= 1: 335 | raise Exception("IOU threshold should be in [0, 1]") 336 | 337 | for c in range(len(Results)): 338 | for i in range(len(Results[c])): 339 | if Results[c][i][-1] > threshold: # IOU > threshold 340 | Results[c][i] += [1, 0] # TP 341 | else: 342 | Results[c][i] += [0, 1] # FP 343 | return Results 344 | 345 | 346 | def evaluate_precision_recall(Results, threshold, all_ground_truths): 347 | """ 348 | Compute Precision and Recall based on threshold. 349 | :param Results: A dictionary contains bounding boxes 350 | for each object class over all images. 351 | format - [key] class_id 352 | [value] 353 | list of bounding boxes 354 | 355 | :param threshold: 356 | detection is TP if IOU > threshold, otherwise FP 357 | 358 | :return: Results: A dictionary contains bounding boxes 359 | for each object class over all images. 
360 | format - [key] class_id 361 | [value] 362 | list of bounding boxes 363 | 364 | 365 | 366 | Acc_tp_all_cls: accumulated TP for all classes [List] 367 | Acc_fp_all_cls: accumulated FP for all classes [List] 368 | Precisions_all_cls: Precisions for all classes [List] 369 | Recalls_all_cls: Recalls for all classes [List] 370 | 371 | """ 372 | Acc_tp_all_cls = [] 373 | Acc_fp_all_cls = [] 374 | Precisions_all_cls = [] # PR curve points for each class 375 | Recalls_all_cls = [] 376 | 377 | acc_tp = 0.0 # accumulated detection 378 | acc_fp = 0.0 # accumulated detection 379 | eps = 1e-10 # prevent division over zero 380 | 381 | # sort by confidence 382 | def take_confidence(elem): 383 | return elem[5] 384 | 385 | for c in range(C): 386 | Results[c].sort(key=take_confidence, reverse=True) # order detections by their confidences 387 | 388 | # all groundtruth 389 | num_all_groundtruth = 0 390 | for c in range(len(Results)): 391 | for i in range(len(Results[c])): 392 | num_all_groundtruth += len(all_ground_truths[c]) 393 | 394 | # compute Accumulated TP, Accumulated FP, Precision, Recall 395 | for c in range(len(Results)): 396 | precisions = [] 397 | recalls = [] 398 | 399 | acc_tp = 0 # ZZ 400 | acc_fp = 0 # ZZ 401 | for i in range(len(Results[c])): 402 | res = Results[c][i] 403 | acc_tp += res[-2] 404 | acc_fp += res[-1] 405 | precision = acc_tp / (acc_tp + acc_fp + eps) 406 | recall = 0 if len(all_ground_truths[c]) == 0 else (acc_tp / len(all_ground_truths[c])) 407 | 408 | # record 409 | Results[c][i] += [acc_tp, acc_fp, precision, recall] 410 | precisions.append(precision) # for Precision-Recall curve 411 | recalls.append(recall) 412 | 413 | Acc_tp_all_cls.append(acc_tp) 414 | Acc_fp_all_cls.append(acc_fp) 415 | Precisions_all_cls.append(precisions) 416 | Recalls_all_cls.append(recalls) 417 | 418 | return Results, Acc_tp_all_cls, Acc_fp_all_cls, Precisions_all_cls, Recalls_all_cls 419 | 420 | 421 | def calc_average_precision(p, r, show_flag=False): 422 | """ 423 | Calculate Average Precision by interpolating PR-curve. 424 | 425 | Note: Interpolation performed in all points. 426 | 427 | :param p: Precision points [list] 428 | :param r: Recall points [list] 429 | :param show_flag: plot if TRUE [boolean] 430 | 431 | :return: ap: Average Precision 432 | p_interp: interpolated precision 433 | """ 434 | assert len(p) == len(r), "Equal number of Precision and Recall points." 435 | ap = 0.0 436 | # add starting point (r, p) = (0, 1) 437 | p = [1] + p 438 | r = [0] + r 439 | p_interp = [p[0]] 440 | 441 | for i in range(len(p)-1): 442 | interp = max(p[i:]) 443 | ap += (r[i+1] - r[i]) * interp 444 | p_interp.append(interp) 445 | 446 | if show_flag: 447 | plt.plot(r, p) 448 | plt.step(r, p_interp) 449 | plt.legend(['Precision', 'Interpolated precision'], loc='upper right') 450 | plt.show() 451 | return ap, p_interp 452 | 453 | 454 | def calc_mean_average_precision(p_all_cls, r_all_cls): 455 | """ 456 | Calc mAP. 457 | :param p_all_cls: Precisions for all classes [list] 458 | :param r_all_cls: Recalls for all classes [list] 459 | :return: mAP 460 | """ 461 | assert len(p_all_cls) == len(r_all_cls), "lengths of Lists should be equal." 
462 | 463 | mAP = 0 464 | num_class = len(p_all_cls) 465 | 466 | non_empty_list = [x for x in p_all_cls if x != []] 467 | num_class_effective = len(non_empty_list) 468 | 469 | for i in range(num_class): 470 | p = p_all_cls[i] 471 | r = r_all_cls[i] 472 | ap, _ = calc_average_precision(p, r) 473 | mAP += ap 474 | 475 | mAP = mAP / num_class_effective if num_class_effective else 0 476 | return mAP 477 | 478 | 479 | if __name__ == "__main__": 480 | voc2012 = VOC('/Users/erica/Dataset/Pascal/2012_train_short.txt', IMG_WIDTH, IMG_HEIGHT) 481 | det = ground_truth_detection(voc2012.label_list) 482 | print('Done.') 483 | 484 | 485 | -------------------------------------------------------------------------------- /voc_label.py: -------------------------------------------------------------------------------- 1 | import xml.etree.ElementTree as ET 2 | import pickle 3 | import os 4 | from os import listdir, getcwd 5 | from os.path import join 6 | 7 | 8 | sets = [('2012', 'train'), ('2012', 'val'), ('2007', 'train'), ('2007', 'val'), ('2007', 'test')] 9 | 10 | classes = ["aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"] 11 | 12 | 13 | def convert(size, box): 14 | dw = 1./size[0] 15 | dh = 1./size[1] 16 | x = (box[0] + box[1])/2.0 17 | y = (box[2] + box[3])/2.0 18 | w = box[1] - box[0] 19 | h = box[3] - box[2] 20 | x = x*dw 21 | w = w*dw 22 | y = y*dh 23 | h = h*dh 24 | return (x,y,w,h) 25 | 26 | 27 | def convert_annotation(year, image_id): 28 | in_file = open('VOCdevkit/VOC%s/Annotations/%s.xml'%(year, image_id)) 29 | out_file = open('VOCdevkit/VOC%s/labels/%s.txt'%(year, image_id), 'w') 30 | tree = ET.parse(in_file) 31 | root = tree.getroot() 32 | size = root.find('size') 33 | w = int(size.find('width').text) 34 | h = int(size.find('height').text) 35 | 36 | for obj in root.iter('object'): 37 | difficult = obj.find('difficult').text 38 | cls = obj.find('name').text 39 | if cls not in classes or int(difficult) == 1: 40 | continue 41 | cls_id = classes.index(cls) 42 | xmlbox = obj.find('bndbox') 43 | b = (float(xmlbox.find('xmin').text), float(xmlbox.find('xmax').text), float(xmlbox.find('ymin').text), float(xmlbox.find('ymax').text)) 44 | bb = convert((w,h), b) 45 | out_file.write(str(cls_id) + " " + " ".join([str(a) for a in bb]) + '\n') 46 | 47 | wd = getcwd() 48 | 49 | for year, image_set in sets: 50 | if not os.path.exists('VOCdevkit/VOC%s/labels/'%(year)): 51 | os.makedirs('VOCdevkit/VOC%s/labels/'%(year)) 52 | image_ids = open('VOCdevkit/VOC%s/ImageSets/Main/%s.txt'%(year, image_set)).read().strip().split() 53 | list_file = open('%s_%s.txt'%(year, image_set), 'w') 54 | for image_id in image_ids: 55 | list_file.write('%s/VOCdevkit/VOC%s/JPEGImages/%s.jpg\n'%(wd, year, image_id)) 56 | convert_annotation(year, image_id) 57 | list_file.close() 58 | 59 | --------------------------------------------------------------------------------
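The evaluation helpers in `utils.py` (`prediction2detection`, `ground_truth_detection`, `evaluate_IOU`, `evaluate_TP_FP`, `evaluate_precision_recall`, `calc_mean_average_precision`) are meant to be chained into a single mAP computation. The following is a minimal sketch of that chain; the label file name, the random tensor standing in for model output, and the `0.5` IOU threshold are illustrative assumptions, not files or settings shipped with this repository.

    import torch
    from utils import (prediction2detection, ground_truth_detection, evaluate_IOU,
                       evaluate_TP_FP, evaluate_precision_recall,
                       calc_mean_average_precision)

    # one hypothetical image with a YOLO-format label file next to it
    label_list = ['000001.txt']
    images = ['000001']

    # stand-in for model output: N*S*S*(B*5+C) = 1*7*7*30
    P = torch.rand(len(images), 7, 7, 2 * 5 + 20)

    dets = prediction2detection(P, images, conf_threshold=0.1)   # per-class detections
    gts = ground_truth_detection(label_list)                     # per-class ground truth
    results = evaluate_IOU(dets, gts)                            # best IOU per detection
    results = evaluate_TP_FP(results, threshold=0.5)             # VOC-style TP/FP at IOU 0.5
    results, acc_tp, acc_fp, precisions, recalls = evaluate_precision_recall(results, 0.5, gts)
    print('mAP =', calc_mean_average_precision(precisions, recalls))

In the test script the prediction tensor would presumably come from the trained model's forward pass and the image/label lists from the VOC `.txt` files generated in Step 2, rather than from the placeholders used here.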