├── .gitattributes ├── .gitignore ├── LICENSE ├── README.md ├── callbacks.py ├── config.json ├── create_validation_set.sh ├── evaluate.py ├── gen_anchors.py ├── generator.py ├── predict.py ├── requirements.txt ├── train.py ├── utils ├── __init__.py ├── bbox.py ├── colors.py ├── image.py ├── multi_gpu_model.py └── utils.py ├── voc.py ├── yolo.py ├── yolo3_one_file_to_detect_them_all.py └── zoo ├── config_kangaroo.json ├── config_license_plates.json ├── config_raccoon.json ├── config_rbc.json └── config_voc.json /.gitattributes: -------------------------------------------------------------------------------- 1 | *.h5 filter=lfs diff=lfs merge=lfs -text 2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.jpg 2 | *.jpeg 3 | *.weights 4 | *.h5 5 | *.pyc 6 | *.xml 7 | *.mp4 8 | *.DS_Store 9 | *.bak 10 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 Ngoc Anh Huynh 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # YOLO3 (Detection, Training, and Evaluation) 2 | 3 | ## Dataset and Model 4 | 5 | Dataset | mAP | Demo | Config | Model 6 | :---:|:---:|:---:|:---:|:---: 7 | Kangaroo Detection (1 class) (https://github.com/experiencor/kangaroo) | 95% | https://youtu.be/URO3UDHvoLY | check zoo | https://bit.ly/39rLNoE 8 | License Plate Detection (European in Romania) (1 class) (https://github.com/RobertLucian/license-plate-dataset) | 90% | https://youtu.be/HrqzIXFVCRo | check zoo | https://bit.ly/2tIpvPl 9 | Raccoon Detection (1 class) (https://github.com/experiencor/raccoon_dataset) | 98% | https://youtu.be/lxLyLIL7OsU | check zoo | https://bit.ly/39rLNoE 10 | Red Blood Cell Detection (3 classes) (https://github.com/experiencor/BCCD_Dataset) | 84% | https://imgur.com/a/uJl2lRI | check zoo | https://bit.ly/39rLNoE 11 | VOC (20 classes) (http://host.robots.ox.ac.uk/pascal/VOC/voc2012/) | 72% | https://youtu.be/0RmOI6hcfBI | check zoo | https://bit.ly/39rLNoE 12 | 13 | ## Todo list: 14 | - [x] Yolo3 detection 15 | - [x] Yolo3 training (warmup and multi-scale) 16 | - [x] mAP Evaluation 17 | - [x] Multi-GPU training 18 | - [x] Evaluation on VOC 19 | - [ ] Evaluation on COCO 20 | - [ ] MobileNet, DenseNet, ResNet, and VGG backends 21 | 22 | ## Installing 23 | 24 | To install the dependencies, run 25 | ```bash 26 | pip install -r requirements.txt 27 | ``` 28 | For GPU support, make sure the CUDA drivers are installed beforehand. 29 | 30 | It has been tested to work with Python 2.7.13 and 3.5.3. 31 | 32 | ## Detection 33 | 34 | Grab the pretrained YOLOv3 weights from https://pjreddie.com/media/files/yolov3.weights. 35 | 36 | ```python yolo3_one_file_to_detect_them_all.py -w yolov3.weights -i dog.jpg``` 37 | 38 | ## Training 39 | 40 | ### 1. Data preparation 41 | 42 | Download the Raccoon dataset from https://github.com/experiencor/raccoon_dataset. 43 | 44 | Organize the dataset into 4 folders: 45 | 46 | + train_image_folder <= the folder that contains the train images. 47 | 48 | + train_annot_folder <= the folder that contains the train annotations in VOC format. 49 | 50 | + valid_image_folder <= the folder that contains the validation images. 51 | 52 | + valid_annot_folder <= the folder that contains the validation annotations in VOC format. 53 | 54 | There is a one-to-one correspondence by file name between images and annotations. If the validation set is empty, the training set will be automatically split into a training set and a validation set with a ratio of 0.8. 55 | 56 | Also, if you've got the dataset split into 2 folders, one for images and the other for annotations, and you need to set a custom size for the validation set, use the `create_validation_set.sh` script to do that. The script expects the following parameters in the following order: 57 | ```bash 58 | ./create_validation_set.sh $param1 $param2 $param3 $param4 59 | # 1st param - folder where the images are found 60 | # 2nd param - folder where the annotations are found 61 | # 3rd param - number of random choices (i.e. the size of the validation set in absolute terms) 62 | # 4th param - folder where the validation images/annots end up (this directory must already contain images/ and annots/ subfolders) 63 | ``` 64 | 65 | ### 2.
Edit the configuration file 66 | The configuration file is a JSON file that looks like this: 67 | 68 | ```python 69 | { 70 | "model" : { 71 | "min_input_size": 352, 72 | "max_input_size": 448, 73 | "anchors": [10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326], 74 | "labels": ["raccoon"] 75 | }, 76 | 77 | "train": { 78 | "train_image_folder": "/home/andy/data/raccoon_dataset/images/", 79 | "train_annot_folder": "/home/andy/data/raccoon_dataset/anns/", 80 | 81 | "train_times": 10, # the number of times to cycle through the training set, useful for small datasets 82 | "pretrained_weights": "", # specify the path of the pretrained weights, but it's fine to start from scratch 83 | "batch_size": 16, # the number of images to read in each batch 84 | "learning_rate": 1e-4, # the base learning rate of the default Adam rate scheduler 85 | "nb_epochs": 50, # number of epochs 86 | "warmup_epochs": 3, # the number of initial epochs during which the predicted box sizes in each cell are forced to match the anchor sizes; this trick seems to improve precision empirically 87 | "ignore_thresh": 0.5, 88 | "gpus": "0,1", 89 | 90 | "saved_weights_name": "raccoon.h5", 91 | "debug": true # turn on/off the printout of the current confidence, position, size, and class losses, and the recall 92 | }, 93 | 94 | "valid": { 95 | "valid_image_folder": "", 96 | "valid_annot_folder": "", 97 | 98 | "valid_times": 1 99 | } 100 | } 101 | 102 | ``` 103 | 104 | The ```labels``` setting lists the labels to be trained on. Only images that contain at least one of the listed labels are fed to the network; the rest are simply ignored. This way, a dog detector can easily be trained on the VOC or COCO dataset by setting ```labels``` to ```['dog']```. 105 | 106 | Download the pretrained weights for the backend at: 107 | 108 | https://bit.ly/39rLNoE 109 | 110 | **These weights must be put in the root folder of the repository. They are the pretrained weights for the backend only and are loaded during model creation. The code does not work without them.** 111 | 112 | ### 3. Generate anchors for your dataset (optional) 113 | 114 | `python gen_anchors.py -c config.json` 115 | 116 | Copy the generated anchors printed to the terminal into the ```anchors``` setting in ```config.json```. 117 | 118 | ### 4. Start the training process 119 | 120 | `python train.py -c config.json` 121 | 122 | By the end of this process, the code will have written the weights of the best model to the file best_weights.h5 (or whatever name is specified in the "saved_weights_name" setting in the config.json file). Training stops early when the loss has not improved for 7 consecutive epochs (see the `EarlyStopping` callback in `train.py`). 123 | 124 | ### 5. Perform detection using trained weights on an image, a set of images, a video, or a webcam 125 | `python predict.py -c config.json -i /path/to/image/or/video` 126 | 127 | It carries out detection on the input and writes the result, with the detected bounding boxes drawn, to the output folder (`output/` by default, configurable with `-o`). 128 | 129 | If you wish to change the object threshold or the IOU threshold, you can do so by altering the `obj_thresh` and `nms_thresh` variables in `predict.py`. By default, they are set to `0.5` and `0.45` respectively; a short example is given at the end of this README. 130 | 131 | ## Evaluation 132 | 133 | `python evaluate.py -c config.json` 134 | 135 | Compute the mAP of the model defined in `saved_weights_name` on the validation dataset defined in `valid_image_folder` and `valid_annot_folder`.
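
For reference, the `obj_thresh` and `nms_thresh` values mentioned in step 5 are plain variables set near the top of `predict.py`, and `evaluate()` in `utils/utils.py` uses the same defaults when computing mAP. A minimal, illustrative tweak looks like this (the values below are just an example, not a recommendation):

```python
# in predict.py: raise obj_thresh to keep only high-confidence detections,
# and lower nms_thresh to suppress overlapping boxes more aggressively
obj_thresh, nms_thresh = 0.6, 0.3
```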
136 | -------------------------------------------------------------------------------- /callbacks.py: -------------------------------------------------------------------------------- 1 | from keras.callbacks import TensorBoard, ModelCheckpoint 2 | import tensorflow as tf 3 | import numpy as np 4 | 5 | class CustomTensorBoard(TensorBoard): 6 | """ to log the loss after each batch 7 | """ 8 | def __init__(self, log_every=1, **kwargs): 9 | super(CustomTensorBoard, self).__init__(**kwargs) 10 | self.log_every = log_every 11 | self.counter = 0 12 | 13 | def on_batch_end(self, batch, logs=None): 14 | self.counter+=1 15 | if self.counter%self.log_every==0: 16 | for name, value in logs.items(): 17 | if name in ['batch', 'size']: 18 | continue 19 | summary = tf.Summary() 20 | summary_value = summary.value.add() 21 | summary_value.simple_value = value.item() 22 | summary_value.tag = name 23 | self.writer.add_summary(summary, self.counter) 24 | self.writer.flush() 25 | 26 | super(CustomTensorBoard, self).on_batch_end(batch, logs) 27 | 28 | class CustomModelCheckpoint(ModelCheckpoint): 29 | """ to save the template model, not the multi-GPU model 30 | """ 31 | def __init__(self, model_to_save, **kwargs): 32 | super(CustomModelCheckpoint, self).__init__(**kwargs) 33 | self.model_to_save = model_to_save 34 | 35 | def on_epoch_end(self, epoch, logs=None): 36 | logs = logs or {} 37 | self.epochs_since_last_save += 1 38 | if self.epochs_since_last_save >= self.period: 39 | self.epochs_since_last_save = 0 40 | filepath = self.filepath.format(epoch=epoch + 1, **logs) 41 | if self.save_best_only: 42 | current = logs.get(self.monitor) 43 | if current is None: 44 | warnings.warn('Can save best model only with %s available, ' 45 | 'skipping.' % (self.monitor), RuntimeWarning) 46 | else: 47 | if self.monitor_op(current, self.best): 48 | if self.verbose > 0: 49 | print('\nEpoch %05d: %s improved from %0.5f to %0.5f,' 50 | ' saving model to %s' 51 | % (epoch + 1, self.monitor, self.best, 52 | current, filepath)) 53 | self.best = current 54 | if self.save_weights_only: 55 | self.model_to_save.save_weights(filepath, overwrite=True) 56 | else: 57 | self.model_to_save.save(filepath, overwrite=True) 58 | else: 59 | if self.verbose > 0: 60 | print('\nEpoch %05d: %s did not improve from %0.5f' % 61 | (epoch + 1, self.monitor, self.best)) 62 | else: 63 | if self.verbose > 0: 64 | print('\nEpoch %05d: saving model to %s' % (epoch + 1, filepath)) 65 | if self.save_weights_only: 66 | self.model_to_save.save_weights(filepath, overwrite=True) 67 | else: 68 | self.model_to_save.save(filepath, overwrite=True) 69 | 70 | super(CustomModelCheckpoint, self).on_batch_end(epoch, logs) -------------------------------------------------------------------------------- /config.json: -------------------------------------------------------------------------------- 1 | { 2 | "model" : { 3 | "min_input_size": 288, 4 | "max_input_size": 448, 5 | "anchors": [55,69, 75,234, 133,240, 136,129, 142,363, 203,290, 228,184, 285,359, 341,260], 6 | "labels": ["kangaroo"] 7 | }, 8 | 9 | "train": { 10 | "train_image_folder": "/home/andy/Desktop/github/kangaroo/images/", 11 | "train_annot_folder": "/home/andy/Desktop/github/kangaroo/annots/", 12 | "cache_name": "kangaroo_train.pkl", 13 | 14 | "train_times": 8, 15 | "batch_size": 16, 16 | "learning_rate": 1e-4, 17 | "nb_epochs": 100, 18 | "warmup_epochs": 3, 19 | "ignore_thresh": 0.5, 20 | "gpus": "0,1", 21 | 22 | "grid_scales": [1,1,1], 23 | "obj_scale": 5, 24 | "noobj_scale": 1, 25 | "xywh_scale": 1, 26 | 
"class_scale": 1, 27 | 28 | "tensorboard_dir": "logs", 29 | "saved_weights_name": "kangaroo.h5", 30 | "debug": true 31 | }, 32 | 33 | "valid": { 34 | "valid_image_folder": "", 35 | "valid_annot_folder": "", 36 | "cache_name": "", 37 | 38 | "valid_times": 1 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /create_validation_set.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # Run it from this directory. 4 | 5 | # 1st param - folder where images are found 6 | # 2nd param - folder where annotations are found 7 | # 3rd param - number of random choices 8 | # 4th param - folder where validation images/annots end up (must have images/annots folders created) 9 | 10 | ls "$1" | sort -R | tail -"$3" | while read image; do 11 | filename=$(basename "$image" .jpg) 12 | annot="$filename.xml" 13 | echo "moving files $image $annot" 14 | mv "$1/$image" "$4/images" 15 | mv "$2/$annot" "$4/annots" 16 | done 17 | -------------------------------------------------------------------------------- /evaluate.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python 2 | 3 | import argparse 4 | import os 5 | import numpy as np 6 | import json 7 | from voc import parse_voc_annotation 8 | from yolo import create_yolov3_model 9 | from generator import BatchGenerator 10 | from utils.utils import normalize, evaluate 11 | from keras.callbacks import EarlyStopping, ModelCheckpoint 12 | from keras.optimizers import Adam 13 | from keras.models import load_model 14 | 15 | def _main_(args): 16 | config_path = args.conf 17 | 18 | with open(config_path) as config_buffer: 19 | config = json.loads(config_buffer.read()) 20 | 21 | ############################### 22 | # Create the validation generator 23 | ############################### 24 | valid_ints, labels = parse_voc_annotation( 25 | config['valid']['valid_annot_folder'], 26 | config['valid']['valid_image_folder'], 27 | config['valid']['cache_name'], 28 | config['model']['labels'] 29 | ) 30 | 31 | labels = labels.keys() if len(config['model']['labels']) == 0 else config['model']['labels'] 32 | labels = sorted(labels) 33 | 34 | valid_generator = BatchGenerator( 35 | instances = valid_ints, 36 | anchors = config['model']['anchors'], 37 | labels = labels, 38 | downsample = 32, # ratio between network input's size and network output's size, 32 for YOLOv3 39 | max_box_per_image = 0, 40 | batch_size = config['train']['batch_size'], 41 | min_net_size = config['model']['min_input_size'], 42 | max_net_size = config['model']['max_input_size'], 43 | shuffle = True, 44 | jitter = 0.0, 45 | norm = normalize 46 | ) 47 | 48 | ############################### 49 | # Load the model and do evaluation 50 | ############################### 51 | os.environ['CUDA_VISIBLE_DEVICES'] = config['train']['gpus'] 52 | 53 | infer_model = load_model(config['train']['saved_weights_name']) 54 | 55 | # compute mAP for all the classes 56 | average_precisions = evaluate(infer_model, valid_generator) 57 | 58 | # print the score 59 | for label, average_precision in average_precisions.items(): 60 | print(labels[label] + ': {:.4f}'.format(average_precision)) 61 | print('mAP: {:.4f}'.format(sum(average_precisions.values()) / len(average_precisions))) 62 | 63 | if __name__ == '__main__': 64 | argparser = argparse.ArgumentParser(description='Evaluate YOLO_v3 model on any dataset') 65 | argparser.add_argument('-c', '--conf', help='path to configuration 
file') 66 | 67 | args = argparser.parse_args() 68 | _main_(args) 69 | -------------------------------------------------------------------------------- /gen_anchors.py: -------------------------------------------------------------------------------- 1 | import random 2 | import argparse 3 | import numpy as np 4 | 5 | from voc import parse_voc_annotation 6 | import json 7 | 8 | def IOU(ann, centroids): 9 | w, h = ann 10 | similarities = [] 11 | 12 | for centroid in centroids: 13 | c_w, c_h = centroid 14 | 15 | if c_w >= w and c_h >= h: 16 | similarity = w*h/(c_w*c_h) 17 | elif c_w >= w and c_h <= h: 18 | similarity = w*c_h/(w*h + (c_w-w)*c_h) 19 | elif c_w <= w and c_h >= h: 20 | similarity = c_w*h/(w*h + c_w*(c_h-h)) 21 | else: #means both w,h are bigger than c_w and c_h respectively 22 | similarity = (c_w*c_h)/(w*h) 23 | similarities.append(similarity) # will become (k,) shape 24 | 25 | return np.array(similarities) 26 | 27 | def avg_IOU(anns, centroids): 28 | n,d = anns.shape 29 | sum = 0. 30 | 31 | for i in range(anns.shape[0]): 32 | sum+= max(IOU(anns[i], centroids)) 33 | 34 | return sum/n 35 | 36 | def print_anchors(centroids): 37 | out_string = '' 38 | 39 | anchors = centroids.copy() 40 | 41 | widths = anchors[:, 0] 42 | sorted_indices = np.argsort(widths) 43 | 44 | r = "anchors: [" 45 | for i in sorted_indices: 46 | out_string += str(int(anchors[i,0]*416)) + ',' + str(int(anchors[i,1]*416)) + ', ' 47 | 48 | print(out_string[:-2]) 49 | 50 | def run_kmeans(ann_dims, anchor_num): 51 | ann_num = ann_dims.shape[0] 52 | iterations = 0 53 | prev_assignments = np.ones(ann_num)*(-1) 54 | iteration = 0 55 | old_distances = np.zeros((ann_num, anchor_num)) 56 | 57 | indices = [random.randrange(ann_dims.shape[0]) for i in range(anchor_num)] 58 | centroids = ann_dims[indices] 59 | anchor_dim = ann_dims.shape[1] 60 | 61 | while True: 62 | distances = [] 63 | iteration += 1 64 | for i in range(ann_num): 65 | d = 1 - IOU(ann_dims[i], centroids) 66 | distances.append(d) 67 | distances = np.array(distances) # distances.shape = (ann_num, anchor_num) 68 | 69 | print("iteration {}: dists = {}".format(iteration, np.sum(np.abs(old_distances-distances)))) 70 | 71 | #assign samples to centroids 72 | assignments = np.argmin(distances,axis=1) 73 | 74 | if (assignments == prev_assignments).all() : 75 | return centroids 76 | 77 | #calculate new centroids 78 | centroid_sums=np.zeros((anchor_num, anchor_dim), np.float) 79 | for i in range(ann_num): 80 | centroid_sums[assignments[i]]+=ann_dims[i] 81 | for j in range(anchor_num): 82 | centroids[j] = centroid_sums[j]/(np.sum(assignments==j) + 1e-6) 83 | 84 | prev_assignments = assignments.copy() 85 | old_distances = distances.copy() 86 | 87 | def _main_(argv): 88 | config_path = args.conf 89 | num_anchors = args.anchors 90 | 91 | with open(config_path) as config_buffer: 92 | config = json.loads(config_buffer.read()) 93 | 94 | train_imgs, train_labels = parse_voc_annotation( 95 | config['train']['train_annot_folder'], 96 | config['train']['train_image_folder'], 97 | config['train']['cache_name'], 98 | config['model']['labels'] 99 | ) 100 | 101 | # run k_mean to find the anchors 102 | annotation_dims = [] 103 | for image in train_imgs: 104 | print(image['filename']) 105 | for obj in image['object']: 106 | relative_w = (float(obj['xmax']) - float(obj['xmin']))/image['width'] 107 | relatice_h = (float(obj["ymax"]) - float(obj['ymin']))/image['height'] 108 | annotation_dims.append(tuple(map(float, (relative_w,relatice_h)))) 109 | 110 | annotation_dims = 
np.array(annotation_dims) 111 | centroids = run_kmeans(annotation_dims, num_anchors) 112 | 113 | # write anchors to file 114 | print('\naverage IOU for', num_anchors, 'anchors:', '%0.2f' % avg_IOU(annotation_dims, centroids)) 115 | print_anchors(centroids) 116 | 117 | if __name__ == '__main__': 118 | argparser = argparse.ArgumentParser() 119 | 120 | argparser.add_argument( 121 | '-c', 122 | '--conf', 123 | default='config.json', 124 | help='path to configuration file') 125 | argparser.add_argument( 126 | '-a', 127 | '--anchors', 128 | default=9, 129 | help='number of anchors to use') 130 | 131 | args = argparser.parse_args() 132 | _main_(args) 133 | -------------------------------------------------------------------------------- /generator.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import copy 3 | import numpy as np 4 | from keras.utils import Sequence 5 | from utils.bbox import BoundBox, bbox_iou 6 | from utils.image import apply_random_scale_and_crop, random_distort_image, random_flip, correct_bounding_boxes 7 | 8 | class BatchGenerator(Sequence): 9 | def __init__(self, 10 | instances, 11 | anchors, 12 | labels, 13 | downsample=32, # ratio between network input's size and network output's size, 32 for YOLOv3 14 | max_box_per_image=30, 15 | batch_size=1, 16 | min_net_size=320, 17 | max_net_size=608, 18 | shuffle=True, 19 | jitter=True, 20 | norm=None 21 | ): 22 | self.instances = instances 23 | self.batch_size = batch_size 24 | self.labels = labels 25 | self.downsample = downsample 26 | self.max_box_per_image = max_box_per_image 27 | self.min_net_size = (min_net_size//self.downsample)*self.downsample 28 | self.max_net_size = (max_net_size//self.downsample)*self.downsample 29 | self.shuffle = shuffle 30 | self.jitter = jitter 31 | self.norm = norm 32 | self.anchors = [BoundBox(0, 0, anchors[2*i], anchors[2*i+1]) for i in range(len(anchors)//2)] 33 | self.net_h = 416 34 | self.net_w = 416 35 | 36 | if shuffle: np.random.shuffle(self.instances) 37 | 38 | def __len__(self): 39 | return int(np.ceil(float(len(self.instances))/self.batch_size)) 40 | 41 | def __getitem__(self, idx): 42 | # get image input size, change every 10 batches 43 | net_h, net_w = self._get_net_size(idx) 44 | base_grid_h, base_grid_w = net_h//self.downsample, net_w//self.downsample 45 | 46 | # determine the first and the last indices of the batch 47 | l_bound = idx*self.batch_size 48 | r_bound = (idx+1)*self.batch_size 49 | 50 | if r_bound > len(self.instances): 51 | r_bound = len(self.instances) 52 | l_bound = r_bound - self.batch_size 53 | 54 | x_batch = np.zeros((r_bound - l_bound, net_h, net_w, 3)) # input images 55 | t_batch = np.zeros((r_bound - l_bound, 1, 1, 1, self.max_box_per_image, 4)) # list of groundtruth boxes 56 | 57 | # initialize the inputs and the outputs 58 | yolo_1 = np.zeros((r_bound - l_bound, 1*base_grid_h, 1*base_grid_w, len(self.anchors)//3, 4+1+len(self.labels))) # desired network output 1 59 | yolo_2 = np.zeros((r_bound - l_bound, 2*base_grid_h, 2*base_grid_w, len(self.anchors)//3, 4+1+len(self.labels))) # desired network output 2 60 | yolo_3 = np.zeros((r_bound - l_bound, 4*base_grid_h, 4*base_grid_w, len(self.anchors)//3, 4+1+len(self.labels))) # desired network output 3 61 | yolos = [yolo_3, yolo_2, yolo_1] 62 | 63 | dummy_yolo_1 = np.zeros((r_bound - l_bound, 1)) 64 | dummy_yolo_2 = np.zeros((r_bound - l_bound, 1)) 65 | dummy_yolo_3 = np.zeros((r_bound - l_bound, 1)) 66 | 67 | instance_count = 0 68 | true_box_index = 0 69 | 70 | # 
do the logic to fill in the inputs and the output 71 | for train_instance in self.instances[l_bound:r_bound]: 72 | # augment input image and fix object's position and size 73 | img, all_objs = self._aug_image(train_instance, net_h, net_w) 74 | 75 | for obj in all_objs: 76 | # find the best anchor box for this object 77 | max_anchor = None 78 | max_index = -1 79 | max_iou = -1 80 | 81 | shifted_box = BoundBox(0, 82 | 0, 83 | obj['xmax']-obj['xmin'], 84 | obj['ymax']-obj['ymin']) 85 | 86 | for i in range(len(self.anchors)): 87 | anchor = self.anchors[i] 88 | iou = bbox_iou(shifted_box, anchor) 89 | 90 | if max_iou < iou: 91 | max_anchor = anchor 92 | max_index = i 93 | max_iou = iou 94 | 95 | # determine the yolo to be responsible for this bounding box 96 | yolo = yolos[max_index//3] 97 | grid_h, grid_w = yolo.shape[1:3] 98 | 99 | # determine the position of the bounding box on the grid 100 | center_x = .5*(obj['xmin'] + obj['xmax']) 101 | center_x = center_x / float(net_w) * grid_w # sigma(t_x) + c_x 102 | center_y = .5*(obj['ymin'] + obj['ymax']) 103 | center_y = center_y / float(net_h) * grid_h # sigma(t_y) + c_y 104 | 105 | # determine the sizes of the bounding box 106 | w = np.log((obj['xmax'] - obj['xmin']) / float(max_anchor.xmax)) # t_w 107 | h = np.log((obj['ymax'] - obj['ymin']) / float(max_anchor.ymax)) # t_h 108 | 109 | box = [center_x, center_y, w, h] 110 | 111 | # determine the index of the label 112 | obj_indx = self.labels.index(obj['name']) 113 | 114 | # determine the location of the cell responsible for this object 115 | grid_x = int(np.floor(center_x)) 116 | grid_y = int(np.floor(center_y)) 117 | 118 | # assign ground truth x, y, w, h, confidence and class probs to y_batch 119 | yolo[instance_count, grid_y, grid_x, max_index%3] = 0 120 | yolo[instance_count, grid_y, grid_x, max_index%3, 0:4] = box 121 | yolo[instance_count, grid_y, grid_x, max_index%3, 4 ] = 1. 
122 | yolo[instance_count, grid_y, grid_x, max_index%3, 5+obj_indx] = 1 123 | 124 | # assign the true box to t_batch 125 | true_box = [center_x, center_y, obj['xmax'] - obj['xmin'], obj['ymax'] - obj['ymin']] 126 | t_batch[instance_count, 0, 0, 0, true_box_index] = true_box 127 | 128 | true_box_index += 1 129 | true_box_index = true_box_index % self.max_box_per_image 130 | 131 | # assign input image to x_batch 132 | if self.norm != None: 133 | x_batch[instance_count] = self.norm(img) 134 | else: 135 | # plot image and bounding boxes for sanity check 136 | for obj in all_objs: 137 | cv2.rectangle(img, (obj['xmin'],obj['ymin']), (obj['xmax'],obj['ymax']), (255,0,0), 3) 138 | cv2.putText(img, obj['name'], 139 | (obj['xmin']+2, obj['ymin']+12), 140 | 0, 1.2e-3 * img.shape[0], 141 | (0,255,0), 2) 142 | 143 | x_batch[instance_count] = img 144 | 145 | # increase instance counter in the current batch 146 | instance_count += 1 147 | 148 | return [x_batch, t_batch, yolo_1, yolo_2, yolo_3], [dummy_yolo_1, dummy_yolo_2, dummy_yolo_3] 149 | 150 | def _get_net_size(self, idx): 151 | if idx%10 == 0: 152 | net_size = self.downsample*np.random.randint(self.min_net_size/self.downsample, \ 153 | self.max_net_size/self.downsample+1) 154 | print("resizing: ", net_size, net_size) 155 | self.net_h, self.net_w = net_size, net_size 156 | return self.net_h, self.net_w 157 | 158 | def _aug_image(self, instance, net_h, net_w): 159 | image_name = instance['filename'] 160 | image = cv2.imread(image_name) # RGB image 161 | 162 | if image is None: print('Cannot find ', image_name) 163 | image = image[:,:,::-1] # RGB image 164 | 165 | image_h, image_w, _ = image.shape 166 | 167 | # determine the amount of scaling and cropping 168 | dw = self.jitter * image_w; 169 | dh = self.jitter * image_h; 170 | 171 | new_ar = (image_w + np.random.uniform(-dw, dw)) / (image_h + np.random.uniform(-dh, dh)); 172 | scale = np.random.uniform(0.25, 2); 173 | 174 | if (new_ar < 1): 175 | new_h = int(scale * net_h); 176 | new_w = int(net_h * new_ar); 177 | else: 178 | new_w = int(scale * net_w); 179 | new_h = int(net_w / new_ar); 180 | 181 | dx = int(np.random.uniform(0, net_w - new_w)); 182 | dy = int(np.random.uniform(0, net_h - new_h)); 183 | 184 | # apply scaling and cropping 185 | im_sized = apply_random_scale_and_crop(image, new_w, new_h, net_w, net_h, dx, dy) 186 | 187 | # randomly distort hsv space 188 | im_sized = random_distort_image(im_sized) 189 | 190 | # randomly flip 191 | flip = np.random.randint(2) 192 | im_sized = random_flip(im_sized, flip) 193 | 194 | # correct the size and pos of bounding boxes 195 | all_objs = correct_bounding_boxes(instance['object'], new_w, new_h, net_w, net_h, dx, dy, flip, image_w, image_h) 196 | 197 | return im_sized, all_objs 198 | 199 | def on_epoch_end(self): 200 | if self.shuffle: np.random.shuffle(self.instances) 201 | 202 | def num_classes(self): 203 | return len(self.labels) 204 | 205 | def size(self): 206 | return len(self.instances) 207 | 208 | def get_anchors(self): 209 | anchors = [] 210 | 211 | for anchor in self.anchors: 212 | anchors += [anchor.xmax, anchor.ymax] 213 | 214 | return anchors 215 | 216 | def load_annotation(self, i): 217 | annots = [] 218 | 219 | for obj in self.instances[i]['object']: 220 | annot = [obj['xmin'], obj['ymin'], obj['xmax'], obj['ymax'], self.labels.index(obj['name'])] 221 | annots += [annot] 222 | 223 | if len(annots) == 0: annots = [[]] 224 | 225 | return np.array(annots) 226 | 227 | def load_image(self, i): 228 | return 
cv2.imread(self.instances[i]['filename']) -------------------------------------------------------------------------------- /predict.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python 2 | 3 | import os 4 | import argparse 5 | import json 6 | import cv2 7 | from utils.utils import get_yolo_boxes, makedirs 8 | from utils.bbox import draw_boxes 9 | from keras.models import load_model 10 | from tqdm import tqdm 11 | import numpy as np 12 | 13 | def _main_(args): 14 | config_path = args.conf 15 | input_path = args.input 16 | output_path = args.output 17 | 18 | with open(config_path) as config_buffer: 19 | config = json.load(config_buffer) 20 | 21 | makedirs(output_path) 22 | 23 | ############################### 24 | # Set some parameter 25 | ############################### 26 | net_h, net_w = 416, 416 # a multiple of 32, the smaller the faster 27 | obj_thresh, nms_thresh = 0.5, 0.45 28 | 29 | ############################### 30 | # Load the model 31 | ############################### 32 | os.environ['CUDA_VISIBLE_DEVICES'] = config['train']['gpus'] 33 | infer_model = load_model(config['train']['saved_weights_name']) 34 | 35 | ############################### 36 | # Predict bounding boxes 37 | ############################### 38 | if 'webcam' in input_path: # do detection on the first webcam 39 | video_reader = cv2.VideoCapture(0) 40 | 41 | # the main loop 42 | batch_size = 1 43 | images = [] 44 | while True: 45 | ret_val, image = video_reader.read() 46 | if ret_val == True: images += [image] 47 | 48 | if (len(images)==batch_size) or (ret_val==False and len(images)>0): 49 | batch_boxes = get_yolo_boxes(infer_model, images, net_h, net_w, config['model']['anchors'], obj_thresh, nms_thresh) 50 | 51 | for i in range(len(images)): 52 | draw_boxes(images[i], batch_boxes[i], config['model']['labels'], obj_thresh) 53 | cv2.imshow('video with bboxes', images[i]) 54 | images = [] 55 | if cv2.waitKey(1) == 27: 56 | break # esc to quit 57 | cv2.destroyAllWindows() 58 | elif input_path[-4:] == '.mp4': # do detection on a video 59 | video_out = output_path + input_path.split('/')[-1] 60 | video_reader = cv2.VideoCapture(input_path) 61 | 62 | nb_frames = int(video_reader.get(cv2.CAP_PROP_FRAME_COUNT)) 63 | frame_h = int(video_reader.get(cv2.CAP_PROP_FRAME_HEIGHT)) 64 | frame_w = int(video_reader.get(cv2.CAP_PROP_FRAME_WIDTH)) 65 | 66 | video_writer = cv2.VideoWriter(video_out, 67 | cv2.VideoWriter_fourcc(*'MPEG'), 68 | 50.0, 69 | (frame_w, frame_h)) 70 | # the main loop 71 | batch_size = 1 72 | images = [] 73 | start_point = 0 #% 74 | show_window = False 75 | for i in tqdm(range(nb_frames)): 76 | _, image = video_reader.read() 77 | 78 | if (float(i+1)/nb_frames) > start_point/100.: 79 | images += [image] 80 | 81 | if (i%batch_size == 0) or (i == (nb_frames-1) and len(images) > 0): 82 | # predict the bounding boxes 83 | batch_boxes = get_yolo_boxes(infer_model, images, net_h, net_w, config['model']['anchors'], obj_thresh, nms_thresh) 84 | 85 | for i in range(len(images)): 86 | # draw bounding boxes on the image using labels 87 | draw_boxes(images[i], batch_boxes[i], config['model']['labels'], obj_thresh) 88 | 89 | # show the video with detection bounding boxes 90 | if show_window: cv2.imshow('video with bboxes', images[i]) 91 | 92 | # write result to the output video 93 | video_writer.write(images[i]) 94 | images = [] 95 | if show_window and cv2.waitKey(1) == 27: break # esc to quit 96 | 97 | if show_window: cv2.destroyAllWindows() 98 | 
video_reader.release() 99 | video_writer.release() 100 | else: # do detection on an image or a set of images 101 | image_paths = [] 102 | 103 | if os.path.isdir(input_path): 104 | for inp_file in os.listdir(input_path): 105 | image_paths += [input_path + inp_file] 106 | else: 107 | image_paths += [input_path] 108 | 109 | image_paths = [inp_file for inp_file in image_paths if (inp_file[-4:] in ['.jpg', '.png', 'JPEG'])] 110 | 111 | # the main loop 112 | for image_path in image_paths: 113 | image = cv2.imread(image_path) 114 | print(image_path) 115 | 116 | # predict the bounding boxes 117 | boxes = get_yolo_boxes(infer_model, [image], net_h, net_w, config['model']['anchors'], obj_thresh, nms_thresh)[0] 118 | 119 | # draw bounding boxes on the image using labels 120 | draw_boxes(image, boxes, config['model']['labels'], obj_thresh) 121 | 122 | # write the image with bounding boxes to file 123 | cv2.imwrite(output_path + image_path.split('/')[-1], np.uint8(image)) 124 | 125 | if __name__ == '__main__': 126 | argparser = argparse.ArgumentParser(description='Predict with a trained yolo model') 127 | argparser.add_argument('-c', '--conf', help='path to configuration file') 128 | argparser.add_argument('-i', '--input', help='path to an image, a directory of images, a video, or webcam') 129 | argparser.add_argument('-o', '--output', default='output/', help='path to output directory') 130 | 131 | args = argparser.parse_args() 132 | _main_(args) 133 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | absl-py==0.9.0 2 | astor==0.8.1 3 | gast==0.2.2 4 | google-pasta==0.1.8 5 | grpcio==1.26.0 6 | h5py==2.10.0 7 | Keras==2.3.1 8 | Keras-Applications==1.0.8 9 | Keras-Preprocessing==1.1.0 10 | Markdown==3.1.1 11 | numpy==1.18.1 12 | opencv-contrib-python==4.1.2.30 13 | opt-einsum==3.1.0 14 | protobuf==3.11.2 15 | PyYAML==5.3 16 | scipy==1.4.1 17 | six==1.14.0 18 | tensorboard==1.15.0 19 | tensorflow==1.15.0 20 | tensorflow-estimator==1.15.1 21 | termcolor==1.1.0 22 | tqdm==4.41.1 23 | Werkzeug==0.16.0 24 | wrapt==1.11.2 -------------------------------------------------------------------------------- /train.py: -------------------------------------------------------------------------------- 1 | #! 
/usr/bin/env python 2 | 3 | import argparse 4 | import os 5 | import numpy as np 6 | import json 7 | from voc import parse_voc_annotation 8 | from yolo import create_yolov3_model, dummy_loss 9 | from generator import BatchGenerator 10 | from utils.utils import normalize, evaluate, makedirs 11 | from keras.callbacks import EarlyStopping, ReduceLROnPlateau 12 | from keras.optimizers import Adam 13 | from callbacks import CustomModelCheckpoint, CustomTensorBoard 14 | from utils.multi_gpu_model import multi_gpu_model 15 | import tensorflow as tf 16 | import keras 17 | from keras.models import load_model 18 | 19 | 20 | config = tf.compat.v1.ConfigProto( 21 | gpu_options = tf.compat.v1.GPUOptions(per_process_gpu_memory_fraction=0.9) 22 | # device_count = {'GPU': 1} 23 | ) 24 | config.gpu_options.allow_growth = True 25 | session = tf.compat.v1.Session(config=config) 26 | tf.compat.v1.keras.backend.set_session(session) 27 | 28 | def create_training_instances( 29 | train_annot_folder, 30 | train_image_folder, 31 | train_cache, 32 | valid_annot_folder, 33 | valid_image_folder, 34 | valid_cache, 35 | labels, 36 | ): 37 | # parse annotations of the training set 38 | train_ints, train_labels = parse_voc_annotation(train_annot_folder, train_image_folder, train_cache, labels) 39 | 40 | # parse annotations of the validation set, if any, otherwise split the training set 41 | if os.path.exists(valid_annot_folder): 42 | valid_ints, valid_labels = parse_voc_annotation(valid_annot_folder, valid_image_folder, valid_cache, labels) 43 | else: 44 | print("valid_annot_folder not exists. Spliting the trainining set.") 45 | 46 | train_valid_split = int(0.8*len(train_ints)) 47 | np.random.seed(0) 48 | np.random.shuffle(train_ints) 49 | np.random.seed() 50 | 51 | valid_ints = train_ints[train_valid_split:] 52 | train_ints = train_ints[:train_valid_split] 53 | 54 | # compare the seen labels with the given labels in config.json 55 | if len(labels) > 0: 56 | overlap_labels = set(labels).intersection(set(train_labels.keys())) 57 | 58 | print('Seen labels: \t' + str(train_labels) + '\n') 59 | print('Given labels: \t' + str(labels)) 60 | 61 | # return None, None, None if some given label is not in the dataset 62 | if len(overlap_labels) < len(labels): 63 | print('Some labels have no annotations! Please revise the list of labels in the config.json.') 64 | return None, None, None 65 | else: 66 | print('No labels are provided. 
Train on all seen labels.') 67 | print(train_labels) 68 | labels = train_labels.keys() 69 | 70 | max_box_per_image = max([len(inst['object']) for inst in (train_ints + valid_ints)]) 71 | 72 | return train_ints, valid_ints, sorted(labels), max_box_per_image 73 | 74 | def create_callbacks(saved_weights_name, tensorboard_logs, model_to_save): 75 | makedirs(tensorboard_logs) 76 | 77 | early_stop = EarlyStopping( 78 | monitor = 'loss', 79 | min_delta = 0.01, 80 | patience = 7, 81 | mode = 'min', 82 | verbose = 1 83 | ) 84 | checkpoint = CustomModelCheckpoint( 85 | model_to_save = model_to_save, 86 | filepath = saved_weights_name,# + '{epoch:02d}.h5', 87 | monitor = 'loss', 88 | verbose = 1, 89 | save_best_only = True, 90 | mode = 'min', 91 | period = 1 92 | ) 93 | reduce_on_plateau = ReduceLROnPlateau( 94 | monitor = 'loss', 95 | factor = 0.1, 96 | patience = 2, 97 | verbose = 1, 98 | mode = 'min', 99 | epsilon = 0.01, 100 | cooldown = 0, 101 | min_lr = 0 102 | ) 103 | tensorboard = CustomTensorBoard( 104 | log_dir = tensorboard_logs, 105 | write_graph = True, 106 | write_images = True, 107 | ) 108 | return [early_stop, checkpoint, reduce_on_plateau, tensorboard] 109 | 110 | def create_model( 111 | nb_class, 112 | anchors, 113 | max_box_per_image, 114 | max_grid, batch_size, 115 | warmup_batches, 116 | ignore_thresh, 117 | multi_gpu, 118 | saved_weights_name, 119 | lr, 120 | grid_scales, 121 | obj_scale, 122 | noobj_scale, 123 | xywh_scale, 124 | class_scale 125 | ): 126 | if multi_gpu > 1: 127 | with tf.device('/cpu:0'): 128 | template_model, infer_model = create_yolov3_model( 129 | nb_class = nb_class, 130 | anchors = anchors, 131 | max_box_per_image = max_box_per_image, 132 | max_grid = max_grid, 133 | batch_size = batch_size//multi_gpu, 134 | warmup_batches = warmup_batches, 135 | ignore_thresh = ignore_thresh, 136 | grid_scales = grid_scales, 137 | obj_scale = obj_scale, 138 | noobj_scale = noobj_scale, 139 | xywh_scale = xywh_scale, 140 | class_scale = class_scale 141 | ) 142 | else: 143 | template_model, infer_model = create_yolov3_model( 144 | nb_class = nb_class, 145 | anchors = anchors, 146 | max_box_per_image = max_box_per_image, 147 | max_grid = max_grid, 148 | batch_size = batch_size, 149 | warmup_batches = warmup_batches, 150 | ignore_thresh = ignore_thresh, 151 | grid_scales = grid_scales, 152 | obj_scale = obj_scale, 153 | noobj_scale = noobj_scale, 154 | xywh_scale = xywh_scale, 155 | class_scale = class_scale 156 | ) 157 | 158 | # load the pretrained weight if exists, otherwise load the backend weight only 159 | if os.path.exists(saved_weights_name): 160 | print("\nLoading pretrained weights.\n") 161 | template_model.load_weights(saved_weights_name) 162 | else: 163 | template_model.load_weights("backend.h5", by_name=True) 164 | 165 | if multi_gpu > 1: 166 | train_model = multi_gpu_model(template_model, gpus=multi_gpu) 167 | else: 168 | train_model = template_model 169 | 170 | optimizer = Adam(lr=lr, clipnorm=0.001) 171 | train_model.compile(loss=dummy_loss, optimizer=optimizer) 172 | 173 | return train_model, infer_model 174 | 175 | def _main_(args): 176 | config_path = args.conf 177 | 178 | with open(config_path) as config_buffer: 179 | config = json.loads(config_buffer.read()) 180 | 181 | ############################### 182 | # Parse the annotations 183 | ############################### 184 | train_ints, valid_ints, labels, max_box_per_image = create_training_instances( 185 | config['train']['train_annot_folder'], 186 | config['train']['train_image_folder'], 187 | 
config['train']['cache_name'], 188 | config['valid']['valid_annot_folder'], 189 | config['valid']['valid_image_folder'], 190 | config['valid']['cache_name'], 191 | config['model']['labels'] 192 | ) 193 | print('\nTraining on: \t' + str(labels) + '\n') 194 | 195 | ############################### 196 | # Create the generators 197 | ############################### 198 | train_generator = BatchGenerator( 199 | instances = train_ints, 200 | anchors = config['model']['anchors'], 201 | labels = labels, 202 | downsample = 32, # ratio between network input's size and network output's size, 32 for YOLOv3 203 | max_box_per_image = max_box_per_image, 204 | batch_size = config['train']['batch_size'], 205 | min_net_size = config['model']['min_input_size'], 206 | max_net_size = config['model']['max_input_size'], 207 | shuffle = True, 208 | jitter = 0.3, 209 | norm = normalize 210 | ) 211 | 212 | valid_generator = BatchGenerator( 213 | instances = valid_ints, 214 | anchors = config['model']['anchors'], 215 | labels = labels, 216 | downsample = 32, # ratio between network input's size and network output's size, 32 for YOLOv3 217 | max_box_per_image = max_box_per_image, 218 | batch_size = config['train']['batch_size'], 219 | min_net_size = config['model']['min_input_size'], 220 | max_net_size = config['model']['max_input_size'], 221 | shuffle = True, 222 | jitter = 0.0, 223 | norm = normalize 224 | ) 225 | 226 | ############################### 227 | # Create the model 228 | ############################### 229 | if os.path.exists(config['train']['saved_weights_name']): 230 | config['train']['warmup_epochs'] = 0 231 | warmup_batches = config['train']['warmup_epochs'] * (config['train']['train_times']*len(train_generator)) 232 | 233 | os.environ['CUDA_VISIBLE_DEVICES'] = config['train']['gpus'] 234 | multi_gpu = len(config['train']['gpus'].split(',')) 235 | 236 | train_model, infer_model = create_model( 237 | nb_class = len(labels), 238 | anchors = config['model']['anchors'], 239 | max_box_per_image = max_box_per_image, 240 | max_grid = [config['model']['max_input_size'], config['model']['max_input_size']], 241 | batch_size = config['train']['batch_size'], 242 | warmup_batches = warmup_batches, 243 | ignore_thresh = config['train']['ignore_thresh'], 244 | multi_gpu = multi_gpu, 245 | saved_weights_name = config['train']['saved_weights_name'], 246 | lr = config['train']['learning_rate'], 247 | grid_scales = config['train']['grid_scales'], 248 | obj_scale = config['train']['obj_scale'], 249 | noobj_scale = config['train']['noobj_scale'], 250 | xywh_scale = config['train']['xywh_scale'], 251 | class_scale = config['train']['class_scale'], 252 | ) 253 | 254 | ############################### 255 | # Kick off the training 256 | ############################### 257 | callbacks = create_callbacks(config['train']['saved_weights_name'], config['train']['tensorboard_dir'], infer_model) 258 | 259 | train_model.fit_generator( 260 | generator = train_generator, 261 | steps_per_epoch = len(train_generator) * config['train']['train_times'], 262 | epochs = config['train']['nb_epochs'] + config['train']['warmup_epochs'], 263 | verbose = 2 if config['train']['debug'] else 1, 264 | callbacks = callbacks, 265 | workers = 4, 266 | max_queue_size = 8 267 | ) 268 | 269 | # make a GPU version of infer_model for evaluation 270 | if multi_gpu > 1: 271 | infer_model = load_model(config['train']['saved_weights_name']) 272 | 273 | ############################### 274 | # Run the evaluation 275 | ############################### 276 | # 
compute mAP for all the classes 277 | average_precisions = evaluate(infer_model, valid_generator) 278 | 279 | # print the score 280 | for label, average_precision in average_precisions.items(): 281 | print(labels[label] + ': {:.4f}'.format(average_precision)) 282 | print('mAP: {:.4f}'.format(sum(average_precisions.values()) / len(average_precisions))) 283 | 284 | if __name__ == '__main__': 285 | argparser = argparse.ArgumentParser(description='train and evaluate YOLO_v3 model on any dataset') 286 | argparser.add_argument('-c', '--conf', help='path to configuration file') 287 | 288 | args = argparser.parse_args() 289 | _main_(args) 290 | -------------------------------------------------------------------------------- /utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/experiencor/keras-yolo3/768c524f277adbfd26c2f44d73cb1826bbaf2d10/utils/__init__.py -------------------------------------------------------------------------------- /utils/bbox.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import os 3 | import cv2 4 | from .colors import get_color 5 | 6 | class BoundBox: 7 | def __init__(self, xmin, ymin, xmax, ymax, c = None, classes = None): 8 | self.xmin = xmin 9 | self.ymin = ymin 10 | self.xmax = xmax 11 | self.ymax = ymax 12 | 13 | self.c = c 14 | self.classes = classes 15 | 16 | self.label = -1 17 | self.score = -1 18 | 19 | def get_label(self): 20 | if self.label == -1: 21 | self.label = np.argmax(self.classes) 22 | 23 | return self.label 24 | 25 | def get_score(self): 26 | if self.score == -1: 27 | self.score = self.classes[self.get_label()] 28 | 29 | return self.score 30 | 31 | def _interval_overlap(interval_a, interval_b): 32 | x1, x2 = interval_a 33 | x3, x4 = interval_b 34 | 35 | if x3 < x1: 36 | if x4 < x1: 37 | return 0 38 | else: 39 | return min(x2,x4) - x1 40 | else: 41 | if x2 < x3: 42 | return 0 43 | else: 44 | return min(x2,x4) - x3 45 | 46 | def bbox_iou(box1, box2): 47 | intersect_w = _interval_overlap([box1.xmin, box1.xmax], [box2.xmin, box2.xmax]) 48 | intersect_h = _interval_overlap([box1.ymin, box1.ymax], [box2.ymin, box2.ymax]) 49 | 50 | intersect = intersect_w * intersect_h 51 | 52 | w1, h1 = box1.xmax-box1.xmin, box1.ymax-box1.ymin 53 | w2, h2 = box2.xmax-box2.xmin, box2.ymax-box2.ymin 54 | 55 | union = w1*h1 + w2*h2 - intersect 56 | 57 | return float(intersect) / union 58 | 59 | def draw_boxes(image, boxes, labels, obj_thresh, quiet=True): 60 | for box in boxes: 61 | label_str = '' 62 | label = -1 63 | 64 | for i in range(len(labels)): 65 | if box.classes[i] > obj_thresh: 66 | if label_str != '': label_str += ', ' 67 | label_str += (labels[i] + ' ' + str(round(box.get_score()*100, 2)) + '%') 68 | label = i 69 | if not quiet: print(label_str) 70 | 71 | if label >= 0: 72 | text_size = cv2.getTextSize(label_str, cv2.FONT_HERSHEY_SIMPLEX, 1.1e-3 * image.shape[0], 5) 73 | width, height = text_size[0][0], text_size[0][1] 74 | region = np.array([[box.xmin-3, box.ymin], 75 | [box.xmin-3, box.ymin-height-26], 76 | [box.xmin+width+13, box.ymin-height-26], 77 | [box.xmin+width+13, box.ymin]], dtype='int32') 78 | 79 | cv2.rectangle(img=image, pt1=(box.xmin,box.ymin), pt2=(box.xmax,box.ymax), color=get_color(label), thickness=5) 80 | cv2.fillPoly(img=image, pts=[region], color=get_color(label)) 81 | cv2.putText(img=image, 82 | text=label_str, 83 | org=(box.xmin+13, box.ymin - 13), 84 | fontFace=cv2.FONT_HERSHEY_SIMPLEX, 85 | 
fontScale=1e-3 * image.shape[0], 86 | color=(0,0,0), 87 | thickness=2) 88 | 89 | return image -------------------------------------------------------------------------------- /utils/colors.py: -------------------------------------------------------------------------------- 1 | def get_color(label): 2 | """ Return a color from a set of predefined colors. Contains 80 colors in total. 3 | code originally from https://github.com/fizyr/keras-retinanet/ 4 | Args 5 | label: The label to get the color for. 6 | Returns 7 | A list of three values representing a RGB color. 8 | """ 9 | if label < len(colors): 10 | return colors[label] 11 | else: 12 | print('Label {} has no color, returning default.'.format(label)) 13 | return (0, 255, 0) 14 | 15 | colors = [ 16 | [31 , 0 , 255] , 17 | [0 , 159 , 255] , 18 | [255 , 95 , 0] , 19 | [255 , 19 , 0] , 20 | [255 , 0 , 0] , 21 | [255 , 38 , 0] , 22 | [0 , 255 , 25] , 23 | [255 , 0 , 133] , 24 | [255 , 172 , 0] , 25 | [108 , 0 , 255] , 26 | [0 , 82 , 255] , 27 | [0 , 255 , 6] , 28 | [255 , 0 , 152] , 29 | [223 , 0 , 255] , 30 | [12 , 0 , 255] , 31 | [0 , 255 , 178] , 32 | [108 , 255 , 0] , 33 | [184 , 0 , 255] , 34 | [255 , 0 , 76] , 35 | [146 , 255 , 0] , 36 | [51 , 0 , 255] , 37 | [0 , 197 , 255] , 38 | [255 , 248 , 0] , 39 | [255 , 0 , 19] , 40 | [255 , 0 , 38] , 41 | [89 , 255 , 0] , 42 | [127 , 255 , 0] , 43 | [255 , 153 , 0] , 44 | [0 , 255 , 255] , 45 | [0 , 255 , 216] , 46 | [0 , 255 , 121] , 47 | [255 , 0 , 248] , 48 | [70 , 0 , 255] , 49 | [0 , 255 , 159] , 50 | [0 , 216 , 255] , 51 | [0 , 6 , 255] , 52 | [0 , 63 , 255] , 53 | [31 , 255 , 0] , 54 | [255 , 57 , 0] , 55 | [255 , 0 , 210] , 56 | [0 , 255 , 102] , 57 | [242 , 255 , 0] , 58 | [255 , 191 , 0] , 59 | [0 , 255 , 63] , 60 | [255 , 0 , 95] , 61 | [146 , 0 , 255] , 62 | [184 , 255 , 0] , 63 | [255 , 114 , 0] , 64 | [0 , 255 , 235] , 65 | [255 , 229 , 0] , 66 | [0 , 178 , 255] , 67 | [255 , 0 , 114] , 68 | [255 , 0 , 57] , 69 | [0 , 140 , 255] , 70 | [0 , 121 , 255] , 71 | [12 , 255 , 0] , 72 | [255 , 210 , 0] , 73 | [0 , 255 , 44] , 74 | [165 , 255 , 0] , 75 | [0 , 25 , 255] , 76 | [0 , 255 , 140] , 77 | [0 , 101 , 255] , 78 | [0 , 255 , 82] , 79 | [223 , 255 , 0] , 80 | [242 , 0 , 255] , 81 | [89 , 0 , 255] , 82 | [165 , 0 , 255] , 83 | [70 , 255 , 0] , 84 | [255 , 0 , 172] , 85 | [255 , 76 , 0] , 86 | [203 , 255 , 0] , 87 | [204 , 0 , 255] , 88 | [255 , 0 , 229] , 89 | [255 , 133 , 0] , 90 | [127 , 0 , 255] , 91 | [0 , 235 , 255] , 92 | [0 , 255 , 197] , 93 | [255 , 0 , 191] , 94 | [0 , 44 , 255] , 95 | [50 , 255 , 0] 96 | ] 97 | -------------------------------------------------------------------------------- /utils/image.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | import copy 4 | 5 | def _rand_scale(scale): 6 | scale = np.random.uniform(1, scale) 7 | return scale if (np.random.randint(2) == 0) else 1./scale; 8 | 9 | def _constrain(min_v, max_v, value): 10 | if value < min_v: return min_v 11 | if value > max_v: return max_v 12 | return value 13 | 14 | def random_flip(image, flip): 15 | if flip == 1: return cv2.flip(image, 1) 16 | return image 17 | 18 | def correct_bounding_boxes(boxes, new_w, new_h, net_w, net_h, dx, dy, flip, image_w, image_h): 19 | boxes = copy.deepcopy(boxes) 20 | 21 | # randomize boxes' order 22 | np.random.shuffle(boxes) 23 | 24 | # correct sizes and positions 25 | sx, sy = float(new_w)/image_w, float(new_h)/image_h 26 | zero_boxes = [] 27 | 28 | for i in range(len(boxes)): 29 | 
boxes[i]['xmin'] = int(_constrain(0, net_w, boxes[i]['xmin']*sx + dx)) 30 | boxes[i]['xmax'] = int(_constrain(0, net_w, boxes[i]['xmax']*sx + dx)) 31 | boxes[i]['ymin'] = int(_constrain(0, net_h, boxes[i]['ymin']*sy + dy)) 32 | boxes[i]['ymax'] = int(_constrain(0, net_h, boxes[i]['ymax']*sy + dy)) 33 | 34 | if boxes[i]['xmax'] <= boxes[i]['xmin'] or boxes[i]['ymax'] <= boxes[i]['ymin']: 35 | zero_boxes += [i] 36 | continue 37 | 38 | if flip == 1: 39 | swap = boxes[i]['xmin']; 40 | boxes[i]['xmin'] = net_w - boxes[i]['xmax'] 41 | boxes[i]['xmax'] = net_w - swap 42 | 43 | boxes = [boxes[i] for i in range(len(boxes)) if i not in zero_boxes] 44 | 45 | return boxes 46 | 47 | def random_distort_image(image, hue=18, saturation=1.5, exposure=1.5): 48 | # determine scale factors 49 | dhue = np.random.uniform(-hue, hue) 50 | dsat = _rand_scale(saturation); 51 | dexp = _rand_scale(exposure); 52 | 53 | # convert RGB space to HSV space 54 | image = cv2.cvtColor(image, cv2.COLOR_RGB2HSV).astype('float') 55 | 56 | # change satuation and exposure 57 | image[:,:,1] *= dsat 58 | image[:,:,2] *= dexp 59 | 60 | # change hue 61 | image[:,:,0] += dhue 62 | image[:,:,0] -= (image[:,:,0] > 180)*180 63 | image[:,:,0] += (image[:,:,0] < 0) *180 64 | 65 | # convert back to RGB from HSV 66 | return cv2.cvtColor(image.astype('uint8'), cv2.COLOR_HSV2RGB) 67 | 68 | def apply_random_scale_and_crop(image, new_w, new_h, net_w, net_h, dx, dy): 69 | im_sized = cv2.resize(image, (new_w, new_h)) 70 | 71 | if dx > 0: 72 | im_sized = np.pad(im_sized, ((0,0), (dx,0), (0,0)), mode='constant', constant_values=127) 73 | else: 74 | im_sized = im_sized[:,-dx:,:] 75 | if (new_w + dx) < net_w: 76 | im_sized = np.pad(im_sized, ((0,0), (0, net_w - (new_w+dx)), (0,0)), mode='constant', constant_values=127) 77 | 78 | if dy > 0: 79 | im_sized = np.pad(im_sized, ((dy,0), (0,0), (0,0)), mode='constant', constant_values=127) 80 | else: 81 | im_sized = im_sized[-dy:,:,:] 82 | 83 | if (new_h + dy) < net_h: 84 | im_sized = np.pad(im_sized, ((0, net_h - (new_h+dy)), (0,0), (0,0)), mode='constant', constant_values=127) 85 | 86 | return im_sized[:net_h, :net_w,:] -------------------------------------------------------------------------------- /utils/multi_gpu_model.py: -------------------------------------------------------------------------------- 1 | from keras.layers import Lambda, concatenate 2 | from keras.models import Model 3 | import tensorflow as tf 4 | 5 | def multi_gpu_model(model, gpus): 6 | if isinstance(gpus, (list, tuple)): 7 | num_gpus = len(gpus) 8 | target_gpu_ids = gpus 9 | else: 10 | num_gpus = gpus 11 | target_gpu_ids = range(num_gpus) 12 | 13 | def get_slice(data, i, parts): 14 | shape = tf.shape(data) 15 | batch_size = shape[:1] 16 | input_shape = shape[1:] 17 | step = batch_size // parts 18 | if i == num_gpus - 1: 19 | size = batch_size - step * i 20 | else: 21 | size = step 22 | size = tf.concat([size, input_shape], axis=0) 23 | stride = tf.concat([step, input_shape * 0], axis=0) 24 | start = stride * i 25 | return tf.slice(data, start, size) 26 | 27 | all_outputs = [] 28 | for i in range(len(model.outputs)): 29 | all_outputs.append([]) 30 | 31 | # Place a copy of the model on each GPU, 32 | # each getting a slice of the inputs. 33 | for i, gpu_id in enumerate(target_gpu_ids): 34 | with tf.device('/gpu:%d' % gpu_id): 35 | with tf.name_scope('replica_%d' % gpu_id): 36 | inputs = [] 37 | # Retrieve a slice of the input. 
38 | for x in model.inputs: 39 | input_shape = tuple(x.get_shape().as_list())[1:] 40 | slice_i = Lambda(get_slice, 41 | output_shape=input_shape, 42 | arguments={'i': i, 43 | 'parts': num_gpus})(x) 44 | inputs.append(slice_i) 45 | 46 | # Apply model on slice 47 | # (creating a model replica on the target device). 48 | outputs = model(inputs) 49 | if not isinstance(outputs, list): 50 | outputs = [outputs] 51 | 52 | # Save the outputs for merging back together later. 53 | for o in range(len(outputs)): 54 | all_outputs[o].append(outputs[o]) 55 | 56 | # Merge outputs on CPU. 57 | with tf.device('/cpu:0'): 58 | merged = [] 59 | for name, outputs in zip(model.output_names, all_outputs): 60 | merged.append(concatenate(outputs, 61 | axis=0, name=name)) 62 | return Model(model.inputs, merged) -------------------------------------------------------------------------------- /utils/utils.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | import os 4 | from .bbox import BoundBox, bbox_iou 5 | from scipy.special import expit 6 | 7 | def _sigmoid(x): 8 | return expit(x) 9 | 10 | def makedirs(path): 11 | try: 12 | os.makedirs(path) 13 | except OSError: 14 | if not os.path.isdir(path): 15 | raise 16 | 17 | def evaluate(model, 18 | generator, 19 | iou_threshold=0.5, 20 | obj_thresh=0.5, 21 | nms_thresh=0.45, 22 | net_h=416, 23 | net_w=416, 24 | save_path=None): 25 | """ Evaluate a given dataset using a given model. 26 | code originally from https://github.com/fizyr/keras-retinanet 27 | 28 | # Arguments 29 | model : The model to evaluate. 30 | generator : The generator that represents the dataset to evaluate. 31 | iou_threshold : The threshold used to consider when a detection is positive or negative. 32 | obj_thresh : The threshold used to distinguish between object and non-object 33 | nms_thresh : The threshold used to determine whether two detections are duplicates 34 | net_h : The height of the input image to the model, higher value results in better accuracy 35 | net_w : The width of the input image to the model 36 | save_path : The path to save images with visualized detections to. 37 | # Returns 38 | A dict mapping class names to mAP scores. 
39 | """ 40 | # gather all detections and annotations 41 | all_detections = [[None for i in range(generator.num_classes())] for j in range(generator.size())] 42 | all_annotations = [[None for i in range(generator.num_classes())] for j in range(generator.size())] 43 | 44 | for i in range(generator.size()): 45 | raw_image = [generator.load_image(i)] 46 | 47 | # make the boxes and the labels 48 | pred_boxes = get_yolo_boxes(model, raw_image, net_h, net_w, generator.get_anchors(), obj_thresh, nms_thresh)[0] 49 | 50 | score = np.array([box.get_score() for box in pred_boxes]) 51 | pred_labels = np.array([box.label for box in pred_boxes]) 52 | 53 | if len(pred_boxes) > 0: 54 | pred_boxes = np.array([[box.xmin, box.ymin, box.xmax, box.ymax, box.get_score()] for box in pred_boxes]) 55 | else: 56 | pred_boxes = np.array([[]]) 57 | 58 | # sort the boxes and the labels according to scores 59 | score_sort = np.argsort(-score) 60 | pred_labels = pred_labels[score_sort] 61 | pred_boxes = pred_boxes[score_sort] 62 | 63 | # copy detections to all_detections 64 | for label in range(generator.num_classes()): 65 | all_detections[i][label] = pred_boxes[pred_labels == label, :] 66 | 67 | annotations = generator.load_annotation(i) 68 | 69 | # copy detections to all_annotations 70 | for label in range(generator.num_classes()): 71 | all_annotations[i][label] = annotations[annotations[:, 4] == label, :4].copy() 72 | 73 | # compute mAP by comparing all detections and all annotations 74 | average_precisions = {} 75 | 76 | for label in range(generator.num_classes()): 77 | false_positives = np.zeros((0,)) 78 | true_positives = np.zeros((0,)) 79 | scores = np.zeros((0,)) 80 | num_annotations = 0.0 81 | 82 | for i in range(generator.size()): 83 | detections = all_detections[i][label] 84 | annotations = all_annotations[i][label] 85 | num_annotations += annotations.shape[0] 86 | detected_annotations = [] 87 | 88 | for d in detections: 89 | scores = np.append(scores, d[4]) 90 | 91 | if annotations.shape[0] == 0: 92 | false_positives = np.append(false_positives, 1) 93 | true_positives = np.append(true_positives, 0) 94 | continue 95 | 96 | overlaps = compute_overlap(np.expand_dims(d, axis=0), annotations) 97 | assigned_annotation = np.argmax(overlaps, axis=1) 98 | max_overlap = overlaps[0, assigned_annotation] 99 | 100 | if max_overlap >= iou_threshold and assigned_annotation not in detected_annotations: 101 | false_positives = np.append(false_positives, 0) 102 | true_positives = np.append(true_positives, 1) 103 | detected_annotations.append(assigned_annotation) 104 | else: 105 | false_positives = np.append(false_positives, 1) 106 | true_positives = np.append(true_positives, 0) 107 | 108 | # no annotations -> AP for this class is 0 (is this correct?) 
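        # Note: giving AP 0 to a class that never occurs in the validation set pulls
        # the mean down; other evaluation scripts sometimes exclude such classes from
        # the average instead, so mAP values are not directly comparable across tools.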
109 | if num_annotations == 0: 110 | average_precisions[label] = 0 111 | continue 112 | 113 | # sort by score 114 | indices = np.argsort(-scores) 115 | false_positives = false_positives[indices] 116 | true_positives = true_positives[indices] 117 | 118 | # compute false positives and true positives 119 | false_positives = np.cumsum(false_positives) 120 | true_positives = np.cumsum(true_positives) 121 | 122 | # compute recall and precision 123 | recall = true_positives / num_annotations 124 | precision = true_positives / np.maximum(true_positives + false_positives, np.finfo(np.float64).eps) 125 | 126 | # compute average precision 127 | average_precision = compute_ap(recall, precision) 128 | average_precisions[label] = average_precision 129 | 130 | return average_precisions 131 | 132 | def correct_yolo_boxes(boxes, image_h, image_w, net_h, net_w): 133 | if (float(net_w)/image_w) < (float(net_h)/image_h): 134 | new_w = net_w 135 | new_h = (image_h*net_w)/image_w 136 | else: 137 | new_h = net_w 138 | new_w = (image_w*net_h)/image_h 139 | 140 | for i in range(len(boxes)): 141 | x_offset, x_scale = (net_w - new_w)/2./net_w, float(new_w)/net_w 142 | y_offset, y_scale = (net_h - new_h)/2./net_h, float(new_h)/net_h 143 | 144 | boxes[i].xmin = int((boxes[i].xmin - x_offset) / x_scale * image_w) 145 | boxes[i].xmax = int((boxes[i].xmax - x_offset) / x_scale * image_w) 146 | boxes[i].ymin = int((boxes[i].ymin - y_offset) / y_scale * image_h) 147 | boxes[i].ymax = int((boxes[i].ymax - y_offset) / y_scale * image_h) 148 | 149 | def do_nms(boxes, nms_thresh): 150 | if len(boxes) > 0: 151 | nb_class = len(boxes[0].classes) 152 | else: 153 | return 154 | 155 | for c in range(nb_class): 156 | sorted_indices = np.argsort([-box.classes[c] for box in boxes]) 157 | 158 | for i in range(len(sorted_indices)): 159 | index_i = sorted_indices[i] 160 | 161 | if boxes[index_i].classes[c] == 0: continue 162 | 163 | for j in range(i+1, len(sorted_indices)): 164 | index_j = sorted_indices[j] 165 | 166 | if bbox_iou(boxes[index_i], boxes[index_j]) >= nms_thresh: 167 | boxes[index_j].classes[c] = 0 168 | 169 | def decode_netout(netout, anchors, obj_thresh, net_h, net_w): 170 | grid_h, grid_w = netout.shape[:2] 171 | nb_box = 3 172 | netout = netout.reshape((grid_h, grid_w, nb_box, -1)) 173 | nb_class = netout.shape[-1] - 5 174 | 175 | boxes = [] 176 | 177 | netout[..., :2] = _sigmoid(netout[..., :2]) 178 | netout[..., 4] = _sigmoid(netout[..., 4]) 179 | netout[..., 5:] = netout[..., 4][..., np.newaxis] * _softmax(netout[..., 5:]) 180 | netout[..., 5:] *= netout[..., 5:] > obj_thresh 181 | 182 | for i in range(grid_h*grid_w): 183 | row = i // grid_w 184 | col = i % grid_w 185 | 186 | for b in range(nb_box): 187 | # 4th element is objectness score 188 | objectness = netout[row, col, b, 4] 189 | 190 | if(objectness <= obj_thresh): continue 191 | 192 | # first 4 elements are x, y, w, and h 193 | x, y, w, h = netout[row,col,b,:4] 194 | 195 | x = (col + x) / grid_w # center position, unit: image width 196 | y = (row + y) / grid_h # center position, unit: image height 197 | w = anchors[2 * b + 0] * np.exp(w) / net_w # unit: image width 198 | h = anchors[2 * b + 1] * np.exp(h) / net_h # unit: image height 199 | 200 | # last elements are class probabilities 201 | classes = netout[row,col,b,5:] 202 | 203 | box = BoundBox(x-w/2, y-h/2, x+w/2, y+h/2, objectness, classes) 204 | 205 | boxes.append(box) 206 | 207 | return boxes 208 | 209 | def preprocess_input(image, net_h, net_w): 210 | new_h, new_w, _ = image.shape 211 | 212 | # 
determine the new size of the image 213 | if (float(net_w)/new_w) < (float(net_h)/new_h): 214 | new_h = (new_h * net_w)//new_w 215 | new_w = net_w 216 | else: 217 | new_w = (new_w * net_h)//new_h 218 | new_h = net_h 219 | 220 | # resize the image to the new size 221 | resized = cv2.resize(image[:,:,::-1]/255., (new_w, new_h)) 222 | 223 | # embed the image into the standard letter box 224 | new_image = np.ones((net_h, net_w, 3)) * 0.5 225 | new_image[(net_h-new_h)//2:(net_h+new_h)//2, (net_w-new_w)//2:(net_w+new_w)//2, :] = resized 226 | new_image = np.expand_dims(new_image, 0) 227 | 228 | return new_image 229 | 230 | def normalize(image): 231 | return image/255. 232 | 233 | def get_yolo_boxes(model, images, net_h, net_w, anchors, obj_thresh, nms_thresh): 234 | image_h, image_w, _ = images[0].shape 235 | nb_images = len(images) 236 | batch_input = np.zeros((nb_images, net_h, net_w, 3)) 237 | 238 | # preprocess the input 239 | for i in range(nb_images): 240 | batch_input[i] = preprocess_input(images[i], net_h, net_w) 241 | 242 | # run the prediction 243 | batch_output = model.predict_on_batch(batch_input) 244 | batch_boxes = [None]*nb_images 245 | 246 | for i in range(nb_images): 247 | yolos = [batch_output[0][i], batch_output[1][i], batch_output[2][i]] 248 | boxes = [] 249 | 250 | # decode the output of the network 251 | for j in range(len(yolos)): 252 | yolo_anchors = anchors[(2-j)*6:(3-j)*6] # config['model']['anchors'] 253 | boxes += decode_netout(yolos[j], yolo_anchors, obj_thresh, net_h, net_w) 254 | 255 | # correct the sizes of the bounding boxes 256 | correct_yolo_boxes(boxes, image_h, image_w, net_h, net_w) 257 | 258 | # suppress non-maximal boxes 259 | do_nms(boxes, nms_thresh) 260 | 261 | batch_boxes[i] = boxes 262 | 263 | return batch_boxes 264 | 265 | def compute_overlap(a, b): 266 | """ 267 | Code originally from https://github.com/rbgirshick/py-faster-rcnn. 268 | Parameters 269 | ---------- 270 | a: (N, 4) ndarray of float 271 | b: (K, 4) ndarray of float 272 | Returns 273 | ------- 274 | overlaps: (N, K) ndarray of overlap between boxes and query_boxes 275 | """ 276 | area = (b[:, 2] - b[:, 0]) * (b[:, 3] - b[:, 1]) 277 | 278 | iw = np.minimum(np.expand_dims(a[:, 2], axis=1), b[:, 2]) - np.maximum(np.expand_dims(a[:, 0], 1), b[:, 0]) 279 | ih = np.minimum(np.expand_dims(a[:, 3], axis=1), b[:, 3]) - np.maximum(np.expand_dims(a[:, 1], 1), b[:, 1]) 280 | 281 | iw = np.maximum(iw, 0) 282 | ih = np.maximum(ih, 0) 283 | 284 | ua = np.expand_dims((a[:, 2] - a[:, 0]) * (a[:, 3] - a[:, 1]), axis=1) + area - iw * ih 285 | 286 | ua = np.maximum(ua, np.finfo(float).eps) 287 | 288 | intersection = iw * ih 289 | 290 | return intersection / ua 291 | 292 | def compute_ap(recall, precision): 293 | """ Compute the average precision, given the recall and precision curves. 294 | Code originally from https://github.com/rbgirshick/py-faster-rcnn. 295 | 296 | # Arguments 297 | recall: The recall curve (list). 298 | precision: The precision curve (list). 299 | # Returns 300 | The average precision as computed in py-faster-rcnn. 
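    # Example
        A small worked case: recall = [0.5, 1.0] and precision = [1.0, 0.5] give a
        precision envelope of 1.0 on the first recall step and 0.5 on the second,
        so ap = 0.5 * 1.0 + 0.5 * 0.5 = 0.75.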
301 | """ 302 | # correct AP calculation 303 | # first append sentinel values at the end 304 | mrec = np.concatenate(([0.], recall, [1.])) 305 | mpre = np.concatenate(([0.], precision, [0.])) 306 | 307 | # compute the precision envelope 308 | for i in range(mpre.size - 1, 0, -1): 309 | mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i]) 310 | 311 | # to calculate area under PR curve, look for points 312 | # where X axis (recall) changes value 313 | i = np.where(mrec[1:] != mrec[:-1])[0] 314 | 315 | # and sum (\Delta recall) * prec 316 | ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1]) 317 | return ap 318 | 319 | def _softmax(x, axis=-1): 320 | x = x - np.amax(x, axis, keepdims=True) 321 | e_x = np.exp(x) 322 | 323 | return e_x / e_x.sum(axis, keepdims=True) 324 | -------------------------------------------------------------------------------- /voc.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import os 3 | import xml.etree.ElementTree as ET 4 | import pickle 5 | 6 | def parse_voc_annotation(ann_dir, img_dir, cache_name, labels=[]): 7 | if os.path.exists(cache_name): 8 | with open(cache_name, 'rb') as handle: 9 | cache = pickle.load(handle) 10 | all_insts, seen_labels = cache['all_insts'], cache['seen_labels'] 11 | else: 12 | all_insts = [] 13 | seen_labels = {} 14 | 15 | for ann in sorted(os.listdir(ann_dir)): 16 | img = {'object':[]} 17 | 18 | try: 19 | tree = ET.parse(ann_dir + ann) 20 | except Exception as e: 21 | print(e) 22 | print('Ignore this bad annotation: ' + ann_dir + ann) 23 | continue 24 | 25 | for elem in tree.iter(): 26 | if 'filename' in elem.tag: 27 | img['filename'] = img_dir + elem.text 28 | if 'width' in elem.tag: 29 | img['width'] = int(elem.text) 30 | if 'height' in elem.tag: 31 | img['height'] = int(elem.text) 32 | if 'object' in elem.tag or 'part' in elem.tag: 33 | obj = {} 34 | 35 | for attr in list(elem): 36 | if 'name' in attr.tag: 37 | obj['name'] = attr.text 38 | 39 | if obj['name'] in seen_labels: 40 | seen_labels[obj['name']] += 1 41 | else: 42 | seen_labels[obj['name']] = 1 43 | 44 | if len(labels) > 0 and obj['name'] not in labels: 45 | break 46 | else: 47 | img['object'] += [obj] 48 | 49 | if 'bndbox' in attr.tag: 50 | for dim in list(attr): 51 | if 'xmin' in dim.tag: 52 | obj['xmin'] = int(round(float(dim.text))) 53 | if 'ymin' in dim.tag: 54 | obj['ymin'] = int(round(float(dim.text))) 55 | if 'xmax' in dim.tag: 56 | obj['xmax'] = int(round(float(dim.text))) 57 | if 'ymax' in dim.tag: 58 | obj['ymax'] = int(round(float(dim.text))) 59 | 60 | if len(img['object']) > 0: 61 | all_insts += [img] 62 | 63 | cache = {'all_insts': all_insts, 'seen_labels': seen_labels} 64 | with open(cache_name, 'wb') as handle: 65 | pickle.dump(cache, handle, protocol=pickle.HIGHEST_PROTOCOL) 66 | 67 | return all_insts, seen_labels -------------------------------------------------------------------------------- /yolo.py: -------------------------------------------------------------------------------- 1 | from keras.layers import Conv2D, Input, BatchNormalization, LeakyReLU, ZeroPadding2D, UpSampling2D, Lambda 2 | from keras.layers.merge import add, concatenate 3 | from keras.models import Model 4 | from keras.engine.topology import Layer 5 | import tensorflow as tf 6 | 7 | debug = False 8 | 9 | class YoloLayer(Layer): 10 | def __init__(self, anchors, max_grid, batch_size, warmup_batches, ignore_thresh, 11 | grid_scale, obj_scale, noobj_scale, xywh_scale, class_scale, 12 | **kwargs): 13 | # make the model settings 
persistent 14 | self.ignore_thresh = ignore_thresh 15 | self.warmup_batches = warmup_batches 16 | self.anchors = tf.constant(anchors, dtype='float', shape=[1,1,1,3,2]) 17 | self.grid_scale = grid_scale 18 | self.obj_scale = obj_scale 19 | self.noobj_scale = noobj_scale 20 | self.xywh_scale = xywh_scale 21 | self.class_scale = class_scale 22 | 23 | # make a persistent mesh grid 24 | max_grid_h, max_grid_w = max_grid 25 | 26 | cell_x = tf.to_float(tf.reshape(tf.tile(tf.range(max_grid_w), [max_grid_h]), (1, max_grid_h, max_grid_w, 1, 1))) 27 | cell_y = tf.transpose(cell_x, (0,2,1,3,4)) 28 | self.cell_grid = tf.tile(tf.concat([cell_x,cell_y],-1), [batch_size, 1, 1, 3, 1]) 29 | 30 | super(YoloLayer, self).__init__(**kwargs) 31 | 32 | def build(self, input_shape): 33 | super(YoloLayer, self).build(input_shape) # Be sure to call this somewhere! 34 | 35 | def call(self, x): 36 | input_image, y_pred, y_true, true_boxes = x 37 | 38 | # adjust the shape of the y_predict [batch, grid_h, grid_w, 3, 4+1+nb_class] 39 | y_pred = tf.reshape(y_pred, tf.concat([tf.shape(y_pred)[:3], tf.constant([3, -1])], axis=0)) 40 | 41 | # initialize the masks 42 | object_mask = tf.expand_dims(y_true[..., 4], 4) 43 | 44 | # the variable to keep track of number of batches processed 45 | batch_seen = tf.Variable(0.) 46 | 47 | # compute grid factor and net factor 48 | grid_h = tf.shape(y_true)[1] 49 | grid_w = tf.shape(y_true)[2] 50 | grid_factor = tf.reshape(tf.cast([grid_w, grid_h], tf.float32), [1,1,1,1,2]) 51 | 52 | net_h = tf.shape(input_image)[1] 53 | net_w = tf.shape(input_image)[2] 54 | net_factor = tf.reshape(tf.cast([net_w, net_h], tf.float32), [1,1,1,1,2]) 55 | 56 | """ 57 | Adjust prediction 58 | """ 59 | pred_box_xy = (self.cell_grid[:,:grid_h,:grid_w,:,:] + tf.sigmoid(y_pred[..., :2])) # sigma(t_xy) + c_xy 60 | pred_box_wh = y_pred[..., 2:4] # t_wh 61 | pred_box_conf = tf.expand_dims(tf.sigmoid(y_pred[..., 4]), 4) # adjust confidence 62 | pred_box_class = y_pred[..., 5:] # adjust class probabilities 63 | 64 | """ 65 | Adjust ground truth 66 | """ 67 | true_box_xy = y_true[..., 0:2] # (sigma(t_xy) + c_xy) 68 | true_box_wh = y_true[..., 2:4] # t_wh 69 | true_box_conf = tf.expand_dims(y_true[..., 4], 4) 70 | true_box_class = tf.argmax(y_true[..., 5:], -1) 71 | 72 | """ 73 | Compare each predicted box to all true boxes 74 | """ 75 | # initially, drag all objectness of all boxes to 0 76 | conf_delta = pred_box_conf - 0 77 | 78 | # then, ignore the boxes which have good overlap with some true box 79 | true_xy = true_boxes[..., 0:2] / grid_factor 80 | true_wh = true_boxes[..., 2:4] / net_factor 81 | 82 | true_wh_half = true_wh / 2. 83 | true_mins = true_xy - true_wh_half 84 | true_maxes = true_xy + true_wh_half 85 | 86 | pred_xy = tf.expand_dims(pred_box_xy / grid_factor, 4) 87 | pred_wh = tf.expand_dims(tf.exp(pred_box_wh) * self.anchors / net_factor, 4) 88 | 89 | pred_wh_half = pred_wh / 2. 90 | pred_mins = pred_xy - pred_wh_half 91 | pred_maxes = pred_xy + pred_wh_half 92 | 93 | intersect_mins = tf.maximum(pred_mins, true_mins) 94 | intersect_maxes = tf.minimum(pred_maxes, true_maxes) 95 | 96 | intersect_wh = tf.maximum(intersect_maxes - intersect_mins, 0.) 
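        # From the intersections below, the IoU of every predicted box against every
        # true box is formed; predictions whose best IoU already exceeds ignore_thresh
        # are masked out of conf_delta, so they are not pushed towards zero objectness
        # (YOLOv3's "ignore" rule for well-matched but unassigned predictions).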
97 | intersect_areas = intersect_wh[..., 0] * intersect_wh[..., 1] 98 | 99 | true_areas = true_wh[..., 0] * true_wh[..., 1] 100 | pred_areas = pred_wh[..., 0] * pred_wh[..., 1] 101 | 102 | union_areas = pred_areas + true_areas - intersect_areas 103 | iou_scores = tf.truediv(intersect_areas, union_areas) 104 | 105 | best_ious = tf.reduce_max(iou_scores, axis=4) 106 | conf_delta *= tf.expand_dims(tf.to_float(best_ious < self.ignore_thresh), 4) 107 | 108 | """ 109 | Compute some online statistics 110 | """ 111 | true_xy = true_box_xy / grid_factor 112 | true_wh = tf.exp(true_box_wh) * self.anchors / net_factor 113 | 114 | true_wh_half = true_wh / 2. 115 | true_mins = true_xy - true_wh_half 116 | true_maxes = true_xy + true_wh_half 117 | 118 | pred_xy = pred_box_xy / grid_factor 119 | pred_wh = tf.exp(pred_box_wh) * self.anchors / net_factor 120 | 121 | pred_wh_half = pred_wh / 2. 122 | pred_mins = pred_xy - pred_wh_half 123 | pred_maxes = pred_xy + pred_wh_half 124 | 125 | intersect_mins = tf.maximum(pred_mins, true_mins) 126 | intersect_maxes = tf.minimum(pred_maxes, true_maxes) 127 | intersect_wh = tf.maximum(intersect_maxes - intersect_mins, 0.) 128 | intersect_areas = intersect_wh[..., 0] * intersect_wh[..., 1] 129 | 130 | true_areas = true_wh[..., 0] * true_wh[..., 1] 131 | pred_areas = pred_wh[..., 0] * pred_wh[..., 1] 132 | 133 | union_areas = pred_areas + true_areas - intersect_areas 134 | iou_scores = tf.truediv(intersect_areas, union_areas) 135 | iou_scores = object_mask * tf.expand_dims(iou_scores, 4) 136 | 137 | count = tf.reduce_sum(object_mask) 138 | count_noobj = tf.reduce_sum(1 - object_mask) 139 | detect_mask = tf.to_float((pred_box_conf*object_mask) >= 0.5) 140 | class_mask = tf.expand_dims(tf.to_float(tf.equal(tf.argmax(pred_box_class, -1), true_box_class)), 4) 141 | recall50 = tf.reduce_sum(tf.to_float(iou_scores >= 0.5 ) * detect_mask * class_mask) / (count + 1e-3) 142 | recall75 = tf.reduce_sum(tf.to_float(iou_scores >= 0.75) * detect_mask * class_mask) / (count + 1e-3) 143 | avg_iou = tf.reduce_sum(iou_scores) / (count + 1e-3) 144 | avg_obj = tf.reduce_sum(pred_box_conf * object_mask) / (count + 1e-3) 145 | avg_noobj = tf.reduce_sum(pred_box_conf * (1-object_mask)) / (count_noobj + 1e-3) 146 | avg_cat = tf.reduce_sum(object_mask * class_mask) / (count + 1e-3) 147 | 148 | """ 149 | Warm-up training 150 | """ 151 | batch_seen = tf.assign_add(batch_seen, 1.) 
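        # Warm-up: while batch_seen <= warmup_batches, the cond() below replaces the
        # xy/wh targets so that every anchor in every cell is regressed towards a box
        # centred in its own cell with the raw anchor size (t_wh = 0), and xywh_mask
        # covers all positions; after warm-up only cells containing objects contribute.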
152 | 153 | true_box_xy, true_box_wh, xywh_mask = tf.cond(tf.less(batch_seen, self.warmup_batches+1), 154 | lambda: [true_box_xy + (0.5 + self.cell_grid[:,:grid_h,:grid_w,:,:]) * (1-object_mask), 155 | true_box_wh + tf.zeros_like(true_box_wh) * (1-object_mask), 156 | tf.ones_like(object_mask)], 157 | lambda: [true_box_xy, 158 | true_box_wh, 159 | object_mask]) 160 | 161 | """ 162 | Compare each true box to all anchor boxes 163 | """ 164 | wh_scale = tf.exp(true_box_wh) * self.anchors / net_factor 165 | wh_scale = tf.expand_dims(2 - wh_scale[..., 0] * wh_scale[..., 1], axis=4) # the smaller the box, the bigger the scale 166 | 167 | xy_delta = xywh_mask * (pred_box_xy-true_box_xy) * wh_scale * self.xywh_scale 168 | wh_delta = xywh_mask * (pred_box_wh-true_box_wh) * wh_scale * self.xywh_scale 169 | conf_delta = object_mask * (pred_box_conf-true_box_conf) * self.obj_scale + (1-object_mask) * conf_delta * self.noobj_scale 170 | class_delta = object_mask * \ 171 | tf.expand_dims(tf.nn.sparse_softmax_cross_entropy_with_logits(labels=true_box_class, logits=pred_box_class), 4) * \ 172 | self.class_scale 173 | 174 | loss_xy = tf.reduce_sum(tf.square(xy_delta), list(range(1,5))) 175 | loss_wh = tf.reduce_sum(tf.square(wh_delta), list(range(1,5))) 176 | loss_conf = tf.reduce_sum(tf.square(conf_delta), list(range(1,5))) 177 | loss_class = tf.reduce_sum(class_delta, list(range(1,5))) 178 | 179 | loss = loss_xy + loss_wh + loss_conf + loss_class 180 | 181 | if debug: 182 | loss = tf.Print(loss, [grid_h, avg_obj], message='avg_obj \t\t', summarize=1000) 183 | loss = tf.Print(loss, [grid_h, avg_noobj], message='avg_noobj \t\t', summarize=1000) 184 | loss = tf.Print(loss, [grid_h, avg_iou], message='avg_iou \t\t', summarize=1000) 185 | loss = tf.Print(loss, [grid_h, avg_cat], message='avg_cat \t\t', summarize=1000) 186 | loss = tf.Print(loss, [grid_h, recall50], message='recall50 \t', summarize=1000) 187 | loss = tf.Print(loss, [grid_h, recall75], message='recall75 \t', summarize=1000) 188 | loss = tf.Print(loss, [grid_h, count], message='count \t', summarize=1000) 189 | loss = tf.Print(loss, [grid_h, tf.reduce_sum(loss_xy), 190 | tf.reduce_sum(loss_wh), 191 | tf.reduce_sum(loss_conf), 192 | tf.reduce_sum(loss_class)], message='loss xy, wh, conf, class: \t', summarize=1000) 193 | 194 | 195 | return loss*self.grid_scale 196 | 197 | def compute_output_shape(self, input_shape): 198 | return [(None, 1)] 199 | 200 | def _conv_block(inp, convs, do_skip=True): 201 | x = inp 202 | count = 0 203 | 204 | for conv in convs: 205 | if count == (len(convs) - 2) and do_skip: 206 | skip_connection = x 207 | count += 1 208 | 209 | if conv['stride'] > 1: x = ZeroPadding2D(((1,0),(1,0)))(x) # unlike tensorflow darknet prefer left and top paddings 210 | x = Conv2D(conv['filter'], 211 | conv['kernel'], 212 | strides=conv['stride'], 213 | padding='valid' if conv['stride'] > 1 else 'same', # unlike tensorflow darknet prefer left and top paddings 214 | name='conv_' + str(conv['layer_idx']), 215 | use_bias=False if conv['bnorm'] else True)(x) 216 | if conv['bnorm']: x = BatchNormalization(epsilon=0.001, name='bnorm_' + str(conv['layer_idx']))(x) 217 | if conv['leaky']: x = LeakyReLU(alpha=0.1, name='leaky_' + str(conv['layer_idx']))(x) 218 | 219 | return add([skip_connection, x]) if do_skip else x 220 | 221 | def create_yolov3_model( 222 | nb_class, 223 | anchors, 224 | max_box_per_image, 225 | max_grid, 226 | batch_size, 227 | warmup_batches, 228 | ignore_thresh, 229 | grid_scales, 230 | obj_scale, 231 | noobj_scale, 232 | 
xywh_scale, 233 | class_scale 234 | ): 235 | input_image = Input(shape=(None, None, 3)) # net_h, net_w, 3 236 | true_boxes = Input(shape=(1, 1, 1, max_box_per_image, 4)) 237 | true_yolo_1 = Input(shape=(None, None, len(anchors)//6, 4+1+nb_class)) # grid_h, grid_w, nb_anchor, 5+nb_class 238 | true_yolo_2 = Input(shape=(None, None, len(anchors)//6, 4+1+nb_class)) # grid_h, grid_w, nb_anchor, 5+nb_class 239 | true_yolo_3 = Input(shape=(None, None, len(anchors)//6, 4+1+nb_class)) # grid_h, grid_w, nb_anchor, 5+nb_class 240 | 241 | # Layer 0 => 4 242 | x = _conv_block(input_image, [{'filter': 32, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 0}, 243 | {'filter': 64, 'kernel': 3, 'stride': 2, 'bnorm': True, 'leaky': True, 'layer_idx': 1}, 244 | {'filter': 32, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 2}, 245 | {'filter': 64, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 3}]) 246 | 247 | # Layer 5 => 8 248 | x = _conv_block(x, [{'filter': 128, 'kernel': 3, 'stride': 2, 'bnorm': True, 'leaky': True, 'layer_idx': 5}, 249 | {'filter': 64, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 6}, 250 | {'filter': 128, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 7}]) 251 | 252 | # Layer 9 => 11 253 | x = _conv_block(x, [{'filter': 64, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 9}, 254 | {'filter': 128, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 10}]) 255 | 256 | # Layer 12 => 15 257 | x = _conv_block(x, [{'filter': 256, 'kernel': 3, 'stride': 2, 'bnorm': True, 'leaky': True, 'layer_idx': 12}, 258 | {'filter': 128, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 13}, 259 | {'filter': 256, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 14}]) 260 | 261 | # Layer 16 => 36 262 | for i in range(7): 263 | x = _conv_block(x, [{'filter': 128, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 16+i*3}, 264 | {'filter': 256, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 17+i*3}]) 265 | 266 | skip_36 = x 267 | 268 | # Layer 37 => 40 269 | x = _conv_block(x, [{'filter': 512, 'kernel': 3, 'stride': 2, 'bnorm': True, 'leaky': True, 'layer_idx': 37}, 270 | {'filter': 256, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 38}, 271 | {'filter': 512, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 39}]) 272 | 273 | # Layer 41 => 61 274 | for i in range(7): 275 | x = _conv_block(x, [{'filter': 256, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 41+i*3}, 276 | {'filter': 512, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 42+i*3}]) 277 | 278 | skip_61 = x 279 | 280 | # Layer 62 => 65 281 | x = _conv_block(x, [{'filter': 1024, 'kernel': 3, 'stride': 2, 'bnorm': True, 'leaky': True, 'layer_idx': 62}, 282 | {'filter': 512, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 63}, 283 | {'filter': 1024, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 64}]) 284 | 285 | # Layer 66 => 74 286 | for i in range(3): 287 | x = _conv_block(x, [{'filter': 512, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 66+i*3}, 288 | {'filter': 1024, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 67+i*3}]) 289 | 290 | # Layer 75 => 79 291 | x = _conv_block(x, [{'filter': 512, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 
'layer_idx': 75}, 292 | {'filter': 1024, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 76}, 293 | {'filter': 512, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 77}, 294 | {'filter': 1024, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 78}, 295 | {'filter': 512, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 79}], do_skip=False) 296 | 297 | # Layer 80 => 82 298 | pred_yolo_1 = _conv_block(x, [{'filter': 1024, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 80}, 299 | {'filter': (3*(5+nb_class)), 'kernel': 1, 'stride': 1, 'bnorm': False, 'leaky': False, 'layer_idx': 81}], do_skip=False) 300 | loss_yolo_1 = YoloLayer(anchors[12:], 301 | [1*num for num in max_grid], 302 | batch_size, 303 | warmup_batches, 304 | ignore_thresh, 305 | grid_scales[0], 306 | obj_scale, 307 | noobj_scale, 308 | xywh_scale, 309 | class_scale)([input_image, pred_yolo_1, true_yolo_1, true_boxes]) 310 | 311 | # Layer 83 => 86 312 | x = _conv_block(x, [{'filter': 256, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 84}], do_skip=False) 313 | x = UpSampling2D(2)(x) 314 | x = concatenate([x, skip_61]) 315 | 316 | # Layer 87 => 91 317 | x = _conv_block(x, [{'filter': 256, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 87}, 318 | {'filter': 512, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 88}, 319 | {'filter': 256, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 89}, 320 | {'filter': 512, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 90}, 321 | {'filter': 256, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 91}], do_skip=False) 322 | 323 | # Layer 92 => 94 324 | pred_yolo_2 = _conv_block(x, [{'filter': 512, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 92}, 325 | {'filter': (3*(5+nb_class)), 'kernel': 1, 'stride': 1, 'bnorm': False, 'leaky': False, 'layer_idx': 93}], do_skip=False) 326 | loss_yolo_2 = YoloLayer(anchors[6:12], 327 | [2*num for num in max_grid], 328 | batch_size, 329 | warmup_batches, 330 | ignore_thresh, 331 | grid_scales[1], 332 | obj_scale, 333 | noobj_scale, 334 | xywh_scale, 335 | class_scale)([input_image, pred_yolo_2, true_yolo_2, true_boxes]) 336 | 337 | # Layer 95 => 98 338 | x = _conv_block(x, [{'filter': 128, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 96}], do_skip=False) 339 | x = UpSampling2D(2)(x) 340 | x = concatenate([x, skip_36]) 341 | 342 | # Layer 99 => 106 343 | pred_yolo_3 = _conv_block(x, [{'filter': 128, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 99}, 344 | {'filter': 256, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 100}, 345 | {'filter': 128, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 101}, 346 | {'filter': 256, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 102}, 347 | {'filter': 128, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 103}, 348 | {'filter': 256, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 104}, 349 | {'filter': (3*(5+nb_class)), 'kernel': 1, 'stride': 1, 'bnorm': False, 'leaky': False, 'layer_idx': 105}], do_skip=False) 350 | loss_yolo_3 = YoloLayer(anchors[:6], 351 | [4*num for num in max_grid], 352 | batch_size, 353 | warmup_batches, 354 | ignore_thresh, 355 | grid_scales[2], 356 | obj_scale, 357 | noobj_scale, 358 | xywh_scale, 359 | 
class_scale)([input_image, pred_yolo_3, true_yolo_3, true_boxes]) 360 | 361 | train_model = Model([input_image, true_boxes, true_yolo_1, true_yolo_2, true_yolo_3], [loss_yolo_1, loss_yolo_2, loss_yolo_3]) 362 | infer_model = Model(input_image, [pred_yolo_1, pred_yolo_2, pred_yolo_3]) 363 | 364 | return [train_model, infer_model] 365 | 366 | def dummy_loss(y_true, y_pred): 367 | return tf.sqrt(tf.reduce_sum(y_pred)) -------------------------------------------------------------------------------- /yolo3_one_file_to_detect_them_all.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import numpy as np 4 | from keras.layers import Conv2D, Input, BatchNormalization, LeakyReLU, ZeroPadding2D, UpSampling2D 5 | from keras.layers.merge import add, concatenate 6 | from keras.models import Model 7 | import struct 8 | import cv2 9 | 10 | np.set_printoptions(threshold=np.nan) 11 | os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID" 12 | os.environ["CUDA_VISIBLE_DEVICES"]="0" 13 | 14 | argparser = argparse.ArgumentParser( 15 | description='test yolov3 network with coco weights') 16 | 17 | argparser.add_argument( 18 | '-w', 19 | '--weights', 20 | help='path to weights file') 21 | 22 | argparser.add_argument( 23 | '-i', 24 | '--image', 25 | help='path to image file') 26 | 27 | class WeightReader: 28 | def __init__(self, weight_file): 29 | with open(weight_file, 'rb') as w_f: 30 | major, = struct.unpack('i', w_f.read(4)) 31 | minor, = struct.unpack('i', w_f.read(4)) 32 | revision, = struct.unpack('i', w_f.read(4)) 33 | 34 | if (major*10 + minor) >= 2 and major < 1000 and minor < 1000: 35 | w_f.read(8) 36 | else: 37 | w_f.read(4) 38 | 39 | transpose = (major > 1000) or (minor > 1000) 40 | 41 | binary = w_f.read() 42 | 43 | self.offset = 0 44 | self.all_weights = np.frombuffer(binary, dtype='float32') 45 | 46 | def read_bytes(self, size): 47 | self.offset = self.offset + size 48 | return self.all_weights[self.offset-size:self.offset] 49 | 50 | def load_weights(self, model): 51 | for i in range(106): 52 | try: 53 | conv_layer = model.get_layer('conv_' + str(i)) 54 | print("loading weights of convolution #" + str(i)) 55 | 56 | if i not in [81, 93, 105]: 57 | norm_layer = model.get_layer('bnorm_' + str(i)) 58 | 59 | size = np.prod(norm_layer.get_weights()[0].shape) 60 | 61 | beta = self.read_bytes(size) # bias 62 | gamma = self.read_bytes(size) # scale 63 | mean = self.read_bytes(size) # mean 64 | var = self.read_bytes(size) # variance 65 | 66 | weights = norm_layer.set_weights([gamma, beta, mean, var]) 67 | 68 | if len(conv_layer.get_weights()) > 1: 69 | bias = self.read_bytes(np.prod(conv_layer.get_weights()[1].shape)) 70 | kernel = self.read_bytes(np.prod(conv_layer.get_weights()[0].shape)) 71 | 72 | kernel = kernel.reshape(list(reversed(conv_layer.get_weights()[0].shape))) 73 | kernel = kernel.transpose([2,3,1,0]) 74 | conv_layer.set_weights([kernel, bias]) 75 | else: 76 | kernel = self.read_bytes(np.prod(conv_layer.get_weights()[0].shape)) 77 | kernel = kernel.reshape(list(reversed(conv_layer.get_weights()[0].shape))) 78 | kernel = kernel.transpose([2,3,1,0]) 79 | conv_layer.set_weights([kernel]) 80 | except ValueError: 81 | print("no convolution #" + str(i)) 82 | 83 | def reset(self): 84 | self.offset = 0 85 | 86 | class BoundBox: 87 | def __init__(self, xmin, ymin, xmax, ymax, objness = None, classes = None): 88 | self.xmin = xmin 89 | self.ymin = ymin 90 | self.xmax = xmax 91 | self.ymax = ymax 92 | 93 | self.objness = objness 94 | 
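        # per-class scores; get_label() and get_score() below lazily cache the
        # arg-max class index and its score using the -1 sentinels set just below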
self.classes = classes 95 | 96 | self.label = -1 97 | self.score = -1 98 | 99 | def get_label(self): 100 | if self.label == -1: 101 | self.label = np.argmax(self.classes) 102 | 103 | return self.label 104 | 105 | def get_score(self): 106 | if self.score == -1: 107 | self.score = self.classes[self.get_label()] 108 | 109 | return self.score 110 | 111 | def _conv_block(inp, convs, skip=True): 112 | x = inp 113 | count = 0 114 | 115 | for conv in convs: 116 | if count == (len(convs) - 2) and skip: 117 | skip_connection = x 118 | count += 1 119 | 120 | if conv['stride'] > 1: x = ZeroPadding2D(((1,0),(1,0)))(x) # peculiar padding as darknet prefer left and top 121 | x = Conv2D(conv['filter'], 122 | conv['kernel'], 123 | strides=conv['stride'], 124 | padding='valid' if conv['stride'] > 1 else 'same', # peculiar padding as darknet prefer left and top 125 | name='conv_' + str(conv['layer_idx']), 126 | use_bias=False if conv['bnorm'] else True)(x) 127 | if conv['bnorm']: x = BatchNormalization(epsilon=0.001, name='bnorm_' + str(conv['layer_idx']))(x) 128 | if conv['leaky']: x = LeakyReLU(alpha=0.1, name='leaky_' + str(conv['layer_idx']))(x) 129 | 130 | return add([skip_connection, x]) if skip else x 131 | 132 | def _interval_overlap(interval_a, interval_b): 133 | x1, x2 = interval_a 134 | x3, x4 = interval_b 135 | 136 | if x3 < x1: 137 | if x4 < x1: 138 | return 0 139 | else: 140 | return min(x2,x4) - x1 141 | else: 142 | if x2 < x3: 143 | return 0 144 | else: 145 | return min(x2,x4) - x3 146 | 147 | def _sigmoid(x): 148 | return 1. / (1. + np.exp(-x)) 149 | 150 | def bbox_iou(box1, box2): 151 | intersect_w = _interval_overlap([box1.xmin, box1.xmax], [box2.xmin, box2.xmax]) 152 | intersect_h = _interval_overlap([box1.ymin, box1.ymax], [box2.ymin, box2.ymax]) 153 | 154 | intersect = intersect_w * intersect_h 155 | 156 | w1, h1 = box1.xmax-box1.xmin, box1.ymax-box1.ymin 157 | w2, h2 = box2.xmax-box2.xmin, box2.ymax-box2.ymin 158 | 159 | union = w1*h1 + w2*h2 - intersect 160 | 161 | return float(intersect) / union 162 | 163 | def make_yolov3_model(): 164 | input_image = Input(shape=(None, None, 3)) 165 | 166 | # Layer 0 => 4 167 | x = _conv_block(input_image, [{'filter': 32, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 0}, 168 | {'filter': 64, 'kernel': 3, 'stride': 2, 'bnorm': True, 'leaky': True, 'layer_idx': 1}, 169 | {'filter': 32, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 2}, 170 | {'filter': 64, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 3}]) 171 | 172 | # Layer 5 => 8 173 | x = _conv_block(x, [{'filter': 128, 'kernel': 3, 'stride': 2, 'bnorm': True, 'leaky': True, 'layer_idx': 5}, 174 | {'filter': 64, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 6}, 175 | {'filter': 128, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 7}]) 176 | 177 | # Layer 9 => 11 178 | x = _conv_block(x, [{'filter': 64, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 9}, 179 | {'filter': 128, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 10}]) 180 | 181 | # Layer 12 => 15 182 | x = _conv_block(x, [{'filter': 256, 'kernel': 3, 'stride': 2, 'bnorm': True, 'leaky': True, 'layer_idx': 12}, 183 | {'filter': 128, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 13}, 184 | {'filter': 256, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 14}]) 185 | 186 | # Layer 16 => 36 187 | for i in range(7): 188 | x = _conv_block(x, 
[{'filter': 128, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 16+i*3}, 189 | {'filter': 256, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 17+i*3}]) 190 | 191 | skip_36 = x 192 | 193 | # Layer 37 => 40 194 | x = _conv_block(x, [{'filter': 512, 'kernel': 3, 'stride': 2, 'bnorm': True, 'leaky': True, 'layer_idx': 37}, 195 | {'filter': 256, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 38}, 196 | {'filter': 512, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 39}]) 197 | 198 | # Layer 41 => 61 199 | for i in range(7): 200 | x = _conv_block(x, [{'filter': 256, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 41+i*3}, 201 | {'filter': 512, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 42+i*3}]) 202 | 203 | skip_61 = x 204 | 205 | # Layer 62 => 65 206 | x = _conv_block(x, [{'filter': 1024, 'kernel': 3, 'stride': 2, 'bnorm': True, 'leaky': True, 'layer_idx': 62}, 207 | {'filter': 512, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 63}, 208 | {'filter': 1024, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 64}]) 209 | 210 | # Layer 66 => 74 211 | for i in range(3): 212 | x = _conv_block(x, [{'filter': 512, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 66+i*3}, 213 | {'filter': 1024, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 67+i*3}]) 214 | 215 | # Layer 75 => 79 216 | x = _conv_block(x, [{'filter': 512, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 75}, 217 | {'filter': 1024, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 76}, 218 | {'filter': 512, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 77}, 219 | {'filter': 1024, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 78}, 220 | {'filter': 512, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 79}], skip=False) 221 | 222 | # Layer 80 => 82 223 | yolo_82 = _conv_block(x, [{'filter': 1024, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 80}, 224 | {'filter': 255, 'kernel': 1, 'stride': 1, 'bnorm': False, 'leaky': False, 'layer_idx': 81}], skip=False) 225 | 226 | # Layer 83 => 86 227 | x = _conv_block(x, [{'filter': 256, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 84}], skip=False) 228 | x = UpSampling2D(2)(x) 229 | x = concatenate([x, skip_61]) 230 | 231 | # Layer 87 => 91 232 | x = _conv_block(x, [{'filter': 256, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 87}, 233 | {'filter': 512, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 88}, 234 | {'filter': 256, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 89}, 235 | {'filter': 512, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 90}, 236 | {'filter': 256, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 91}], skip=False) 237 | 238 | # Layer 92 => 94 239 | yolo_94 = _conv_block(x, [{'filter': 512, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 92}, 240 | {'filter': 255, 'kernel': 1, 'stride': 1, 'bnorm': False, 'leaky': False, 'layer_idx': 93}], skip=False) 241 | 242 | # Layer 95 => 98 243 | x = _conv_block(x, [{'filter': 128, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 96}], skip=False) 244 | x = UpSampling2D(2)(x) 245 | x = concatenate([x, skip_36]) 246 | 247 | # Layer 99 
=> 106 248 | yolo_106 = _conv_block(x, [{'filter': 128, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 99}, 249 | {'filter': 256, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 100}, 250 | {'filter': 128, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 101}, 251 | {'filter': 256, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 102}, 252 | {'filter': 128, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 103}, 253 | {'filter': 256, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 104}, 254 | {'filter': 255, 'kernel': 1, 'stride': 1, 'bnorm': False, 'leaky': False, 'layer_idx': 105}], skip=False) 255 | 256 | model = Model(input_image, [yolo_82, yolo_94, yolo_106]) 257 | return model 258 | 259 | def preprocess_input(image, net_h, net_w): 260 | new_h, new_w, _ = image.shape 261 | 262 | # determine the new size of the image 263 | if (float(net_w)/new_w) < (float(net_h)/new_h): 264 | new_h = (new_h * net_w)/new_w 265 | new_w = net_w 266 | else: 267 | new_w = (new_w * net_h)/new_h 268 | new_h = net_h 269 | 270 | # resize the image to the new size 271 | resized = cv2.resize(image[:,:,::-1]/255., (int(new_w), int(new_h))) 272 | 273 | # embed the image into the standard letter box 274 | new_image = np.ones((net_h, net_w, 3)) * 0.5 275 | new_image[int((net_h-new_h)//2):int((net_h+new_h)//2), int((net_w-new_w)//2):int((net_w+new_w)//2), :] = resized 276 | new_image = np.expand_dims(new_image, 0) 277 | 278 | return new_image 279 | 280 | def decode_netout(netout, anchors, obj_thresh, nms_thresh, net_h, net_w): 281 | grid_h, grid_w = netout.shape[:2] 282 | nb_box = 3 283 | netout = netout.reshape((grid_h, grid_w, nb_box, -1)) 284 | nb_class = netout.shape[-1] - 5 285 | 286 | boxes = [] 287 | 288 | netout[..., :2] = _sigmoid(netout[..., :2]) 289 | netout[..., 4:] = _sigmoid(netout[..., 4:]) 290 | netout[..., 5:] = netout[..., 4][..., np.newaxis] * netout[..., 5:] 291 | netout[..., 5:] *= netout[..., 5:] > obj_thresh 292 | 293 | for i in range(grid_h*grid_w): 294 | row = i / grid_w 295 | col = i % grid_w 296 | 297 | for b in range(nb_box): 298 | # 4th element is objectness score 299 | objectness = netout[int(row)][int(col)][b][4] 300 | #objectness = netout[..., :4] 301 | 302 | if(objectness.all() <= obj_thresh): continue 303 | 304 | # first 4 elements are x, y, w, and h 305 | x, y, w, h = netout[int(row)][int(col)][b][:4] 306 | 307 | x = (col + x) / grid_w # center position, unit: image width 308 | y = (row + y) / grid_h # center position, unit: image height 309 | w = anchors[2 * b + 0] * np.exp(w) / net_w # unit: image width 310 | h = anchors[2 * b + 1] * np.exp(h) / net_h # unit: image height 311 | 312 | # last elements are class probabilities 313 | classes = netout[int(row)][col][b][5:] 314 | 315 | box = BoundBox(x-w/2, y-h/2, x+w/2, y+h/2, objectness, classes) 316 | #box = BoundBox(x-w/2, y-h/2, x+w/2, y+h/2, None, classes) 317 | 318 | boxes.append(box) 319 | 320 | return boxes 321 | 322 | def correct_yolo_boxes(boxes, image_h, image_w, net_h, net_w): 323 | if (float(net_w)/image_w) < (float(net_h)/image_h): 324 | new_w = net_w 325 | new_h = (image_h*net_w)/image_w 326 | else: 327 | new_h = net_w 328 | new_w = (image_w*net_h)/image_h 329 | 330 | for i in range(len(boxes)): 331 | x_offset, x_scale = (net_w - new_w)/2./net_w, float(new_w)/net_w 332 | y_offset, y_scale = (net_h - new_h)/2./net_h, float(new_h)/net_h 333 | 334 | boxes[i].xmin = int((boxes[i].xmin - x_offset) / x_scale 
* image_w) 335 | boxes[i].xmax = int((boxes[i].xmax - x_offset) / x_scale * image_w) 336 | boxes[i].ymin = int((boxes[i].ymin - y_offset) / y_scale * image_h) 337 | boxes[i].ymax = int((boxes[i].ymax - y_offset) / y_scale * image_h) 338 | 339 | def do_nms(boxes, nms_thresh): 340 | if len(boxes) > 0: 341 | nb_class = len(boxes[0].classes) 342 | else: 343 | return 344 | 345 | for c in range(nb_class): 346 | sorted_indices = np.argsort([-box.classes[c] for box in boxes]) 347 | 348 | for i in range(len(sorted_indices)): 349 | index_i = sorted_indices[i] 350 | 351 | if boxes[index_i].classes[c] == 0: continue 352 | 353 | for j in range(i+1, len(sorted_indices)): 354 | index_j = sorted_indices[j] 355 | 356 | if bbox_iou(boxes[index_i], boxes[index_j]) >= nms_thresh: 357 | boxes[index_j].classes[c] = 0 358 | 359 | def draw_boxes(image, boxes, labels, obj_thresh): 360 | for box in boxes: 361 | label_str = '' 362 | label = -1 363 | 364 | for i in range(len(labels)): 365 | if box.classes[i] > obj_thresh: 366 | label_str += labels[i] 367 | label = i 368 | print(labels[i] + ': ' + str(box.classes[i]*100) + '%') 369 | 370 | if label >= 0: 371 | cv2.rectangle(image, (box.xmin,box.ymin), (box.xmax,box.ymax), (0,255,0), 3) 372 | cv2.putText(image, 373 | label_str + ' ' + str(box.get_score()), 374 | (box.xmin, box.ymin - 13), 375 | cv2.FONT_HERSHEY_SIMPLEX, 376 | 1e-3 * image.shape[0], 377 | (0,255,0), 2) 378 | 379 | return image 380 | 381 | def _main_(args): 382 | weights_path = args.weights 383 | image_path = args.image 384 | 385 | # set some parameters 386 | net_h, net_w = 416, 416 387 | obj_thresh, nms_thresh = 0.5, 0.45 388 | anchors = [[116,90, 156,198, 373,326], [30,61, 62,45, 59,119], [10,13, 16,30, 33,23]] 389 | labels = ["person", "bicycle", "car", "motorbike", "aeroplane", "bus", "train", "truck", \ 390 | "boat", "traffic light", "fire hydrant", "stop sign", "parking meter", "bench", \ 391 | "bird", "cat", "dog", "horse", "sheep", "cow", "elephant", "bear", "zebra", "giraffe", \ 392 | "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee", "skis", "snowboard", \ 393 | "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard", \ 394 | "tennis racket", "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", \ 395 | "apple", "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", \ 396 | "chair", "sofa", "pottedplant", "bed", "diningtable", "toilet", "tvmonitor", "laptop", "mouse", \ 397 | "remote", "keyboard", "cell phone", "microwave", "oven", "toaster", "sink", "refrigerator", \ 398 | "book", "clock", "vase", "scissors", "teddy bear", "hair drier", "toothbrush"] 399 | 400 | # make the yolov3 model to predict 80 classes on COCO 401 | yolov3 = make_yolov3_model() 402 | 403 | # load the weights trained on COCO into the model 404 | weight_reader = WeightReader(weights_path) 405 | weight_reader.load_weights(yolov3) 406 | 407 | # preprocess the image 408 | image = cv2.imread(image_path) 409 | image_h, image_w, _ = image.shape 410 | new_image = preprocess_input(image, net_h, net_w) 411 | 412 | # run the prediction 413 | yolos = yolov3.predict(new_image) 414 | boxes = [] 415 | 416 | for i in range(len(yolos)): 417 | # decode the output of the network 418 | boxes += decode_netout(yolos[i][0], anchors[i], obj_thresh, nms_thresh, net_h, net_w) 419 | 420 | # correct the sizes of the bounding boxes 421 | correct_yolo_boxes(boxes, image_h, image_w, net_h, net_w) 422 | 423 | # suppress non-maximal boxes 424 | do_nms(boxes, 
nms_thresh) 425 | 426 | # draw bounding boxes on the image using labels 427 | draw_boxes(image, boxes, labels, obj_thresh) 428 | 429 | # write the image with bounding boxes to file 430 | cv2.imwrite(image_path[:-4] + '_detected' + image_path[-4:], (image).astype('uint8')) 431 | 432 | if __name__ == '__main__': 433 | args = argparser.parse_args() 434 | _main_(args) 435 | -------------------------------------------------------------------------------- /zoo/config_kangaroo.json: -------------------------------------------------------------------------------- 1 | { 2 | "model" : { 3 | "min_input_size": 288, 4 | "max_input_size": 448, 5 | "anchors": [55,69, 75,234, 133,240, 136,129, 142,363, 203,290, 228,184, 285,359, 341,260], 6 | "labels": ["kangaroo"] 7 | }, 8 | 9 | "train": { 10 | "train_image_folder": "/home/andy/Desktop/github/kangaroo/images/", 11 | "train_annot_folder": "/home/andy/Desktop/github/kangaroo/annots/", 12 | "cache_name": "kangaroo_train.pkl", 13 | 14 | "train_times": 3, 15 | "batch_size": 16, 16 | "learning_rate": 1e-4, 17 | "nb_epochs": 100, 18 | "warmup_epochs": 3, 19 | "ignore_thresh": 0.5, 20 | "gpus": "0,1", 21 | 22 | "grid_scales": [1,1,1], 23 | "obj_scale": 5, 24 | "noobj_scale": 1, 25 | "xywh_scale": 1, 26 | "class_scale": 1, 27 | 28 | "tensorboard_dir": "log_kangaroo", 29 | "saved_weights_name": "kangaroo.h5", 30 | "debug": true 31 | }, 32 | 33 | "valid": { 34 | "valid_image_folder": "", 35 | "valid_annot_folder": "", 36 | "cache_name": "", 37 | 38 | "valid_times": 1 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /zoo/config_license_plates.json: -------------------------------------------------------------------------------- 1 | { 2 | "model" : { 3 | "min_input_size": 416, 4 | "max_input_size": 416, 5 | "anchors": [15,6, 18,8, 22,9, 27,11, 32,13, 41,17, 54,21, 66,27, 82,33], 6 | "labels": ["license-plate"] 7 | }, 8 | 9 | "train": { 10 | "train_image_folder": "dataset/train/images/", 11 | "train_annot_folder": "dataset/train/annots/", 12 | "cache_name": "license_plate.pkl", 13 | 14 | "pretrained_weights": "pretrained_lp.h5", 15 | 16 | "train_times": 4, 17 | "batch_size": 16, 18 | "learning_rate": 1e-4, 19 | "nb_epochs": 100, 20 | "warmup_epochs": 3, 21 | "ignore_thresh": 0.6, 22 | "gpus": "0", 23 | 24 | "grid_scales": [1,1,1], 25 | "obj_scale": 5, 26 | "noobj_scale": 1, 27 | "xywh_scale": 1, 28 | "class_scale": 1, 29 | 30 | "tensorboard_dir": "logs", 31 | "saved_weights_name": "license_plate.h5", 32 | "debug": true 33 | }, 34 | 35 | "valid": { 36 | "valid_image_folder": "dataset/valid/images/", 37 | "valid_annot_folder": "dataset/valid/annots/", 38 | "cache_name": "valid.pkl", 39 | 40 | "valid_times": 1 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /zoo/config_raccoon.json: -------------------------------------------------------------------------------- 1 | { 2 | "model" : { 3 | "min_input_size": 288, 4 | "max_input_size": 448, 5 | "anchors": [17,18, 28,24, 36,34, 42,44, 56,51, 72,66, 90,95, 92,154, 139,281], 6 | "labels": ["raccoon"] 7 | }, 8 | 9 | "train": { 10 | "train_image_folder": "/home/andy/Desktop/github/raccoon_dataset/images/", 11 | "train_annot_folder": "/home/andy/Desktop/github/raccoon_dataset/annotations/", 12 | "cache_name": "raccoon_train.pkl", 13 | 14 | "train_times": 3, 15 | "batch_size": 16, 16 | "learning_rate": 1e-4, 17 | "nb_epochs": 100, 18 | "warmup_epochs": 3, 19 | "ignore_thresh": 0.5, 20 | "gpus": "0,1", 21 | 22 | "grid_scales": 
[1,1,1], 23 | "obj_scale": 5, 24 | "noobj_scale": 1, 25 | "xywh_scale": 1, 26 | "class_scale": 1, 27 | 28 | "tensorboard_dir": "log_raccoon", 29 | "saved_weights_name": "raccoon.h5", 30 | "debug": true 31 | }, 32 | 33 | "valid": { 34 | "valid_image_folder": "", 35 | "valid_annot_folder": "", 36 | "cache_name": "", 37 | 38 | "valid_times": 1 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /zoo/config_rbc.json: -------------------------------------------------------------------------------- 1 | { 2 | "model" : { 3 | "min_input_size": 224, 4 | "max_input_size": 480, 5 | "anchors": [25,33, 52,94, 56,71, 67,83, 68,98, 73,65, 81,96, 116,134, 147,182], 6 | "labels": ["Platelets", "RBC", "WBC"] 7 | }, 8 | 9 | "train": { 10 | "train_image_folder": "/home/experiencor/data/BCCD_Dataset/BCCD/JPEGImages/", 11 | "train_annot_folder": "/home/experiencor/data/BCCD_Dataset/BCCD/Annotations/", 12 | "cache_name": "rbc_train.pkl", 13 | 14 | "train_times": 3, 15 | "batch_size": 16, 16 | "learning_rate": 1e-4, 17 | "nb_epochs": 100, 18 | "warmup_epochs": 3, 19 | "ignore_thresh": 0.5, 20 | "gpus": "0,1", 21 | 22 | "grid_scales": [1,1,1], 23 | "obj_scale": 5, 24 | "noobj_scale": 1, 25 | "xywh_scale": 1, 26 | "class_scale": 1, 27 | 28 | "tensorboard_dir": "log_rbc", 29 | "saved_weights_name": "rbc.h5", 30 | "debug": true 31 | }, 32 | 33 | "valid": { 34 | "valid_image_folder": "", 35 | "valid_annot_folder": "", 36 | "cache_name": "", 37 | 38 | "valid_times": 1 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /zoo/config_voc.json: -------------------------------------------------------------------------------- 1 | { 2 | "model" : { 3 | "min_input_size": 224, 4 | "max_input_size": 480, 5 | "anchors": [24,34, 46,84, 68,185, 116,286, 122,97, 171,180, 214,327, 326,193, 359,359], 6 | "labels": ["aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"] 7 | }, 8 | 9 | "train": { 10 | "train_image_folder": "/home/experiencor/data/pascal/train/images/", 11 | "train_annot_folder": "/home/experiencor/data/pascal/train/annots/", 12 | "cache_name": "voc_train.pkl", 13 | 14 | "train_times": 1, 15 | "batch_size": 8, 16 | "learning_rate": 1e-5, 17 | "nb_epochs": 100, 18 | "warmup_epochs": 3, 19 | "ignore_thresh": 0.5, 20 | "gpus": "0", 21 | 22 | "grid_scales": [1,1,1], 23 | "obj_scale": 5, 24 | "noobj_scale": 1, 25 | "xywh_scale": 1, 26 | "class_scale": 1, 27 | 28 | "tensorboard_dir": "log_voc", 29 | "saved_weights_name": "voc.h5", 30 | "debug": true 31 | }, 32 | 33 | "valid": { 34 | "valid_image_folder": "/home/experiencor/data/pascal/valid/images/", 35 | "valid_annot_folder": "/home/experiencor/data/pascal/valid/annots/", 36 | "cache_name": "voc_valid.pkl", 37 | 38 | "valid_times": 1 39 | } 40 | } 41 | --------------------------------------------------------------------------------
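A closing note on the zoo configs above: the 18 numbers under `"model" -> "anchors"` are nine width,height pairs, three per output scale; `create_yolov3_model` in `yolo.py` hands `anchors[12:]` to the coarsest head, `anchors[6:12]` to the middle head and `anchors[:6]` to the finest head, and `len(labels)` fixes `nb_class`. The sketch below shows one way such a config could be wired into the model builder; it is illustrative only (the variable names, the `max_box_per_image` cap, the grid bound and the zero warm-up are assumptions, not values taken from `train.py`):

```python
import json
from yolo import create_yolov3_model

with open('zoo/config_voc.json') as f:
    config = json.load(f)

train_model, infer_model = create_yolov3_model(
    nb_class          = len(config['model']['labels']),
    anchors           = config['model']['anchors'],
    max_box_per_image = 30,   # assumed cap on true boxes per image
    max_grid          = [config['model']['max_input_size'] // 32,
                         config['model']['max_input_size'] // 32],  # assumed bound on the coarsest grid
    batch_size        = config['train']['batch_size'],
    warmup_batches    = 0,    # assumed: warm-up disabled for this sketch
    ignore_thresh     = config['train']['ignore_thresh'],
    grid_scales       = config['train']['grid_scales'],
    obj_scale         = config['train']['obj_scale'],
    noobj_scale       = config['train']['noobj_scale'],
    xywh_scale        = config['train']['xywh_scale'],
    class_scale       = config['train']['class_scale'],
)
```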