├── out ├── .gitkeep ├── out1.png ├── out2.png ├── out3.png ├── out4.png ├── out5.png ├── out6.png ├── out7.png ├── out8.png ├── out9.png ├── out10.png ├── out11.png ├── out12.png ├── out13.png ├── out14.png ├── out15.png └── out16.png ├── dataset └── images │ └── .gitkeep ├── test_images ├── .gitkeep ├── test (1).png ├── test (10).png ├── test (11).png ├── test (12).png ├── test (13).png ├── test (14).png ├── test (15).png ├── test (16).png ├── test (2).png ├── test (3).png ├── test (4).png ├── test (5).png ├── test (6).png ├── test (7).png ├── test (8).png └── test (9).png ├── main.py ├── config.json ├── predict.py ├── detect_test_images.py ├── .gitignore ├── preprocessing.py ├── grab_screen.py ├── README.md ├── generate_anchors.py ├── postprocessing.py ├── utils.py └── yolo.py /out/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /dataset/images/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /test_images/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /out/out1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/affinis-lab/traffic-light-detection-module/HEAD/out/out1.png -------------------------------------------------------------------------------- /out/out2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/affinis-lab/traffic-light-detection-module/HEAD/out/out2.png -------------------------------------------------------------------------------- /out/out3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/affinis-lab/traffic-light-detection-module/HEAD/out/out3.png -------------------------------------------------------------------------------- /out/out4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/affinis-lab/traffic-light-detection-module/HEAD/out/out4.png -------------------------------------------------------------------------------- /out/out5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/affinis-lab/traffic-light-detection-module/HEAD/out/out5.png -------------------------------------------------------------------------------- /out/out6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/affinis-lab/traffic-light-detection-module/HEAD/out/out6.png -------------------------------------------------------------------------------- /out/out7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/affinis-lab/traffic-light-detection-module/HEAD/out/out7.png -------------------------------------------------------------------------------- /out/out8.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/affinis-lab/traffic-light-detection-module/HEAD/out/out8.png -------------------------------------------------------------------------------- /out/out9.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/affinis-lab/traffic-light-detection-module/HEAD/out/out9.png -------------------------------------------------------------------------------- /out/out10.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/affinis-lab/traffic-light-detection-module/HEAD/out/out10.png -------------------------------------------------------------------------------- /out/out11.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/affinis-lab/traffic-light-detection-module/HEAD/out/out11.png -------------------------------------------------------------------------------- /out/out12.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/affinis-lab/traffic-light-detection-module/HEAD/out/out12.png -------------------------------------------------------------------------------- /out/out13.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/affinis-lab/traffic-light-detection-module/HEAD/out/out13.png -------------------------------------------------------------------------------- /out/out14.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/affinis-lab/traffic-light-detection-module/HEAD/out/out14.png -------------------------------------------------------------------------------- /out/out15.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/affinis-lab/traffic-light-detection-module/HEAD/out/out15.png -------------------------------------------------------------------------------- /out/out16.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/affinis-lab/traffic-light-detection-module/HEAD/out/out16.png -------------------------------------------------------------------------------- /test_images/test (1).png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/affinis-lab/traffic-light-detection-module/HEAD/test_images/test (1).png -------------------------------------------------------------------------------- /test_images/test (10).png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/affinis-lab/traffic-light-detection-module/HEAD/test_images/test (10).png -------------------------------------------------------------------------------- /test_images/test (11).png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/affinis-lab/traffic-light-detection-module/HEAD/test_images/test (11).png -------------------------------------------------------------------------------- /test_images/test (12).png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/affinis-lab/traffic-light-detection-module/HEAD/test_images/test (12).png -------------------------------------------------------------------------------- /test_images/test (13).png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/affinis-lab/traffic-light-detection-module/HEAD/test_images/test (13).png -------------------------------------------------------------------------------- /test_images/test (14).png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/affinis-lab/traffic-light-detection-module/HEAD/test_images/test (14).png -------------------------------------------------------------------------------- /test_images/test (15).png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/affinis-lab/traffic-light-detection-module/HEAD/test_images/test (15).png -------------------------------------------------------------------------------- /test_images/test (16).png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/affinis-lab/traffic-light-detection-module/HEAD/test_images/test (16).png -------------------------------------------------------------------------------- /test_images/test (2).png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/affinis-lab/traffic-light-detection-module/HEAD/test_images/test (2).png -------------------------------------------------------------------------------- /test_images/test (3).png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/affinis-lab/traffic-light-detection-module/HEAD/test_images/test (3).png -------------------------------------------------------------------------------- /test_images/test (4).png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/affinis-lab/traffic-light-detection-module/HEAD/test_images/test (4).png -------------------------------------------------------------------------------- /test_images/test (5).png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/affinis-lab/traffic-light-detection-module/HEAD/test_images/test (5).png -------------------------------------------------------------------------------- /test_images/test (6).png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/affinis-lab/traffic-light-detection-module/HEAD/test_images/test (6).png -------------------------------------------------------------------------------- /test_images/test (7).png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/affinis-lab/traffic-light-detection-module/HEAD/test_images/test (7).png -------------------------------------------------------------------------------- /test_images/test (8).png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/affinis-lab/traffic-light-detection-module/HEAD/test_images/test (8).png -------------------------------------------------------------------------------- /test_images/test (9).png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/affinis-lab/traffic-light-detection-module/HEAD/test_images/test (9).png -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | import 
argparse 2 | import json 3 | 4 | from yolo import YOLO 5 | 6 | 7 | def main(args): 8 | config_path = args.conf 9 | 10 | with open(config_path) as config_buffer: 11 | config = json.loads(config_buffer.read()) 12 | 13 | yolo = YOLO(config) 14 | if config['train']['enabled']: 15 | #yolo.load_weights(config['model']['saved_model_name']) 16 | yolo.train() 17 | else: 18 | yolo.evaluate() 19 | 20 | 21 | if __name__ == "__main__": 22 | arg_parser = argparse.ArgumentParser( 23 | description='Train and validate autonomous car module') 24 | 25 | arg_parser.add_argument( 26 | '-c', 27 | '--conf', 28 | help='path to the configuration file') 29 | 30 | main(arg_parser.parse_args()) 31 | -------------------------------------------------------------------------------- /config.json: -------------------------------------------------------------------------------- 1 | { 2 | "model" : 3 | { 4 | "image_h": 416, 5 | "image_w": 416, 6 | "anchors": [0.24,0.79, 0.80,2.12], 7 | "max_obj": 5, 8 | "grid_h": 13, 9 | "grid_w": 13, 10 | "num_classes": 2, 11 | "classes": ["go", "stop"], 12 | "obj_thresh": 0.20, 13 | "nms_thresh": 0.01, 14 | "saved_model_name": "model.09-2.01.h5", 15 | "obj_scale": 85, 16 | "no_obj_scale": 75, 17 | "coord_scale": 70, 18 | "class_scale": 70 19 | }, 20 | 21 | "plot_model": true, 22 | 23 | "train": { 24 | "enabled": false, 25 | "annot_file_name": "carla_all.csv", 26 | "batch_size": 8, 27 | "learning_rate": 1e-4, 28 | "nb_epochs": 500, 29 | "warmup_batches": 0, 30 | "debug": true 31 | } 32 | } -------------------------------------------------------------------------------- /predict.py: -------------------------------------------------------------------------------- 1 | from keras.models import load_model 2 | import os 3 | import numpy as np 4 | 5 | from yolo import YOLO, dummy_loss 6 | from preprocessing import load_image_predict 7 | from postprocessing import decode_netout 8 | 9 | 10 | BASE_DIR = os.path.dirname(os.path.abspath(__file__)) 11 | 12 | 13 | def get_model(config): 14 | model = YOLO( 15 | config =config 16 | ) 17 | model.load_weights(os.path.join(BASE_DIR, config['model']['saved_model_name'])) 18 | return model 19 | 20 | 21 | def get_model_from_file(config): 22 | path = os.path.join(BASE_DIR, 'checkpoints', config['model']['saved_model_name']) 23 | model = load_model(path, custom_objects={'custom_loss': dummy_loss}) 24 | return model 25 | 26 | 27 | def predict_with_model_from_file(config, model, image_path): 28 | image = load_image_predict(image_path, config['model']['image_h'], config['model']['image_w']) 29 | 30 | dummy_array = np.zeros((1, 1, 1, 1, config['model']['max_obj'], 4)) 31 | netout = model.predict([image, dummy_array])[0] 32 | 33 | boxes = decode_netout(netout=netout, anchors=config['model']['anchors'], 34 | nb_class=config['model']['num_classes'], 35 | obj_threshold=config['model']['obj_thresh'], 36 | nms_threshold=config['model']['nms_thresh']) 37 | return boxes -------------------------------------------------------------------------------- /detect_test_images.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import argparse 4 | import cv2 5 | 6 | from postprocessing import draw_boxes 7 | from predict import predict_with_model_from_file, get_model_from_file 8 | 9 | 10 | BASE_DIR = os.path.dirname(__file__) 11 | TEST_IMAGES_DIR = os.path.join(BASE_DIR, 'test_images') 12 | OUT_IMAGES_DIR = os.path.join(BASE_DIR, 'out') 13 | 14 | 15 | def detect_on_test_images(config): 16 | model = 
get_model_from_file(config) 17 | 18 | all_images = [f for f in os.listdir(TEST_IMAGES_DIR) if os.path.isfile(os.path.join(TEST_IMAGES_DIR, f))] 19 | img_num = 1 20 | for image_name in all_images: 21 | image_path = os.path.join(TEST_IMAGES_DIR, image_name) 22 | 23 | netout = predict_with_model_from_file(config, model, image_path) 24 | plt_image = draw_boxes(cv2.imread(image_path), netout, config['model']['classes']) 25 | 26 | #cv2.imshow('demo', plt_image) 27 | #cv2.waitKey(0) 28 | 29 | cv2.imwrite(os.path.join(OUT_IMAGES_DIR, 'out' + str(img_num) + '.png'), plt_image) 30 | img_num += 1 31 | 32 | 33 | def main(args): 34 | config_path = args.conf 35 | 36 | with open(config_path) as config_buffer: 37 | config = json.loads(config_buffer.read()) 38 | 39 | detect_on_test_images(config) 40 | 41 | 42 | if __name__ == '__main__': 43 | argparser = argparse.ArgumentParser() 44 | 45 | argparser.add_argument( 46 | '-c', 47 | '--conf', 48 | default='config.json', 49 | help='Path to configuration file') 50 | 51 | args = argparser.parse_args() 52 | main(args) -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | .idea 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | dataset/images/* 30 | !dataset/images/.gitkeep 31 | *.h5 32 | trainval.json 33 | checkpoints/* 34 | 35 | # PyInstaller 36 | # Usually these files are written by a python script from a template 37 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
38 | *.manifest 39 | *.spec 40 | 41 | # Installer logs 42 | pip-log.txt 43 | pip-delete-this-directory.txt 44 | 45 | # Unit test / coverage reports 46 | htmlcov/ 47 | .tox/ 48 | .coverage 49 | .coverage.* 50 | .cache 51 | nosetests.xml 52 | coverage.xml 53 | *.cover 54 | .hypothesis/ 55 | .pytest_cache/ 56 | 57 | # Translations 58 | *.mo 59 | *.pot 60 | 61 | # Django stuff: 62 | *.log 63 | local_settings.py 64 | db.sqlite3 65 | 66 | # Flask stuff: 67 | instance/ 68 | .webassets-cache 69 | 70 | # Scrapy stuff: 71 | .scrapy 72 | 73 | # Sphinx documentation 74 | docs/_build/ 75 | 76 | # PyBuilder 77 | target/ 78 | 79 | # Jupyter Notebook 80 | .ipynb_checkpoints 81 | 82 | # pyenv 83 | .python-version 84 | 85 | # celery beat schedule file 86 | celerybeat-schedule 87 | 88 | # SageMath parsed files 89 | *.sage.py 90 | 91 | # Environments 92 | .env 93 | .venv 94 | env/ 95 | venv/ 96 | ENV/ 97 | env.bak/ 98 | venv.bak/ 99 | 100 | # Spyder project settings 101 | .spyderproject 102 | .spyproject 103 | 104 | # Rope project settings 105 | .ropeproject 106 | 107 | # mkdocs documentation 108 | /site 109 | 110 | # mypy 111 | .mypy_cache/ 112 | -------------------------------------------------------------------------------- /preprocessing.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import pandas as pd 3 | import numpy as np 4 | import os 5 | from sklearn.preprocessing import LabelEncoder 6 | 7 | 8 | def load_image_predict(image_path, image_h, image_w): 9 | image = cv2.imread(image_path) 10 | image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) 11 | image = cv2.resize(image, (image_h, image_w)) 12 | image = image/255 13 | image = np.expand_dims(image, 0) 14 | 15 | return image 16 | 17 | 18 | def load_carla_data(path, labels): 19 | le = LabelEncoder() 20 | le.fit_transform(labels) 21 | 22 | data = pd.read_csv(path, delimiter=",", header=None) 23 | 24 | dataset = {} 25 | 26 | objects_omitted = 0 27 | red = 0 28 | green = 0 29 | for record in data[1:][data.columns[:7]].values: 30 | tokens = record[5].split(",") 31 | 32 | xmin, ymin, xmax, ymax = float(tokens[1].split(":")[1]), float(tokens[2].split(":")[1]),\ 33 | float(tokens[3].split(":")[1]), float(tokens[4].split(":")[1].replace("}", "")) 34 | 35 | #omit small images 36 | if xmax < 15: 37 | objects_omitted += 1 38 | continue 39 | 40 | xmax += xmin 41 | ymax += ymin 42 | 43 | if "stop" in record[6]: 44 | obj_class = "stop" 45 | red += 1 46 | else: 47 | obj_class = "go" 48 | green += 1 49 | 50 | obj = {} 51 | obj['xmin'], obj['ymin'], obj['xmax'], obj['ymax'], obj['class'] = xmin, ymin, xmax, ymax, obj_class 52 | 53 | image_path = record[0] 54 | 55 | #image_path = os.path.join("images", image_path) 56 | 57 | if image_path in dataset: 58 | dataset[image_path].append(obj) 59 | else: 60 | dataset[image_path] = [obj] 61 | 62 | print("Objects omitted", objects_omitted) 63 | print("Red light: ", red) 64 | print("Green light: ", green) 65 | 66 | instances = [] 67 | 68 | for key in dataset.keys(): 69 | inst = {} 70 | 71 | inst['image_path'] = key 72 | inst['object'] = dataset[key] 73 | 74 | instances.append(inst) 75 | 76 | return instances 77 | 78 | 79 | def load_image(path): 80 | img = cv2.imread(os.path.join(path)) 81 | 82 | if len(img.shape) == 3: 83 | img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) 84 | 85 | if len(img.shape) == 1: 86 | img = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB) 87 | 88 | return img -------------------------------------------------------------------------------- /grab_screen.py: 
-------------------------------------------------------------------------------- 1 | import copy 2 | import json 3 | import os 4 | import argparse 5 | import cv2 6 | import numpy as np 7 | import win32gui, win32ui, win32con, win32api 8 | import tensorflow as tf 9 | from keras.models import load_model 10 | from mss import mss 11 | 12 | from yolo import dummy_loss 13 | from postprocessing import decode_netout, draw_boxes 14 | 15 | 16 | BASE_DIR = os.path.dirname(__file__) 17 | 18 | 19 | def grab_screen_slower(region=None): 20 | hwin = win32gui.GetDesktopWindow() 21 | 22 | if region: 23 | left, top, x2, y2 = region 24 | width = x2 - left + 1 25 | height = y2 - top + 1 26 | else: 27 | width = win32api.GetSystemMetrics(win32con.SM_CXVIRTUALSCREEN) 28 | height = win32api.GetSystemMetrics(win32con.SM_CYVIRTUALSCREEN) 29 | left = win32api.GetSystemMetrics(win32con.SM_XVIRTUALSCREEN) 30 | top = win32api.GetSystemMetrics(win32con.SM_YVIRTUALSCREEN) 31 | 32 | hwindc = win32gui.GetWindowDC(hwin) 33 | srcdc = win32ui.CreateDCFromHandle(hwindc) 34 | memdc = srcdc.CreateCompatibleDC() 35 | bmp = win32ui.CreateBitmap() 36 | bmp.CreateCompatibleBitmap(srcdc, width, height) 37 | memdc.SelectObject(bmp) 38 | memdc.BitBlt((0, 0), (width, height), srcdc, (left, top), win32con.SRCCOPY) 39 | 40 | signedIntsArray = bmp.GetBitmapBits(True) 41 | img = np.fromstring(signedIntsArray, dtype='uint8') 42 | img.shape = (height, width, 4) 43 | 44 | srcdc.DeleteDC() 45 | memdc.DeleteDC() 46 | win32gui.ReleaseDC(hwin, hwindc) 47 | win32gui.DeleteObject(bmp.GetHandle()) 48 | 49 | 50 | return cv2.cvtColor(img, cv2.COLOR_BGRA2RGB) 51 | 52 | 53 | def grab_and_broadcast_screen(config): 54 | paused = False 55 | 56 | mon = {'top': 10, 'left': 10, 'width': 750, 'height': 680} 57 | sct = mss() 58 | 59 | dummy_array = np.zeros((1, 1, 1, 1, config['model']['max_obj'], 4)) 60 | 61 | model = load_model("checkpoints/traffic-light-model.h5", 62 | custom_objects={'custom_loss': dummy_loss, 'tf': tf}) 63 | 64 | frame_num = 0 65 | while (True): 66 | 67 | if not paused: 68 | screen = np.array(sct.grab(mon)) 69 | screen = cv2.cvtColor(screen, cv2.COLOR_BGR2RGB) 70 | 71 | plt_image = copy.deepcopy(screen) 72 | 73 | screen = cv2.resize(screen, (config['model']['image_h'], config['model']['image_w'])) 74 | screen = screen / 255. 
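# same preprocessing as load_image_predict in preprocessing.py: resize to the network
# input size, scale pixel values to [0, 1], then add a batch dimension below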
75 | screen = np.expand_dims(screen, 0) 76 | 77 | netout = model.predict([screen, dummy_array])[0] 78 | 79 | boxes = decode_netout(netout, 80 | obj_threshold=config['model']['obj_thresh'], 81 | nms_threshold=config['model']['nms_thresh'], 82 | anchors=config['model']['anchors'], 83 | nb_class=config['model']['num_classes']) 84 | 85 | plt_image = draw_boxes(plt_image, boxes, labels=config['model']['classes']) 86 | cv2.imshow('window', cv2.cvtColor(plt_image, cv2.COLOR_BGR2RGB)) 87 | 88 | key = cv2.waitKey(10) & 0xff 89 | if key == 27: 90 | cv2.destroyAllWindows() 91 | break 92 | 93 | 94 | #save images when o or O is pressed 95 | if key == ord('o') or key == ord('O'): 96 | cv2.imwrite(os.path.join(BASE_DIR, 'out', str(frame_num)+".png"), cv2.cvtColor(plt_image, cv2.COLOR_BGR2RGB)) 97 | frame_num += 1 98 | 99 | 100 | def main(args): 101 | config_path = args.conf 102 | 103 | with open(config_path) as config_buffer: 104 | config = json.loads(config_buffer.read()) 105 | 106 | grab_and_broadcast_screen(config) 107 | 108 | 109 | if __name__ == '__main__': 110 | argparser = argparse.ArgumentParser() 111 | 112 | argparser.add_argument( 113 | '-c', 114 | '--conf', 115 | default='config.json', 116 | help='Path to configuration file') 117 | 118 | args = argparser.parse_args() 119 | main(args) -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # traffic-light-detection-module 2 | 3 | ⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️ 4 | # IMPORTANT UPDATE 5 | This repo is deprecated. For a newer and improved model, implemented in PyTorch, please refer to this [repo](https://github.com/filipbasara0/simple-object-detection/blob/main/README.md#carla-traffic-lights). 6 | 7 | ⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️ 8 | 9 | 10 | ![out(2).png](https://github.com/affinis-lab/traffic-light-detection-module/blob/master/out/out2.png) 11 | 12 | ## About 13 | 14 | Module for detecting traffic lights in the [CARLA autonomous driving simulator](http://carla.org/) (version: 0.8.4).
15 | Built upon and inspired by https://github.com/experiencor/keras-yolo2.
16 | Instructions and more traffic light detection examples can be found below.
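For a quick check from Python, here is a minimal inference sketch built on the helpers already in this repo (`predict.py` and `postprocessing.py`); it assumes a trained checkpoint, named as `saved_model_name` in `config.json`, has been placed in a `checkpoints` folder next to the scripts:

```python
import json
import cv2

from predict import get_model_from_file, predict_with_model_from_file
from postprocessing import draw_boxes

with open('config.json') as f:
    config = json.load(f)

# loads checkpoints/<saved_model_name> with the training loss stubbed out
model = get_model_from_file(config)

# boxes are BoundBox objects with coordinates normalized to [0, 1]
image_path = 'test_images/test (1).png'
boxes = predict_with_model_from_file(config, model, image_path)

image = draw_boxes(cv2.imread(image_path), boxes, config['model']['classes'])
cv2.imwrite('out/example.png', image)
```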
17 | 18 | - This module is used along with several other [modules](https://github.com/affinis-lab) to implement our version of imitation learning in the CARLA simulator. Results of the [core](https://github.com/affinis-lab/core) module can be found in this [repository](https://github.com/affinis-lab/core). 19 | 20 | - The object detection model is based on Tiny YOLOv2 21 | 22 | - Training started from YOLOv2 COCO pretrained weights 23 | 24 | - The model was first trained on the LISA traffic light detection dataset (~5800 images), and after that on the dataset I collected from the CARLA simulator (~1800 images). 25 | 26 | ## CARLA dataset and model 27 | 28 | - The dataset I collected in the CARLA simulator can be found [here](https://drive.google.com/drive/folders/1TXkPLWlNgauPhQnKEoPDZsx7Px1MD9n_?usp=sharing); the annotations can be found [here](https://github.com/affinis-lab/traffic-light-detection-module/blob/master/dataset/carla_all.csv). 29 | 30 | - **Important note** - several images in the dataset are left out of the annotations because their bounding boxes are too small (the traffic lights are too far away). I also filtered out all annotated objects with xmax < 15 when loading the dataset. Around 70-80 out of ~1800 images are left out, so it isn't that problematic. 31 | 32 | - The pretrained model can be found [here](https://drive.google.com/file/d/1FVb6b6axN2WAYePv0_zLyiWDois7PgMZ/view?usp=sharing). 33 | 34 | 35 | ## Instructions 36 | - To train: 37 | - In the [config](https://github.com/affinis-lab/traffic-light-detection-module/blob/master/config.json) file set _train_ -> _enabled_ to **true** 38 | - Put your annotations file in the **dataset** folder 39 | - In the [config](https://github.com/affinis-lab/traffic-light-detection-module/blob/master/config.json) file set _train_ -> _annot_file_name_ to the name of your annotations file 40 | - Put your images in the **dataset/images** folder 41 | - If necessary, adjust the parameters in [config](https://github.com/affinis-lab/traffic-light-detection-module/blob/master/config.json) according to your problem/dataset 42 | - Run main.py with **-c config.json** 43 | 44 | - To evaluate: 45 | - In the [config](https://github.com/affinis-lab/traffic-light-detection-module/blob/master/config.json) file set _train_ -> _enabled_ to **false** 46 | - Put your annotations file in the **evaluation** folder 47 | - In the [config](https://github.com/affinis-lab/traffic-light-detection-module/blob/master/config.json) file set _train_ -> _annot_file_name_ to the name of the annotations file containing the images for evaluation 48 | - Put your images in the **evaluation/images** folder 49 | - If necessary, adjust the parameters in [config](https://github.com/affinis-lab/traffic-light-detection-module/blob/master/config.json) according to your problem/dataset 50 | - Run main.py with **-c config.json** 51 | 52 | - To generate anchors: 53 | - Run generate_anchors.py with **-c config.json** 54 | 55 | - Soon to be added: 56 | - Real-time traffic light detection GIFs 57 | 58 | ## Examples 59 | - Several example predictions; more can be found in the [out folder](https://github.com/affinis-lab/traffic-light-detection-module/tree/master/out) 60 | 61 | ![out(11).png](https://github.com/affinis-lab/traffic-light-detection-module/blob/master/out/out11.png) 62 | ![out(12).png](https://github.com/affinis-lab/traffic-light-detection-module/blob/master/out/out12.png) 63 | ![out(6).png](https://github.com/affinis-lab/traffic-light-detection-module/blob/master/out/out6.png) 64 | 
![out(7).png](https://github.com/affinis-lab/traffic-light-detection-module/blob/master/out/out7.png) 65 | ![out(14).png](https://github.com/affinis-lab/traffic-light-detection-module/blob/master/out/out14.png) 66 | ![out(15).png](https://github.com/affinis-lab/traffic-light-detection-module/blob/master/out/out15.png) 67 | ![out(4).png](https://github.com/affinis-lab/traffic-light-detection-module/blob/master/out/out4.png) 68 | -------------------------------------------------------------------------------- /generate_anchors.py: -------------------------------------------------------------------------------- 1 | import random 2 | import argparse 3 | import numpy as np 4 | import cv2 5 | import os 6 | import json 7 | 8 | from preprocessing import load_carla_data 9 | 10 | 11 | argparser = argparse.ArgumentParser() 12 | 13 | argparser.add_argument( 14 | '-c', 15 | '--conf', 16 | default='config.json', 17 | help='path to configuration file') 18 | 19 | argparser.add_argument( 20 | '-a', 21 | '--anchors', 22 | default=2, 23 | help='number of anchors to use') 24 | 25 | 26 | BASE_DIR = os.path.dirname(__file__) 27 | IMAGES_DIR = os.path.join(BASE_DIR, 'dataset', 'images') 28 | 29 | 30 | def IOU(ann, centroids): 31 | w, h = ann 32 | similarities = [] 33 | 34 | for centroid in centroids: 35 | c_w, c_h = centroid 36 | 37 | if c_w >= w and c_h >= h: 38 | similarity = w*h/(c_w*c_h) 39 | elif c_w >= w and c_h <= h: 40 | similarity = w*c_h/(w*h + (c_w-w)*c_h) 41 | elif c_w <= w and c_h >= h: 42 | similarity = c_w*h/(w*h + c_w*(c_h-h)) 43 | else: #means both w,h are bigger than c_w and c_h respectively 44 | similarity = (c_w*c_h)/(w*h) 45 | similarities.append(similarity) # will become (k,) shape 46 | 47 | return np.array(similarities) 48 | 49 | 50 | def avg_IOU(anns, centroids): 51 | n,d = anns.shape 52 | sum = 0. 
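# avg_IOU: average, over all annotation boxes, of the best IoU against any centroid;
# printed in main() as a quick measure of how well the chosen anchors cover the data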
53 | 54 | for i in range(anns.shape[0]): 55 | sum+= max(IOU(anns[i], centroids)) 56 | 57 | return sum/n 58 | 59 | 60 | def print_anchors(centroids): 61 | anchors = centroids.copy() 62 | 63 | widths = anchors[:, 0] 64 | sorted_indices = np.argsort(widths) 65 | 66 | r = "anchors: [" 67 | for i in sorted_indices[:-1]: 68 | r += '%0.2f,%0.2f, ' % (anchors[i,0], anchors[i,1]) 69 | 70 | r += '%0.2f,%0.2f' % (anchors[sorted_indices[-1:],0], anchors[sorted_indices[-1:],1]) 71 | r += "]" 72 | 73 | print(r) 74 | 75 | 76 | def run_kmeans(ann_dims, anchor_num): 77 | ann_num = ann_dims.shape[0] 78 | prev_assignments = np.ones(ann_num)*(-1) 79 | iterations = 0 80 | old_distances = np.zeros((ann_num, anchor_num)) 81 | 82 | indices = [random.randrange(ann_dims.shape[0]) for i in range(anchor_num)] 83 | centroids = ann_dims[indices] 84 | anchor_dim = ann_dims.shape[1] 85 | 86 | while True: 87 | distances = [] 88 | iterations += 1 89 | for i in range(ann_num): 90 | d = 1 - IOU(ann_dims[i], centroids) 91 | distances.append(d) 92 | distances = np.array(distances) # distances.shape = (ann_num, anchor_num) 93 | 94 | print("iteration {}: dists = {}".format(iterations, np.sum(np.abs(old_distances-distances)))) 95 | 96 | #assign samples to centroids 97 | assignments = np.argmin(distances,axis=1) 98 | 99 | if (assignments == prev_assignments).all(): 100 | return centroids 101 | 102 | #calculate new centroids 103 | centroid_sums=np.zeros((anchor_num, anchor_dim), np.float) 104 | for i in range(ann_num): 105 | centroid_sums[assignments[i]]+=ann_dims[i] 106 | for j in range(anchor_num): 107 | centroids[j] = centroid_sums[j]/(np.sum(assignments==j) + 1e-6) 108 | 109 | prev_assignments = assignments.copy() 110 | old_distances = distances.copy() 111 | 112 | 113 | def main(args): 114 | config_path = args.conf 115 | num_anchors = args.anchors 116 | 117 | with open(config_path) as config_buffer: 118 | config = json.loads(config_buffer.read()) 119 | 120 | data = load_carla_data(os.path.join(BASE_DIR, 'dataset',config['train']['annot_file_name']), config['model']['classes']) 121 | 122 | print(len(data)) 123 | 124 | np.random.shuffle(data) 125 | 126 | grid_w = config['model']['grid_w'] 127 | grid_h = config['model']['grid_h'] 128 | 129 | # run k_mean to find the anchors 130 | annotation_dims = [] 131 | for image in data: 132 | img = cv2.imread(os.path.join(IMAGES_DIR,image['image_path'])) 133 | h, w, c = img.shape 134 | cell_w = w/grid_w 135 | cell_h = h/grid_h 136 | 137 | for obj in image['object']: 138 | relative_w = (float(obj['xmax']) - float(obj['xmin']))/cell_w 139 | relatice_h = (float(obj["ymax"]) - float(obj['ymin']))/cell_h 140 | annotation_dims.append(tuple(map(float, (relative_w,relatice_h)))) 141 | 142 | annotation_dims = np.array(annotation_dims) 143 | centroids = run_kmeans(annotation_dims, num_anchors) 144 | 145 | # write anchors to file 146 | print('\naverage IOU for', num_anchors, 'anchors:', '%0.2f' % avg_IOU(annotation_dims, centroids)) 147 | print_anchors(centroids) 148 | 149 | 150 | if __name__ == '__main__': 151 | args = argparser.parse_args() 152 | main(args) -------------------------------------------------------------------------------- /postprocessing.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cv2 3 | 4 | 5 | def bbox_iou(box1, box2): 6 | intersect_w = interval_overlap([box1.xmin, box1.xmax], [box2.xmin, box2.xmax]) 7 | intersect_h = interval_overlap([box1.ymin, box1.ymax], [box2.ymin, box2.ymax]) 8 | 9 | intersect = 
intersect_w * intersect_h 10 | 11 | w1, h1 = box1.xmax - box1.xmin, box1.ymax - box1.ymin 12 | w2, h2 = box2.xmax - box2.xmin, box2.ymax - box2.ymin 13 | 14 | union = w1 * h1 + w2 * h2 - intersect 15 | 16 | return float(intersect) / union 17 | 18 | 19 | def decode_netout(netout, anchors, nb_class, obj_threshold=0.3, nms_threshold=0.3): 20 | grid_h, grid_w, nb_box = netout.shape[:3] 21 | #grid_h, grid_w, nb_box = config['models']['traffic_light_module']['grid_h'], GRID_W, BOX 22 | boxes = [] 23 | 24 | # decode the output by the network 25 | netout[..., 4] = _sigmoid(netout[..., 4]) 26 | netout[..., 5:] = netout[..., 4][..., np.newaxis] * _softmax(netout[..., 5:]) 27 | netout[..., 5:] *= netout[..., 5:] > obj_threshold 28 | 29 | for row in range(grid_h): 30 | for col in range(grid_w): 31 | for b in range(nb_box): 32 | # from 4th element onwards are confidence and class classes 33 | classes = netout[row, col, b, 5:] 34 | 35 | if np.sum(classes) > 0: 36 | # first 4 elements are x, y, w, and h 37 | x, y, w, h = netout[row, col, b, :4] 38 | 39 | x = (col + _sigmoid(x)) / grid_w # center position, unit: image width 40 | y = (row + _sigmoid(y)) / grid_h # center position, unit: image height 41 | 42 | w = anchors[2 * b + 0] * np.exp(w) / grid_w # unit: image width 43 | h = anchors[2 * b + 1] * np.exp(h) / grid_h # unit: image height 44 | confidence = netout[row, col, b, 4] 45 | 46 | box = BoundBox(x - w / 2, y - h / 2, x + w / 2, y + h / 2, confidence, classes) 47 | 48 | boxes.append(box) 49 | 50 | # suppress non-maximal boxes 51 | for c in range(nb_class): 52 | sorted_indices = list(reversed(np.argsort([box.classes[c] for box in boxes]))) 53 | 54 | for i in range(len(sorted_indices)): 55 | index_i = sorted_indices[i] 56 | 57 | if boxes[index_i].classes[c] == 0: 58 | continue 59 | else: 60 | for j in range(i + 1, len(sorted_indices)): 61 | index_j = sorted_indices[j] 62 | 63 | if bbox_iou(boxes[index_i], boxes[index_j]) >= nms_threshold: 64 | boxes[index_j].classes[c] = 0 65 | 66 | # remove the boxes which are less likely than a obj_threshold 67 | boxes = [box for box in boxes if box.get_score() > obj_threshold] 68 | 69 | boxes = sorted(boxes, key=lambda box: box.get_score(), reverse=True) 70 | if len(boxes) > 0: boxes = [boxes[0]] 71 | return boxes 72 | 73 | 74 | def draw_boxes(image, boxes, labels): 75 | image_h, image_w, _ = image.shape 76 | 77 | for box in boxes: 78 | 79 | if box.xmin>image_w or box.xmax>image_w or box.ymin>image_h or box.ymax>image_h: 80 | continue 81 | 82 | if (box.xmin<0): box.xmin = 0 83 | if (box.ymin < 0): box.ymin = 0 84 | if (box.xmax < 0): box.xmax = 0 85 | if (box.ymax < 0): box.ymax = 0 86 | 87 | xmin = int(box.xmin * image_w) 88 | ymin = int(box.ymin * image_h) 89 | xmax = int(box.xmax * image_w) 90 | ymax = int(box.ymax * image_h) 91 | 92 | label = labels[box.get_label()] 93 | 94 | r_color = 0 95 | g_color = 0 96 | 97 | if label== "go": 98 | g_color = 255 99 | else: 100 | r_color = 255 101 | 102 | cv2.rectangle(image, (xmin, ymin), (xmax, ymax), (0, g_color, r_color), 3) 103 | cv2.putText(image, 104 | label + ' ' + str(round(box.get_score(),4)), 105 | (xmin, ymin - 13), 106 | cv2.FONT_HERSHEY_SIMPLEX, 107 | 1e-3 * image_h, 108 | (0, g_color, r_color), 2) 109 | 110 | return image 111 | 112 | 113 | def compute_overlap(a, b): 114 | """ 115 | Code originally from https://github.com/rbgirshick/py-faster-rcnn. 
116 | Parameters 117 | ---------- 118 | a: (N, 4) ndarray of float 119 | b: (K, 4) ndarray of float 120 | Returns 121 | ------- 122 | overlaps: (N, K) ndarray of overlap between boxes and query_boxes 123 | """ 124 | area = (b[:, 2] - b[:, 0]) * (b[:, 3] - b[:, 1]) 125 | 126 | iw = np.minimum(np.expand_dims(a[:, 2], axis=1), b[:, 2]) - np.maximum(np.expand_dims(a[:, 0], 1), b[:, 0]) 127 | ih = np.minimum(np.expand_dims(a[:, 3], axis=1), b[:, 3]) - np.maximum(np.expand_dims(a[:, 1], 1), b[:, 1]) 128 | 129 | iw = np.maximum(iw, 0) 130 | ih = np.maximum(ih, 0) 131 | 132 | ua = np.expand_dims((a[:, 2] - a[:, 0]) * (a[:, 3] - a[:, 1]), axis=1) + area - iw * ih 133 | 134 | ua = np.maximum(ua, np.finfo(float).eps) 135 | 136 | intersection = iw * ih 137 | 138 | return intersection / ua 139 | 140 | 141 | def interval_overlap(interval_a, interval_b): 142 | x1, x2 = interval_a 143 | x3, x4 = interval_b 144 | 145 | if x3 < x1: 146 | if x4 < x1: 147 | return 0 148 | else: 149 | return min(x2, x4) - x1 150 | else: 151 | if x2 < x3: 152 | return 0 153 | else: 154 | return min(x2, x4) - x3 155 | 156 | 157 | def compute_ap(recall, precision): 158 | """ Compute the average precision, given the recall and precision curves. 159 | Code originally from https://github.com/rbgirshick/py-faster-rcnn. 160 | # Arguments 161 | recall: The recall curve (list). 162 | precision: The precision curve (list). 163 | # Returns 164 | The average precision as computed in py-faster-rcnn. 165 | """ 166 | # correct AP calculation 167 | # first append sentinel values at the end 168 | mrec = np.concatenate(([0.], recall, [1.])) 169 | mpre = np.concatenate(([0.], precision, [0.])) 170 | 171 | # compute the precision envelope 172 | for i in range(mpre.size - 1, 0, -1): 173 | mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i]) 174 | 175 | # to calculate area under PR curve, look for points 176 | # where X axis (recall) changes value 177 | i = np.where(mrec[1:] != mrec[:-1])[0] 178 | 179 | # and sum (\Delta recall) * prec 180 | ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1]) 181 | return ap 182 | 183 | 184 | def _sigmoid(x): 185 | return 1. / (1. 
+ np.exp(-x)) 186 | 187 | 188 | def _softmax(x, axis=-1, t=-100.): 189 | x = x - np.max(x) 190 | 191 | if np.min(x) < t: 192 | x = x / np.min(x) * t 193 | 194 | e_x = np.exp(x) 195 | 196 | return e_x / e_x.sum(axis, keepdims=True) 197 | 198 | 199 | class BoundBox: 200 | def __init__(self, xmin, ymin, xmax, ymax, c=None, classes=None): 201 | self.xmin = xmin 202 | self.ymin = ymin 203 | self.xmax = xmax 204 | self.ymax = ymax 205 | 206 | self.c = c 207 | self.classes = classes 208 | 209 | self.label = -1 210 | self.score = -1 211 | 212 | def get_label(self): 213 | if self.label == -1: 214 | self.label = np.argmax(self.classes) 215 | 216 | return self.label 217 | 218 | def get_score(self): 219 | if self.score == -1: 220 | self.score = self.classes[self.get_label()] 221 | 222 | return self.score -------------------------------------------------------------------------------- /utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import keras 3 | import cv2 4 | import copy 5 | import os 6 | from imgaug import augmenters as iaa 7 | from sklearn.preprocessing import LabelEncoder 8 | 9 | from postprocessing import interval_overlap 10 | 11 | 12 | BASE_DIR = os.path.dirname(__file__) 13 | IMAGES_DIR = os.path.join(BASE_DIR, 'dataset', 'images') 14 | 15 | 16 | def bbox_iou(box1, box2): 17 | # 0 ,1 ,2 ,3 18 | # xmin,ymin,xmax,ymax 19 | intersect_w = interval_overlap([box1[0], box1[2]], [box2[0], box2[2]]) 20 | intersect_h = interval_overlap([box1[1], box1[3]], [box2[1], box2[3]]) 21 | 22 | intersect = intersect_w * intersect_h 23 | 24 | w1, h1 = box1[2] - box1[0], box1[3] - box1[1] 25 | w2, h2 = box2[2] - box2[0], box2[3] - box2[1] 26 | 27 | union = w1 * h1 + w2 * h2 - intersect 28 | 29 | return float(intersect) / union 30 | 31 | 32 | class BatchGenerator(keras.utils.Sequence): 33 | 'Generates data for Keras' 34 | def __init__(self, config, dataset, shuffle=True, jitter = True): 35 | 'Initialization' 36 | self.config = config 37 | self.dataset = dataset 38 | 39 | self.image_h = config['model']['image_h'] 40 | self.image_w = config['model']['image_w'] 41 | self.n_channels = 3 42 | 43 | self.grid_h = config['model']['grid_h'] 44 | self.grid_w = config['model']['grid_w'] 45 | 46 | self.n_classes = config['model']['num_classes'] 47 | self.labels = config['model']['classes'] 48 | 49 | self.batch_size = config['train']['batch_size'] 50 | self.max_obj = config['model']['max_obj'] 51 | 52 | self.shuffle = shuffle 53 | self.jitter = jitter 54 | 55 | self.nb_anchors = int(len(config['model']['anchors']) / 2) 56 | 57 | self.anchors = [[0, 0, config['model']['anchors'][2 * i], config['model']['anchors'][2 * i + 1]] for i in 58 | range(int(len(config['model']['anchors']) // 2))] 59 | 60 | self.on_epoch_end() 61 | 62 | sometimes = lambda aug: iaa.Sometimes(0.5, aug) 63 | 64 | self.aug_pipe = iaa.Sequential( 65 | [ 66 | # apply the following augmenters to most images 67 | # iaa.Fliplr(0.5), # horizontally flip 50% of all images 68 | # iaa.Flipud(0.2), # vertically flip 20% of all images 69 | # sometimes(iaa.Crop(percent=(0, 0.1))), # crop images by 0-10% of their height/width 70 | #sometimes(iaa.Affine( 71 | # scale={"x": (0.8, 1.2), "y": (0.8, 1.2)}, # scale images to 80-120% of their size, individually per axis 72 | # translate_percent={"x": (-0.2, 0.2), "y": (-0.2, 0.2)}, # translate by -20 to +20 percent (per axis) 73 | # rotate=(-5, 5), # rotate by -45 to +45 degrees 74 | # shear=(-5, 5), # shear by -16 to +16 degrees 75 | # order=[0, 1], # use 
nearest neighbour or bilinear interpolation (fast) 76 | # cval=(0, 255), # if mode is constant, use a cval between 0 and 255 77 | # mode=ia.ALL # use any of scikit-image's warping modes (see 2nd image from the top for examples) 78 | #)), 79 | # execute 0 to 5 of the following (less important) augmenters per image 80 | # don't execute all of them, as that would often be way too strong 81 | iaa.SomeOf((0, 3), 82 | [ 83 | # sometimes(iaa.Superpixels(p_replace=(0, 1.0), n_segments=(20, 200))), # convert images into their superpixel representation 84 | iaa.OneOf([ 85 | iaa.GaussianBlur((0, 3.0)), # blur images with a sigma between 0 and 3.0 86 | iaa.AverageBlur(k=(2, 7)), 87 | # blur image using local means with kernel sizes between 2 and 7 88 | iaa.MedianBlur(k=(3, 11)), 89 | # blur image using local medians with kernel sizes between 2 and 7 90 | ]), 91 | iaa.Sharpen(alpha=(0, 1.0), lightness=(0.75, 1.5)), # sharpen images 92 | # iaa.Emboss(alpha=(0, 1.0), strength=(0, 2.0)), # emboss images 93 | # search either for all edges or for directed edges 94 | # sometimes(iaa.OneOf([ 95 | # iaa.EdgeDetect(alpha=(0, 0.7)), 96 | # iaa.DirectedEdgeDetect(alpha=(0, 0.7), direction=(0.0, 1.0)), 97 | # ])), 98 | iaa.AdditiveGaussianNoise(loc=0, scale=(0.0, 0.05 * 255), per_channel=0.5), 99 | # add gaussian noise to images 100 | iaa.OneOf([ 101 | iaa.Dropout((0.01, 0.1), per_channel=0.5), # randomly remove up to 10% of the pixels 102 | # iaa.CoarseDropout((0.03, 0.15), size_percent=(0.02, 0.05), per_channel=0.2), 103 | ]), 104 | # iaa.Invert(0.05, per_channel=True), # invert color channels 105 | iaa.Add((-10, 10), per_channel=0.5), 106 | # change brightness of images (by -10 to 10 of original value) 107 | iaa.Multiply((0.5, 1.5), per_channel=0.5), 108 | # change brightness of images (50-150% of original value) 109 | iaa.ContrastNormalization((0.5, 2.0), per_channel=0.5), # improve or worsen the contrast 110 | # iaa.Grayscale(alpha=(0.0, 1.0)), 111 | # sometimes(iaa.ElasticTransformation(alpha=(0.5, 3.5), sigma=0.25)), # move pixels locally around (with random strengths) 112 | # sometimes(iaa.PiecewiseAffine(scale=(0.01, 0.05))) # sometimes move parts of the image around 113 | ], 114 | random_order=True 115 | ) 116 | ], 117 | random_order=True 118 | ) 119 | 120 | 121 | def __len__(self): 122 | 'Denotes the number of batches per epoch' 123 | return int(np.ceil(float(len(self.dataset)) / self.batch_size)) 124 | 125 | 126 | def __getitem__(self, index): 127 | 'Generate one batch of data' 128 | 129 | ''' 130 | l_bound = index*self.config['BATCH_SIZE'] 131 | r_bound = (index+1)*self.config['BATCH_SIZE'] 132 | 133 | if r_bound > len(self.images): 134 | r_bound = len(self.images) 135 | l_bound = r_bound - self.config['BATCH_SIZE'] 136 | ''' 137 | 138 | le = LabelEncoder() 139 | le.fit_transform(self.labels) 140 | 141 | x_batch = np.zeros((self.batch_size, self.image_h, self.image_w, self.n_channels)) 142 | b_batch = np.zeros((self.batch_size, 1, 1, 1, self.max_obj, 4)) 143 | 144 | y_batch = np.zeros((self.batch_size, self.grid_h, self.grid_w, self.nb_anchors, 4 + 1 + self.num_classes())) # desired network output 145 | 146 | #current_batch = self.dataset[l_bound:r_bound] 147 | current_batch = self.dataset[index * self.batch_size:(index + 1) * self.batch_size] 148 | 149 | instance_num = 0 150 | 151 | for instance in current_batch: 152 | img, object_annotations = self.prep_image_and_annot(instance, jitter=self.jitter) 153 | 154 | obj_num = 0 155 | 156 | # center of the bounding box is divided with the image 
width/height and grid width/height 157 | # to get the coordinates relative to a single element of a grid 158 | for obj in object_annotations: 159 | if obj['xmax'] > obj['xmin'] and obj['ymax'] > obj['ymin'] and obj['class'] in self.labels: 160 | center_x = .5 * (obj['xmin'] + obj['xmax']) # center of the lower side of the bb (by x axis) 161 | center_x = center_x / (float(self.image_w) / self.grid_w) # scaled to the grid unit (a value between 0 and GRID_W-1) 162 | center_y = .5 * (obj['ymin'] + obj['ymax']) # center of the lower side (by y axis) 163 | center_y = center_y / (float(self.image_h) / self.grid_h) # scaled to the grid unit (a value between 0 and GRID_H-1) 164 | 165 | grid_x = int(np.floor(center_x)) # assigns the object to the matching 166 | grid_y = int(np.floor(center_y)) # grid element according to (center_x, center_y) 167 | 168 | if grid_x < self.grid_w and grid_y < self.grid_h: 169 | center_w = (obj['xmax'] - obj['xmin']) / (float(self.image_w) / self.grid_w) 170 | center_h = (obj['ymax'] - obj['ymin']) / (float(self.image_h) / self.grid_h) 171 | 172 | box = [center_x, center_y, center_w, center_h] 173 | 174 | # find the anchor that best predicts this box 175 | best_anchor = -1 176 | max_iou = -1 177 | 178 | shifted_box = [0, 0, center_w, center_h] 179 | 180 | for i in range(len(self.anchors)): 181 | anchor = self.anchors[i] 182 | iou = bbox_iou(shifted_box, anchor) 183 | 184 | if max_iou < iou: 185 | best_anchor = i 186 | max_iou = iou 187 | 188 | classes = [0, 0] 189 | 190 | obj_label = int(le.transform([obj['class']])) 191 | 192 | if obj_label == 0: 193 | classes[0] = 1 194 | else: 195 | classes[1] = 1 196 | 197 | img = self.normalize(img) 198 | 199 | x_batch[instance_num] = img 200 | 201 | b_batch[instance_num, 0, 0, 0, obj_num] = box 202 | y_batch[instance_num, grid_y, grid_x, best_anchor] = [box[0], box[1], box[2], box[3], 1.0, classes[0], classes[1]] 203 | 204 | obj_num += 1 205 | obj_num %= self.max_obj 206 | 207 | instance_num += 1 208 | 209 | return [x_batch, b_batch], y_batch 210 | 211 | 212 | def prep_image_and_annot(self, dataset_instance, jitter): 213 | image_path = dataset_instance['image_path'] 214 | image = self.load_image(os.path.join(IMAGES_DIR,image_path)) 215 | 216 | h, w, c = image.shape 217 | 218 | if jitter: 219 | image = self.aug_pipe.augment_image(image) 220 | 221 | # resize the image to standard size 222 | image = cv2.resize(image, (self.image_h, self.image_w)) 223 | 224 | object_annotations = copy.deepcopy(dataset_instance['object']) 225 | for obj in object_annotations: 226 | for attr in ['xmin', 'xmax']: 227 | obj[attr] = int(obj[attr] * float(self.image_w) / w) 228 | obj[attr] = max(min(obj[attr], self.image_w), 0) 229 | 230 | for attr in ['ymin', 'ymax']: 231 | obj[attr] = int(obj[attr] * float(self.image_h) / h) 232 | obj[attr] = max(min(obj[attr], self.image_h), 0) 233 | 234 | return image, object_annotations 235 | 236 | 237 | def on_epoch_end(self): 238 | 'Updates indexes after each epoch' 239 | if self.shuffle: 240 | np.random.shuffle(self.dataset) 241 | 242 | 243 | def load_image(self, path): 244 | img = cv2.imread(os.path.join(IMAGES_DIR, path)) 245 | 246 | try: 247 | if len(img.shape) == 3: 248 | img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) 249 | 250 | else: 251 | img = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB) 252 | except: 253 | print(path) 254 | 255 | return img 256 | 257 | 258 | def load_annotation(self, i): 259 | annots = [] 260 | 261 | for obj in self.dataset[i]['object']: 262 | annot = [obj['xmin'], obj['ymin'], obj['xmax'], 
obj['ymax'], self.labels.index(obj['class'])] 263 | annots += [annot] 264 | 265 | if len(annots) == 0: annots = [[]] 266 | 267 | return np.array(annots) 268 | 269 | 270 | def normalize(self, image): 271 | return image/255. 272 | 273 | 274 | def num_classes(self): 275 | return len(self.labels) 276 | 277 | 278 | def size(self): 279 | return len(self.dataset) -------------------------------------------------------------------------------- /yolo.py: -------------------------------------------------------------------------------- 1 | from keras.models import Model, load_model 2 | from keras.layers import Reshape, Lambda, Conv2D, Input, MaxPooling2D, BatchNormalization 3 | from keras.layers.advanced_activations import LeakyReLU 4 | from keras.callbacks import EarlyStopping, ModelCheckpoint, TensorBoard, Callback 5 | from keras.optimizers import SGD, Adam, RMSprop 6 | import tensorflow as tf 7 | import os 8 | import numpy as np 9 | 10 | from postprocessing import decode_netout, interval_overlap, compute_overlap, compute_ap 11 | from preprocessing import load_image_predict, load_carla_data 12 | from utils import BatchGenerator 13 | 14 | 15 | BASE_DIR = os.path.dirname(__file__) 16 | ANNOT_DIR = os.path.join(BASE_DIR, 'dataset') 17 | 18 | 19 | class TinyYoloFeature: 20 | """Tiny yolo feature extractor""" 21 | def __init__(self, input_size, config): 22 | input_image = Input(shape=(input_size, input_size, 3)) 23 | 24 | # Layer 1 25 | x = Conv2D(16, (3,3), strides=(1,1), padding='same', name='conv_1', use_bias=False)(input_image) 26 | x = BatchNormalization(name='norm_1')(x) 27 | x = LeakyReLU(alpha=0.1)(x) 28 | x = MaxPooling2D(pool_size=(2, 2))(x) 29 | 30 | # Layer 2 - 5 31 | for i in range(0,4): 32 | x = Conv2D(32*(2**i), (3,3), strides=(1,1), padding='same', name='conv_' + str(i+2), use_bias=False)(x) 33 | x = BatchNormalization(name='norm_' + str(i+2))(x) 34 | x = LeakyReLU(alpha=0.1)(x) 35 | x = MaxPooling2D(pool_size=(2, 2))(x) 36 | 37 | # Layer 6 38 | x = Conv2D(512, (3,3), strides=(1,1), padding='same', name='conv_6', use_bias=False)(x) 39 | x = BatchNormalization(name='norm_6')(x) 40 | x = LeakyReLU(alpha=0.1)(x) 41 | x = MaxPooling2D(pool_size=(2, 2), strides=(1,1), padding='same')(x) 42 | 43 | # Layer 7 44 | x = Conv2D(1024, (3,3), strides=(1,1), padding='same', name='conv_' + str(7), use_bias=False)(x) 45 | x = BatchNormalization(name='norm_' + str(7))(x) 46 | x = LeakyReLU(alpha=0.1)(x) 47 | 48 | # Layer 8 49 | x = Conv2D(512, (3, 3), strides=(1, 1), padding='same', name='conv_' + str(8), use_bias=False)(x) 50 | x = BatchNormalization(name='norm_' + str(8))(x) 51 | x = LeakyReLU(alpha=0.1, name = 'last')(x) 52 | 53 | self.feature_extractor = Model(input_image, x) 54 | 55 | 56 | pretrained = load_model('checkpoints\\' + config['model']['saved_model_name'], custom_objects={'custom_loss': dummy_loss, 'tf': tf}) 57 | pretrained = pretrained.get_layer('model_1') 58 | 59 | idx = 0 60 | for layer in self.feature_extractor.layers: 61 | print(layer.name) 62 | layer.set_weights(pretrained.get_layer(index=idx).get_weights()) 63 | idx += 1 64 | 65 | frozen = [1, 2, 3, 4, 5, 6, 7] 66 | 67 | for l in frozen: 68 | self.feature_extractor.get_layer("conv_" + str(l)).trainable = False 69 | self.feature_extractor.get_layer("norm_" + str(l)).trainable = False 70 | 71 | self.feature_extractor.summary() 72 | 73 | 74 | class YOLO(object): 75 | def __init__(self, config): 76 | 77 | self.config = config 78 | 79 | self.image_h = config['model']['image_h'] 80 | self.image_w = config['model']['image_w'] 81 | 82 | 
self.grid_h, self.grid_w = config['model']['grid_h'], config['model']['grid_w'] 83 | 84 | self.labels = config['model']['classes'] 85 | self.nb_class = int(len(self.labels)) 86 | self.nb_box = int(len(config['model']['anchors'])/2) 87 | self.class_wt = np.ones(self.nb_class, dtype='float32') 88 | self.anchors = config['model']['anchors'] 89 | 90 | self.max_box_per_image = config['model']['max_obj'] 91 | self.batch_size = config['train']['batch_size'] 92 | 93 | self.object_scale = config['model']['obj_scale'] 94 | self.no_object_scale = config['model']['no_obj_scale'] 95 | self.coord_scale = config['model']['coord_scale'] 96 | self.class_scale = config['model']['class_scale'] 97 | 98 | self.obj_thresh = config['model']['obj_thresh'] 99 | self.nms_thresh = config['model']['nms_thresh'] 100 | 101 | self.warmup_batches = config['train']['warmup_batches'] 102 | self.debug = config['train']['debug'] 103 | 104 | ########################## 105 | # Make the model 106 | ########################## 107 | 108 | # make the feature extractor layers 109 | input_image = Input(shape=(self.image_h, self.image_w, 3)) 110 | self.true_boxes = Input(shape=(1, 1, 1, self.max_box_per_image, 4)) 111 | 112 | self.feature_extractor = TinyYoloFeature(self.image_h, config).feature_extractor 113 | features = self.feature_extractor(input_image) 114 | 115 | # Object detection layer 116 | output = Conv2D(self.nb_box * (4 + 1 + self.nb_class), 117 | (1, 1), strides=(1, 1), 118 | padding='same', 119 | name='DetectionLayer', 120 | activation='linear', 121 | kernel_initializer='lecun_normal')(features) 122 | 123 | output = Reshape((self.grid_h, self.grid_w, self.nb_box, 4 + 1 + self.nb_class))(output) 124 | output = Lambda(lambda args: args[0])([output, self.true_boxes]) 125 | 126 | self.model = Model([input_image, self.true_boxes], output) 127 | self.model.summary() 128 | 129 | pretrained = load_model('checkpoints\\' + config['model']['saved_model_name'], custom_objects={'custom_loss': self.custom_loss, 'tf': tf}) 130 | self.model.get_layer('DetectionLayer').set_weights( 131 | pretrained.get_layer('DetectionLayer').get_weights()) 132 | 133 | 134 | def load_weights(self, model_path): 135 | model = load_model(model_path, custom_objects={'custom_loss': self.custom_loss, 'tf': tf}) 136 | 137 | idx = 0 138 | for layer in self.model.layers: 139 | layer.set_weights(model.get_layer(index=idx).get_weights()) 140 | idx += 1 141 | 142 | 143 | def predict(self, image_path): 144 | image = load_image_predict(image_path, self.image_h, self.image_w) 145 | 146 | dummy_array = np.zeros((1, 1, 1, 1, self.max_box_per_image, 4)) 147 | netout = self.model.predict([image, dummy_array])[0] 148 | 149 | boxes = decode_netout(netout=netout, anchors = self.anchors, nb_class=self.nb_class, 150 | obj_threshold=self.obj_thresh, nms_threshold=self.nms_thresh) 151 | return boxes 152 | 153 | 154 | def train(self): 155 | data = load_carla_data(os.path.join(ANNOT_DIR, self.config['train']['annot_file_name']), self.config['model']['classes']) 156 | 157 | np.random.shuffle(data) 158 | 159 | train_instances, validation_instances = data[:1655], data[1655:] 160 | 161 | np.random.shuffle(train_instances) 162 | np.random.shuffle(validation_instances) 163 | 164 | train_generator = BatchGenerator(self.config, train_instances, jitter=True) 165 | validation_generator = BatchGenerator(self.config, validation_instances, jitter=False) 166 | 167 | checkpoint = ModelCheckpoint( 168 | 'checkpoints\\model.{epoch:02d}-{val_loss:.2f}.h5', 169 | monitor='val_loss', 170 | verbose=1, 
171 | save_best_only=True, 172 | mode='auto', 173 | period=1 174 | ) 175 | 176 | checkpoint_all = ModelCheckpoint( 177 | 'checkpoints\\all_models.{epoch:02d}-{loss:.2f}.h5', 178 | monitor='loss', 179 | verbose=1, 180 | save_best_only=True, 181 | mode='auto', 182 | period=1 183 | ) 184 | 185 | # optimizer = RMSprop(lr=1e-3,rho=0.9, epsilon=1e-08, decay=0.0) 186 | # optimizer = Adam(lr=1e-3, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0) 187 | optimizer = SGD(lr=1e-5, momentum=0.9, decay=0.0005) 188 | 189 | self.model.compile(loss=self.custom_loss, optimizer=optimizer) #, metrics=['accuracy']) 190 | 191 | self.model.summary() 192 | 193 | history = self.model.fit_generator(generator=train_generator, 194 | steps_per_epoch=len(train_generator), 195 | epochs=self.config['train']['nb_epochs'], 196 | verbose=1, 197 | validation_data=validation_generator, 198 | validation_steps=len(validation_generator), 199 | callbacks=[checkpoint, checkpoint_all],# map_evaluator_cb], # checkpoint, tensorboard 200 | max_queue_size=10, 201 | workers=3 202 | ) 203 | 204 | 205 | def evaluate(self): 206 | data = load_carla_data(os.path.join(ANNOT_DIR, self.config['train']['annot_file_name']), 207 | self.config['model']['classes']) 208 | 209 | np.random.shuffle(data) 210 | 211 | validation_instances = data#[1400:] 212 | 213 | validation_generator = BatchGenerator(self.config, validation_instances, jitter=False) 214 | 215 | map_evaluator_cb = self.MAP_evaluation(self, validation_generator, 216 | save_best=True, 217 | save_name='checkpoints\\best-mAP.h5', 218 | # os.path.join(BASE_DIR,'best_mAP\\weights.{epoch:02d}-{val_loss:.2f}.h5'), 219 | tensorboard=None, 220 | iou_threshold=0.4) 221 | 222 | self.model.compile(loss=self.custom_loss, optimizer=SGD(lr=1e-5, momentum=0.9, decay=0.0005)) 223 | 224 | self.model.summary() 225 | 226 | history = self.model.fit_generator(generator=validation_generator, 227 | steps_per_epoch=len(validation_generator), 228 | epochs=1, 229 | verbose=1, 230 | callbacks=[map_evaluator_cb] 231 | ) 232 | 233 | 234 | def normalize(self, image): 235 | return image / 255. 236 | 237 | 238 | def custom_loss(self, y_true, y_pred): 239 | mask_shape = tf.shape(y_true)[:4] 240 | 241 | cell_x = tf.to_float( 242 | tf.reshape(tf.tile(tf.range(self.grid_w), [self.grid_h]), (1, self.grid_h, self.grid_w, 1, 1))) 243 | cell_y = tf.transpose(cell_x, (0, 2, 1, 3, 4)) 244 | 245 | cell_grid = tf.tile(tf.concat([cell_x, cell_y], -1), [self.batch_size, 1, 1, self.nb_box, 1]) 246 | 247 | coord_mask = tf.zeros(mask_shape) 248 | conf_mask = tf.zeros(mask_shape) 249 | class_mask = tf.zeros(mask_shape) 250 | 251 | seen = tf.Variable(0.) 252 | total_loss = tf.Variable(0.) 253 | total_recall = tf.Variable(0.) 
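# total_boxes: number of predicted boxes per batch (grid cells x anchors x batch size);
# used further down to derive the positive-confidence normalizer as total_boxes - nb_conf_box_neg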
254 | total_boxes = tf.Variable(self.grid_h*self.grid_w*self.nb_box*self.batch_size) 255 | 256 | """ 257 | Adjust prediction 258 | """ 259 | ### adjust x and y 260 | pred_box_xy = tf.sigmoid(y_pred[..., :2]) + cell_grid 261 | 262 | ### adjust w and h 263 | pred_box_wh = tf.exp(y_pred[..., 2:4]) * np.reshape(self.anchors, [1, 1, 1, self.nb_box, 2]) 264 | 265 | ### adjust confidence 266 | pred_box_conf = tf.sigmoid(y_pred[..., 4]) 267 | 268 | ### adjust class probabilities 269 | pred_box_class = y_pred[..., 5:] 270 | 271 | """ 272 | Adjust ground truth 273 | """ 274 | ### adjust x and y 275 | true_box_xy = y_true[..., 0:2] # relative position to the containing cell 276 | 277 | ### adjust w and h 278 | true_box_wh = y_true[..., 2:4] # number of cells across, horizontally and vertically 279 | 280 | ### adjust confidence 281 | true_wh_half = true_box_wh / 2. 282 | true_mins = true_box_xy - true_wh_half 283 | true_maxes = true_box_xy + true_wh_half 284 | 285 | pred_wh_half = pred_box_wh / 2. 286 | pred_mins = pred_box_xy - pred_wh_half 287 | pred_maxes = pred_box_xy + pred_wh_half 288 | 289 | intersect_mins = tf.maximum(pred_mins, true_mins) 290 | intersect_maxes = tf.minimum(pred_maxes, true_maxes) 291 | intersect_wh = tf.maximum(intersect_maxes - intersect_mins, 0.) 292 | intersect_areas = intersect_wh[..., 0] * intersect_wh[..., 1] 293 | 294 | true_areas = true_box_wh[..., 0] * true_box_wh[..., 1] 295 | pred_areas = pred_box_wh[..., 0] * pred_box_wh[..., 1] 296 | 297 | union_areas = pred_areas + true_areas - intersect_areas 298 | iou_scores = tf.truediv(intersect_areas, union_areas) 299 | 300 | true_box_conf = iou_scores * y_true[..., 4] 301 | 302 | ### adjust class probabilities 303 | true_box_class = tf.argmax(y_true[..., 5:], -1) 304 | 305 | """ 306 | Determine the masks 307 | """ 308 | ### coordinate mask: simply the position of the ground truth boxes (the predictors) 309 | coord_mask = tf.expand_dims(y_true[..., 4], axis=-1) * self.coord_scale 310 | 311 | ### confidence mask: penalize responsible predictors + penalize boxes with low IOU 312 | # penalize the confidence of boxes whose best IOU with any ground truth box is below 0.5 313 | true_xy = self.true_boxes[..., 0:2] 314 | true_wh = self.true_boxes[..., 2:4] 315 | 316 | true_wh_half = true_wh / 2. 317 | true_mins = true_xy - true_wh_half 318 | true_maxes = true_xy + true_wh_half 319 | 320 | pred_xy = tf.expand_dims(pred_box_xy, 4) 321 | pred_wh = tf.expand_dims(pred_box_wh, 4) 322 | 323 | pred_wh_half = pred_wh / 2. 324 | pred_mins = pred_xy - pred_wh_half 325 | pred_maxes = pred_xy + pred_wh_half 326 | 327 | intersect_mins = tf.maximum(pred_mins, true_mins) 328 | intersect_maxes = tf.minimum(pred_maxes, true_maxes) 329 | intersect_wh = tf.maximum(intersect_maxes - intersect_mins, 0.)
330 | intersect_areas = intersect_wh[..., 0] * intersect_wh[..., 1] 331 | 332 | true_areas = true_wh[..., 0] * true_wh[..., 1] 333 | pred_areas = pred_wh[..., 0] * pred_wh[..., 1] 334 | 335 | union_areas = pred_areas + true_areas - intersect_areas 336 | iou_scores = tf.truediv(intersect_areas, union_areas) 337 | 338 | best_ious = tf.reduce_max(iou_scores, axis=4) 339 | #conf_mask = conf_mask + tf.to_float(best_ious < 0.5) * (1 - y_true[..., 4]) * self.no_object_scale 340 | 341 | # penalize the confidence of the boxes which are responsible for the corresponding ground truth box 342 | #conf_mask = conf_mask + y_true[..., 4] * self.object_scale 343 | 344 | conf_mask_neg = tf.to_float(best_ious < 0.50) * (1 - y_true[..., 4]) * self.no_object_scale 345 | conf_mask_pos = y_true[..., 4] * self.object_scale 346 | 347 | ### class mask: simply the position of the ground truth boxes (the predictors) 348 | class_mask = y_true[..., 4] * tf.gather(self.class_wt, true_box_class) * self.class_scale 349 | 350 | """ 351 | Warm-up training 352 | """ 353 | no_boxes_mask = tf.to_float(coord_mask < self.coord_scale / 2.) 354 | seen = tf.assign_add(seen, 1.) 355 | 356 | true_box_xy, true_box_wh, coord_mask = tf.cond(tf.less(seen, self.warmup_batches + 1), 357 | lambda: [true_box_xy + (0.5 + cell_grid) * no_boxes_mask, 358 | true_box_wh + tf.ones_like(true_box_wh) * \ 359 | np.reshape(self.anchors, [1, 1, 1, self.nb_box, 2]) * \ 360 | no_boxes_mask, 361 | tf.ones_like(coord_mask)], 362 | lambda: [true_box_xy, 363 | true_box_wh, 364 | coord_mask]) 365 | 366 | """ 367 | Finalize the loss 368 | """ 369 | nb_coord_box = tf.reduce_sum(tf.to_float(coord_mask > 0.0)) 370 | #nb_conf_box = tf.reduce_sum(tf.to_float(conf_mask > 0.0)) 371 | nb_conf_box_neg = tf.reduce_sum(tf.to_float(conf_mask_neg > 0.0)) 372 | nb_conf_box_pos = tf.subtract(tf.to_float(total_boxes), nb_conf_box_neg) #tf.reduce_sum(tf.to_float(conf_mask_pos > 0.0)) 373 | nb_class_box = tf.reduce_sum(tf.to_float(class_mask > 0.0)) 374 | 375 | true_box_wh = tf.sqrt(true_box_wh) 376 | pred_box_wh = tf.sqrt(pred_box_wh) 377 | 378 | loss_xy = tf.reduce_sum(tf.square(true_box_xy - pred_box_xy) * coord_mask) / (nb_coord_box + 1e-6) / 2. 379 | loss_wh = tf.reduce_sum(tf.square(true_box_wh - pred_box_wh) * coord_mask) / (nb_coord_box + 1e-6) / 2. 380 | loss_conf_neg = tf.reduce_sum(tf.square(true_box_conf - pred_box_conf) * conf_mask_neg) / (nb_conf_box_neg + 1e-6) / 2. 381 | loss_conf_pos = tf.reduce_sum(tf.square(true_box_conf - pred_box_conf) * conf_mask_pos) / (nb_conf_box_pos + 1e-6) / 2. 382 | loss_conf = loss_conf_neg + loss_conf_pos 383 | #loss_conf = tf.reduce_sum(tf.square(true_box_conf - pred_box_conf) * conf_mask) / (nb_conf_box + 1e-6) / 2.
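# Classification loss below: sparse softmax cross-entropy over the class logits,
# kept only for the anchors responsible for a ground truth box (class_mask > 0)
# and normalized by the number of such boxes.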
384 | loss_class = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=true_box_class, logits=pred_box_class) 385 | loss_class = tf.reduce_sum(loss_class * class_mask) / (nb_class_box + 1e-6) 386 | 387 | loss = tf.cond(tf.less(seen, self.warmup_batches + 1), 388 | lambda: loss_xy + loss_wh + loss_conf + loss_class + 10, 389 | lambda: loss_xy + loss_wh + loss_conf + loss_class) 390 | 391 | if self.debug: 392 | nb_true_box = tf.reduce_sum(y_true[..., 4]) 393 | nb_pred_box = tf.reduce_sum(tf.to_float(true_box_conf > 0.32) * tf.to_float(pred_box_conf > 0.3)) 394 | 395 | current_recall = nb_pred_box / (nb_true_box + 1e-6) 396 | total_recall = tf.assign_add(total_recall, current_recall) 397 | 398 | total_loss = tf.assign_add(total_loss, loss) 399 | 400 | loss = tf.Print(loss, [loss_xy], message='\nLoss XY \t', summarize=1000) 401 | loss = tf.Print(loss, [loss_wh], message='Loss WH \t', summarize=1000) 402 | loss = tf.Print(loss, [nb_conf_box_neg], message='Nb Conf Box Negative \t', summarize=1000) 403 | loss = tf.Print(loss, [nb_conf_box_pos], message='Nb Conf Box Positive \t', summarize=1000) 404 | loss = tf.Print(loss, [loss_conf_neg], message='Loss Conf Negative \t', summarize=1000) 405 | loss = tf.Print(loss, [loss_conf_pos], message='Loss Conf Positive \t', summarize=1000) 406 | loss = tf.Print(loss, [loss_conf], message='Loss Conf \t', summarize=1000) 407 | loss = tf.Print(loss, [loss_class], message='Loss Class \t', summarize=1000) 408 | loss = tf.Print(loss, [loss], message='Total Loss \t', summarize=1000) 409 | loss = tf.Print(loss, [total_loss / seen], message='Average Loss \t', summarize=1000) 410 | loss = tf.Print(loss, [nb_pred_box], message='Number of pred boxes \t', summarize=1000) 411 | loss = tf.Print(loss, [nb_true_box], message='Number of true boxes \t', summarize=1000) 412 | loss = tf.Print(loss, [current_recall], message='Current Recall \t', summarize=1000) 413 | loss = tf.Print(loss, [total_recall / seen], message='Average Recall \t', summarize=1000) 414 | 415 | 416 | return loss 417 | 418 | 419 | class MAP_evaluation(Callback): 420 | """ Evaluate a given dataset using a given model. 421 | Code originally from https://github.com/fizyr/keras-retinanet 422 | # Arguments 423 | generator : The generator that represents the dataset to evaluate. 424 | model : The model to evaluate. 425 | iou_threshold : The IoU threshold used to decide whether a detection counts as a positive or a negative. 426 | score_threshold : The confidence score threshold used to keep detections. 427 | save_path : The path to save images with visualized detections to. 428 | # Returns 429 | A dict mapping class names to mAP scores.
430 | """ 431 | 432 | def __init__(self, 433 | yolo, 434 | generator, 435 | iou_threshold=0.5, 436 | score_threshold=0.3, 437 | save_path=None, 438 | period=1, 439 | save_best=False, 440 | save_name=None, 441 | tensorboard=None): 442 | 443 | self.yolo = yolo 444 | self.generator = generator 445 | self.iou_threshold = iou_threshold 446 | self.save_path = save_path 447 | self.period = period 448 | self.save_best = save_best 449 | self.save_name = save_name 450 | self.tensorboard = tensorboard 451 | 452 | self.bestMap = 0 453 | 454 | self.model = self.yolo.model 455 | 456 | if not isinstance(self.tensorboard, TensorBoard) and self.tensorboard is not None: 457 | raise ValueError("Tensorboard object must be a instance from keras.callbacks.TensorBoard") 458 | 459 | 460 | def on_epoch_end(self, epoch, logs={}): 461 | print(epoch) 462 | #% self.period == 0 and self.period != 0: 463 | mAP, average_precisions = self.evaluate_mAP() 464 | print('\n') 465 | for label, average_precision in average_precisions.items(): 466 | print(self.yolo.labels[label], '{:.4f}'.format(average_precision)) 467 | print('mAP: {:.4f}'.format(mAP)) 468 | 469 | if self.save_best and self.save_name is not None and mAP > self.bestMap: 470 | print( 471 | "mAP improved from {} to {}, saving model to {}.".format(self.bestMap, mAP, self.save_name)) 472 | self.bestMap = mAP 473 | print(self.save_name) 474 | self.model.save(self.save_name) 475 | self.model.save_weights('checkpoints\\best-mAP.h5') 476 | else: 477 | print("mAP did not improve from {}.".format(self.bestMap)) 478 | 479 | if self.tensorboard is not None and self.tensorboard.writer is not None: 480 | import tensorflow as tf 481 | summary = tf.Summary() 482 | summary_value = summary.value.add() 483 | summary_value.simple_value = mAP 484 | summary_value.tag = "val_mAP" 485 | self.tensorboard.writer.add_summary(summary, epoch) 486 | 487 | 488 | def evaluate_mAP(self): 489 | average_precisions = self._calc_avg_precisions() 490 | mAP = sum(average_precisions.values()) / len(average_precisions) 491 | 492 | return mAP, average_precisions 493 | 494 | 495 | def _calc_avg_precisions(self): 496 | # gather all detections and annotations 497 | all_detections = [[None for i in range(self.generator.num_classes())] for j in 498 | range(self.generator.size())] 499 | all_annotations = [[None for i in range(self.generator.num_classes())] for j in 500 | range(self.generator.size())] 501 | 502 | for i in range(self.generator.size()): 503 | raw_image = self.generator.load_image(self.generator.dataset[i]['image_path']) 504 | raw_height, raw_width, _ = raw_image.shape 505 | # make the boxes and the labels 506 | pred_boxes = self.yolo.predict(os.path.join(ANNOT_DIR, 'images', self.generator.dataset[i]['image_path'])) 507 | 508 | score = np.array([box.score for box in pred_boxes]) 509 | pred_labels = np.array([box.label for box in pred_boxes]) 510 | 511 | if len(pred_boxes) > 0: 512 | pred_boxes = np.array([[box.xmin * raw_width, box.ymin * raw_height, box.xmax * raw_width, 513 | box.ymax * raw_height, box.score] for box in pred_boxes]) 514 | else: 515 | pred_boxes = np.array([[]]) 516 | 517 | # sort the boxes and the labels according to scores 518 | score_sort = np.argsort(-score) 519 | pred_labels = pred_labels[score_sort] 520 | pred_boxes = pred_boxes[score_sort] 521 | 522 | # copy detections to all_detections 523 | for label in range(self.generator.num_classes()): 524 | all_detections[i][label] = pred_boxes[pred_labels == label, :] 525 | 526 | annotations = self.generator.load_annotation(i) 527 
| 528 | # copy ground truth annotations to all_annotations 529 | for label in range(self.generator.num_classes()): 530 | all_annotations[i][label] = annotations[annotations[:, 4] == label, :4].copy() 531 | 532 | # compute mAP by comparing all detections and all annotations 533 | average_precisions = {} 534 | 535 | for label in range(self.generator.num_classes()): 536 | false_positives = np.zeros((0,)) 537 | true_positives = np.zeros((0,)) 538 | scores = np.zeros((0,)) 539 | num_annotations = 0.0 540 | 541 | for i in range(self.generator.size()): 542 | detections = all_detections[i][label] 543 | annotations = all_annotations[i][label] 544 | num_annotations += annotations.shape[0] 545 | detected_annotations = [] 546 | 547 | for d in detections: 548 | scores = np.append(scores, d[4]) 549 | 550 | if annotations.shape[0] == 0: 551 | false_positives = np.append(false_positives, 1) 552 | true_positives = np.append(true_positives, 0) 553 | continue 554 | 555 | overlaps = compute_overlap(np.expand_dims(d, axis=0), annotations) 556 | assigned_annotation = np.argmax(overlaps, axis=1) 557 | max_overlap = overlaps[0, assigned_annotation] 558 | 559 | if max_overlap >= self.iou_threshold and assigned_annotation not in detected_annotations: 560 | false_positives = np.append(false_positives, 0) 561 | true_positives = np.append(true_positives, 1) 562 | detected_annotations.append(assigned_annotation) 563 | else: 564 | false_positives = np.append(false_positives, 1) 565 | true_positives = np.append(true_positives, 0) 566 | 567 | # no annotations -> AP for this class is 0 (is this correct?) 568 | if num_annotations == 0: 569 | average_precisions[label] = 0 570 | continue 571 | 572 | # sort by score 573 | indices = np.argsort(-scores) 574 | false_positives = false_positives[indices] 575 | true_positives = true_positives[indices] 576 | 577 | # compute false positives and true positives 578 | false_positives = np.cumsum(false_positives) 579 | true_positives = np.cumsum(true_positives) 580 | 581 | # compute recall and precision 582 | recall = true_positives / num_annotations 583 | precision = true_positives / np.maximum(true_positives + false_positives, np.finfo(np.float64).eps) 584 | 585 | # compute average precision 586 | average_precision = compute_ap(recall, precision) 587 | average_precisions[label] = average_precision 588 | 589 | return average_precisions 590 | 591 | 592 | def dummy_loss(y_true, y_pred): 593 | return tf.sqrt(tf.reduce_sum(y_pred)) --------------------------------------------------------------------------------
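For reference, a minimal, self-contained sketch (not part of the repository) of the precision/recall bookkeeping that _calc_avg_precisions performs for a single class, using made-up true/false positive flags; the project's compute_ap helper would then turn the resulting precision-recall curve into an average precision:

import numpy as np

# Toy example: four detections for one class, already sorted by score, evaluated
# against three ground truth boxes; 1 marks a true positive, 0 a false positive.
true_positives = np.array([1., 1., 0., 1.])
false_positives = 1. - true_positives
num_annotations = 3.

# Running TP/FP counts, mirroring the np.cumsum calls in _calc_avg_precisions.
tp_cum = np.cumsum(true_positives)
fp_cum = np.cumsum(false_positives)

recall = tp_cum / num_annotations
precision = tp_cum / np.maximum(tp_cum + fp_cum, np.finfo(np.float64).eps)

print(recall)     # approx. [0.33, 0.67, 0.67, 1.00]
print(precision)  # approx. [1.00, 1.00, 0.67, 0.75]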