├── out
│   ├── .gitkeep
│   ├── out1.png
│   ├── out2.png
│   ├── out3.png
│   ├── out4.png
│   ├── out5.png
│   ├── out6.png
│   ├── out7.png
│   ├── out8.png
│   ├── out9.png
│   ├── out10.png
│   ├── out11.png
│   ├── out12.png
│   ├── out13.png
│   ├── out14.png
│   ├── out15.png
│   └── out16.png
├── dataset
│   └── images
│       └── .gitkeep
├── test_images
│   ├── .gitkeep
│   ├── test (1).png
│   ├── test (10).png
│   ├── test (11).png
│   ├── test (12).png
│   ├── test (13).png
│   ├── test (14).png
│   ├── test (15).png
│   ├── test (16).png
│   ├── test (2).png
│   ├── test (3).png
│   ├── test (4).png
│   ├── test (5).png
│   ├── test (6).png
│   ├── test (7).png
│   ├── test (8).png
│   └── test (9).png
├── main.py
├── config.json
├── predict.py
├── detect_test_images.py
├── .gitignore
├── preprocessing.py
├── grab_screen.py
├── README.md
├── generate_anchors.py
├── postprocessing.py
├── utils.py
└── yolo.py
/out/.gitkeep:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/dataset/images/.gitkeep:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/test_images/.gitkeep:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/out/out1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/affinis-lab/traffic-light-detection-module/HEAD/out/out1.png
--------------------------------------------------------------------------------
/out/out2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/affinis-lab/traffic-light-detection-module/HEAD/out/out2.png
--------------------------------------------------------------------------------
/out/out3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/affinis-lab/traffic-light-detection-module/HEAD/out/out3.png
--------------------------------------------------------------------------------
/out/out4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/affinis-lab/traffic-light-detection-module/HEAD/out/out4.png
--------------------------------------------------------------------------------
/out/out5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/affinis-lab/traffic-light-detection-module/HEAD/out/out5.png
--------------------------------------------------------------------------------
/out/out6.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/affinis-lab/traffic-light-detection-module/HEAD/out/out6.png
--------------------------------------------------------------------------------
/out/out7.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/affinis-lab/traffic-light-detection-module/HEAD/out/out7.png
--------------------------------------------------------------------------------
/out/out8.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/affinis-lab/traffic-light-detection-module/HEAD/out/out8.png
--------------------------------------------------------------------------------
/out/out9.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/affinis-lab/traffic-light-detection-module/HEAD/out/out9.png
--------------------------------------------------------------------------------
/out/out10.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/affinis-lab/traffic-light-detection-module/HEAD/out/out10.png
--------------------------------------------------------------------------------
/out/out11.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/affinis-lab/traffic-light-detection-module/HEAD/out/out11.png
--------------------------------------------------------------------------------
/out/out12.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/affinis-lab/traffic-light-detection-module/HEAD/out/out12.png
--------------------------------------------------------------------------------
/out/out13.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/affinis-lab/traffic-light-detection-module/HEAD/out/out13.png
--------------------------------------------------------------------------------
/out/out14.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/affinis-lab/traffic-light-detection-module/HEAD/out/out14.png
--------------------------------------------------------------------------------
/out/out15.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/affinis-lab/traffic-light-detection-module/HEAD/out/out15.png
--------------------------------------------------------------------------------
/out/out16.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/affinis-lab/traffic-light-detection-module/HEAD/out/out16.png
--------------------------------------------------------------------------------
/test_images/test (1).png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/affinis-lab/traffic-light-detection-module/HEAD/test_images/test (1).png
--------------------------------------------------------------------------------
/test_images/test (10).png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/affinis-lab/traffic-light-detection-module/HEAD/test_images/test (10).png
--------------------------------------------------------------------------------
/test_images/test (11).png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/affinis-lab/traffic-light-detection-module/HEAD/test_images/test (11).png
--------------------------------------------------------------------------------
/test_images/test (12).png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/affinis-lab/traffic-light-detection-module/HEAD/test_images/test (12).png
--------------------------------------------------------------------------------
/test_images/test (13).png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/affinis-lab/traffic-light-detection-module/HEAD/test_images/test (13).png
--------------------------------------------------------------------------------
/test_images/test (14).png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/affinis-lab/traffic-light-detection-module/HEAD/test_images/test (14).png
--------------------------------------------------------------------------------
/test_images/test (15).png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/affinis-lab/traffic-light-detection-module/HEAD/test_images/test (15).png
--------------------------------------------------------------------------------
/test_images/test (16).png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/affinis-lab/traffic-light-detection-module/HEAD/test_images/test (16).png
--------------------------------------------------------------------------------
/test_images/test (2).png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/affinis-lab/traffic-light-detection-module/HEAD/test_images/test (2).png
--------------------------------------------------------------------------------
/test_images/test (3).png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/affinis-lab/traffic-light-detection-module/HEAD/test_images/test (3).png
--------------------------------------------------------------------------------
/test_images/test (4).png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/affinis-lab/traffic-light-detection-module/HEAD/test_images/test (4).png
--------------------------------------------------------------------------------
/test_images/test (5).png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/affinis-lab/traffic-light-detection-module/HEAD/test_images/test (5).png
--------------------------------------------------------------------------------
/test_images/test (6).png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/affinis-lab/traffic-light-detection-module/HEAD/test_images/test (6).png
--------------------------------------------------------------------------------
/test_images/test (7).png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/affinis-lab/traffic-light-detection-module/HEAD/test_images/test (7).png
--------------------------------------------------------------------------------
/test_images/test (8).png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/affinis-lab/traffic-light-detection-module/HEAD/test_images/test (8).png
--------------------------------------------------------------------------------
/test_images/test (9).png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/affinis-lab/traffic-light-detection-module/HEAD/test_images/test (9).png
--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import json
3 |
4 | from yolo import YOLO
5 |
6 |
7 | def main(args):
8 | config_path = args.conf
9 |
10 | with open(config_path) as config_buffer:
11 | config = json.loads(config_buffer.read())
12 |
13 | yolo = YOLO(config)
14 | if config['train']['enabled']:
15 | #yolo.load_weights(config['model']['saved_model_name'])
16 | yolo.train()
17 | else:
18 | yolo.evaluate()
19 |
20 |
21 | if __name__ == "__main__":
22 | arg_parser = argparse.ArgumentParser(
23 | description='Train and validate autonomous car module')
24 |
25 | arg_parser.add_argument(
26 | '-c',
27 | '--conf',
28 | help='path to the configuration file')
29 |
30 | main(arg_parser.parse_args())
31 |
--------------------------------------------------------------------------------
/config.json:
--------------------------------------------------------------------------------
1 | {
2 | "model" :
3 | {
4 | "image_h": 416,
5 | "image_w": 416,
6 | "anchors": [0.24,0.79, 0.80,2.12],
7 | "max_obj": 5,
8 | "grid_h": 13,
9 | "grid_w": 13,
10 | "num_classes": 2,
11 | "classes": ["go", "stop"],
12 | "obj_thresh": 0.20,
13 | "nms_thresh": 0.01,
14 | "saved_model_name": "model.09-2.01.h5",
15 | "obj_scale": 85,
16 | "no_obj_scale": 75,
17 | "coord_scale": 70,
18 | "class_scale": 70
19 | },
20 |
21 | "plot_model": true,
22 |
23 | "train": {
24 | "enabled": false,
25 | "annot_file_name": "carla_all.csv",
26 | "batch_size": 8,
27 | "learning_rate": 1e-4,
28 | "nb_epochs": 500,
29 | "warmup_batches": 0,
30 | "debug": true
31 | }
32 | }
--------------------------------------------------------------------------------
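Note on the "anchors" entry above: the list is read as (width, height) pairs in grid-cell units, so the two pairs here correspond to the two box predictors of the model (nb_box = len(anchors) / 2 in yolo.py), and they are consumed by decode_netout in postprocessing.py. A minimal sketch (not part of the repository) of what the pairs mean, assuming config.json sits in the working directory:

import json

with open('config.json') as f:
    config = json.load(f)

anchors = config['model']['anchors']                       # [0.24, 0.79, 0.80, 2.12]
grid_w, grid_h = config['model']['grid_w'], config['model']['grid_h']

# decode_netout scales each predicted box by its anchor pair; for a raw
# prediction of 0 the box defaults to anchor_w / grid_w of the image width
for b in range(len(anchors) // 2):
    w = anchors[2 * b + 0] / grid_w   # fraction of image width
    h = anchors[2 * b + 1] / grid_h   # fraction of image height
    print('anchor %d prior: about %.1f%% x %.1f%% of the image' % (b, 100 * w, 100 * h))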
/predict.py:
--------------------------------------------------------------------------------
1 | from keras.models import load_model
2 | import os
3 | import numpy as np
4 |
5 | from yolo import YOLO, dummy_loss
6 | from preprocessing import load_image_predict
7 | from postprocessing import decode_netout
8 |
9 |
10 | BASE_DIR = os.path.dirname(os.path.abspath(__file__))
11 |
12 |
13 | def get_model(config):
14 | model = YOLO(
15 | config =config
16 | )
17 | model.load_weights(os.path.join(BASE_DIR, config['model']['saved_model_name']))
18 | return model
19 |
20 |
21 | def get_model_from_file(config):
22 | path = os.path.join(BASE_DIR, 'checkpoints', config['model']['saved_model_name'])
23 | model = load_model(path, custom_objects={'custom_loss': dummy_loss})
24 | return model
25 |
26 |
27 | def predict_with_model_from_file(config, model, image_path):
28 | image = load_image_predict(image_path, config['model']['image_h'], config['model']['image_w'])
29 |
30 | dummy_array = np.zeros((1, 1, 1, 1, config['model']['max_obj'], 4))
31 | netout = model.predict([image, dummy_array])[0]
32 |
33 | boxes = decode_netout(netout=netout, anchors=config['model']['anchors'],
34 | nb_class=config['model']['num_classes'],
35 | obj_threshold=config['model']['obj_thresh'],
36 | nms_threshold=config['model']['nms_thresh'])
37 | return boxes
--------------------------------------------------------------------------------
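The zero-filled dummy_array exists because the model built in yolo.py takes a second input (true_boxes) that appears to be used only by the training loss, so inference just feeds zeros of shape (1, 1, 1, 1, max_obj, 4). A minimal single-image usage sketch, assuming config.json and the saved model referenced by config['model']['saved_model_name'] are present in checkpoints/ (the test image path is simply one of the files in test_images/, and 'out/example.png' is an arbitrary output name):

import json
import cv2

from predict import get_model_from_file, predict_with_model_from_file
from postprocessing import draw_boxes

with open('config.json') as f:
    config = json.load(f)

model = get_model_from_file(config)                 # Keras model with the dummy true_boxes input
image_path = 'test_images/test (1).png'
boxes = predict_with_model_from_file(config, model, image_path)

# draw the predicted boxes on the original-resolution image and save it
out = draw_boxes(cv2.imread(image_path), boxes, config['model']['classes'])
cv2.imwrite('out/example.png', out)

detect_test_images.py below does exactly this in a loop over the whole test_images folder.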
/detect_test_images.py:
--------------------------------------------------------------------------------
1 | import json
2 | import os
3 | import argparse
4 | import cv2
5 |
6 | from postprocessing import draw_boxes
7 | from predict import predict_with_model_from_file, get_model_from_file
8 |
9 |
10 | BASE_DIR = os.path.dirname(__file__)
11 | TEST_IMAGES_DIR = os.path.join(BASE_DIR, 'test_images')
12 | OUT_IMAGES_DIR = os.path.join(BASE_DIR, 'out')
13 |
14 |
15 | def detect_on_test_images(config):
16 | model = get_model_from_file(config)
17 |
18 | all_images = [f for f in os.listdir(TEST_IMAGES_DIR) if os.path.isfile(os.path.join(TEST_IMAGES_DIR, f))]
19 | img_num = 1
20 | for image_name in all_images:
21 | image_path = os.path.join(TEST_IMAGES_DIR, image_name)
22 |
23 | netout = predict_with_model_from_file(config, model, image_path)
24 | plt_image = draw_boxes(cv2.imread(image_path), netout, config['model']['classes'])
25 |
26 | #cv2.imshow('demo', plt_image)
27 | #cv2.waitKey(0)
28 |
29 | cv2.imwrite(os.path.join(OUT_IMAGES_DIR, 'out' + str(img_num) + '.png'), plt_image)
30 | img_num += 1
31 |
32 |
33 | def main(args):
34 | config_path = args.conf
35 |
36 | with open(config_path) as config_buffer:
37 | config = json.loads(config_buffer.read())
38 |
39 | detect_on_test_images(config)
40 |
41 |
42 | if __name__ == '__main__':
43 | argparser = argparse.ArgumentParser()
44 |
45 | argparser.add_argument(
46 | '-c',
47 | '--conf',
48 | default='config.json',
49 | help='Path to configuration file')
50 |
51 | args = argparser.parse_args()
52 | main(args)
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | .idea
12 | build/
13 | develop-eggs/
14 | dist/
15 | downloads/
16 | eggs/
17 | .eggs/
18 | lib/
19 | lib64/
20 | parts/
21 | sdist/
22 | var/
23 | wheels/
24 | *.egg-info/
25 | .installed.cfg
26 | *.egg
27 | MANIFEST
28 |
29 | dataset/images/*
30 | !dataset/images/.gitkeep
31 | *.h5
32 | trainval.json
33 | checkpoints/*
34 |
35 | # PyInstaller
36 | # Usually these files are written by a python script from a template
37 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
38 | *.manifest
39 | *.spec
40 |
41 | # Installer logs
42 | pip-log.txt
43 | pip-delete-this-directory.txt
44 |
45 | # Unit test / coverage reports
46 | htmlcov/
47 | .tox/
48 | .coverage
49 | .coverage.*
50 | .cache
51 | nosetests.xml
52 | coverage.xml
53 | *.cover
54 | .hypothesis/
55 | .pytest_cache/
56 |
57 | # Translations
58 | *.mo
59 | *.pot
60 |
61 | # Django stuff:
62 | *.log
63 | local_settings.py
64 | db.sqlite3
65 |
66 | # Flask stuff:
67 | instance/
68 | .webassets-cache
69 |
70 | # Scrapy stuff:
71 | .scrapy
72 |
73 | # Sphinx documentation
74 | docs/_build/
75 |
76 | # PyBuilder
77 | target/
78 |
79 | # Jupyter Notebook
80 | .ipynb_checkpoints
81 |
82 | # pyenv
83 | .python-version
84 |
85 | # celery beat schedule file
86 | celerybeat-schedule
87 |
88 | # SageMath parsed files
89 | *.sage.py
90 |
91 | # Environments
92 | .env
93 | .venv
94 | env/
95 | venv/
96 | ENV/
97 | env.bak/
98 | venv.bak/
99 |
100 | # Spyder project settings
101 | .spyderproject
102 | .spyproject
103 |
104 | # Rope project settings
105 | .ropeproject
106 |
107 | # mkdocs documentation
108 | /site
109 |
110 | # mypy
111 | .mypy_cache/
112 |
--------------------------------------------------------------------------------
/preprocessing.py:
--------------------------------------------------------------------------------
1 | import cv2
2 | import pandas as pd
3 | import numpy as np
4 | import os
5 | from sklearn.preprocessing import LabelEncoder
6 |
7 |
8 | def load_image_predict(image_path, image_h, image_w):
9 | image = cv2.imread(image_path)
10 | image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
11 | image = cv2.resize(image, (image_h, image_w))
12 | image = image/255
13 | image = np.expand_dims(image, 0)
14 |
15 | return image
16 |
17 |
18 | def load_carla_data(path, labels):
19 | le = LabelEncoder()
20 | le.fit_transform(labels)
21 |
22 | data = pd.read_csv(path, delimiter=",", header=None)
23 |
24 | dataset = {}
25 |
26 | objects_omitted = 0
27 | red = 0
28 | green = 0
29 | for record in data[1:][data.columns[:7]].values:
30 | tokens = record[5].split(",")
31 |
32 | xmin, ymin, xmax, ymax = float(tokens[1].split(":")[1]), float(tokens[2].split(":")[1]),\
33 | float(tokens[3].split(":")[1]), float(tokens[4].split(":")[1].replace("}", ""))
34 |
35 | #omit small images
36 | if xmax < 15:
37 | objects_omitted += 1
38 | continue
39 |
40 | xmax += xmin
41 | ymax += ymin
42 |
43 | if "stop" in record[6]:
44 | obj_class = "stop"
45 | red += 1
46 | else:
47 | obj_class = "go"
48 | green += 1
49 |
50 | obj = {}
51 | obj['xmin'], obj['ymin'], obj['xmax'], obj['ymax'], obj['class'] = xmin, ymin, xmax, ymax, obj_class
52 |
53 | image_path = record[0]
54 |
55 | #image_path = os.path.join("images", image_path)
56 |
57 | if image_path in dataset:
58 | dataset[image_path].append(obj)
59 | else:
60 | dataset[image_path] = [obj]
61 |
62 | print("Objects omitted", objects_omitted)
63 | print("Red light: ", red)
64 | print("Green light: ", green)
65 |
66 | instances = []
67 |
68 | for key in dataset.keys():
69 | inst = {}
70 |
71 | inst['image_path'] = key
72 | inst['object'] = dataset[key]
73 |
74 | instances.append(inst)
75 |
76 | return instances
77 |
78 |
79 | def load_image(path):
80 | img = cv2.imread(os.path.join(path))
81 |
82 | if len(img.shape) == 3:
83 | img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
84 |
85 | if len(img.shape) == 1:
86 | img = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB)
87 |
88 | return img
--------------------------------------------------------------------------------
/grab_screen.py:
--------------------------------------------------------------------------------
1 | import copy
2 | import json
3 | import os
4 | import argparse
5 | import cv2
6 | import numpy as np
7 | import win32gui, win32ui, win32con, win32api
8 | import tensorflow as tf
9 | from keras.models import load_model
10 | from mss import mss
11 |
12 | from yolo import dummy_loss
13 | from postprocessing import decode_netout, draw_boxes
14 |
15 |
16 | BASE_DIR = os.path.dirname(__file__)
17 |
18 |
19 | def grab_screen_slower(region=None):
20 | hwin = win32gui.GetDesktopWindow()
21 |
22 | if region:
23 | left, top, x2, y2 = region
24 | width = x2 - left + 1
25 | height = y2 - top + 1
26 | else:
27 | width = win32api.GetSystemMetrics(win32con.SM_CXVIRTUALSCREEN)
28 | height = win32api.GetSystemMetrics(win32con.SM_CYVIRTUALSCREEN)
29 | left = win32api.GetSystemMetrics(win32con.SM_XVIRTUALSCREEN)
30 | top = win32api.GetSystemMetrics(win32con.SM_YVIRTUALSCREEN)
31 |
32 | hwindc = win32gui.GetWindowDC(hwin)
33 | srcdc = win32ui.CreateDCFromHandle(hwindc)
34 | memdc = srcdc.CreateCompatibleDC()
35 | bmp = win32ui.CreateBitmap()
36 | bmp.CreateCompatibleBitmap(srcdc, width, height)
37 | memdc.SelectObject(bmp)
38 | memdc.BitBlt((0, 0), (width, height), srcdc, (left, top), win32con.SRCCOPY)
39 |
40 | signedIntsArray = bmp.GetBitmapBits(True)
41 | img = np.fromstring(signedIntsArray, dtype='uint8')
42 | img.shape = (height, width, 4)
43 |
44 | srcdc.DeleteDC()
45 | memdc.DeleteDC()
46 | win32gui.ReleaseDC(hwin, hwindc)
47 | win32gui.DeleteObject(bmp.GetHandle())
48 |
49 |
50 | return cv2.cvtColor(img, cv2.COLOR_BGRA2RGB)
51 |
52 |
53 | def grab_and_broadcast_screen(config):
54 | paused = False
55 |
56 | mon = {'top': 10, 'left': 10, 'width': 750, 'height': 680}
57 | sct = mss()
58 |
59 | dummy_array = np.zeros((1, 1, 1, 1, config['model']['max_obj'], 4))
60 |
61 | model = load_model("checkpoints/traffic-light-model.h5",
62 | custom_objects={'custom_loss': dummy_loss, 'tf': tf})
63 |
64 | frame_num = 0
65 | while (True):
66 |
67 | if not paused:
68 | screen = np.array(sct.grab(mon))
69 | screen = cv2.cvtColor(screen, cv2.COLOR_BGR2RGB)
70 |
71 | plt_image = copy.deepcopy(screen)
72 |
73 | screen = cv2.resize(screen, (config['model']['image_h'], config['model']['image_w']))
74 | screen = screen / 255.
75 | screen = np.expand_dims(screen, 0)
76 |
77 | netout = model.predict([screen, dummy_array])[0]
78 |
79 | boxes = decode_netout(netout,
80 | obj_threshold=config['model']['obj_thresh'],
81 | nms_threshold=config['model']['nms_thresh'],
82 | anchors=config['model']['anchors'],
83 | nb_class=config['model']['num_classes'])
84 |
85 | plt_image = draw_boxes(plt_image, boxes, labels=config['model']['classes'])
86 | cv2.imshow('window', cv2.cvtColor(plt_image, cv2.COLOR_BGR2RGB))
87 |
88 | key = cv2.waitKey(10) & 0xff
89 | if key == 27:
90 | cv2.destroyAllWindows()
91 | break
92 |
93 |
94 | #save images when o or O is pressed
95 | if key == ord('o') or key == ord('O'):
96 | cv2.imwrite(os.path.join(BASE_DIR, 'out', str(frame_num)+".png"), cv2.cvtColor(plt_image, cv2.COLOR_BGR2RGB))
97 | frame_num += 1
98 |
99 |
100 | def main(args):
101 | config_path = args.conf
102 |
103 | with open(config_path) as config_buffer:
104 | config = json.loads(config_buffer.read())
105 |
106 | grab_and_broadcast_screen(config)
107 |
108 |
109 | if __name__ == '__main__':
110 | argparser = argparse.ArgumentParser()
111 |
112 | argparser.add_argument(
113 | '-c',
114 | '--conf',
115 | default='config.json',
116 | help='Path to configuration file')
117 |
118 | args = argparser.parse_args()
119 | main(args)
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # traffic-light-detection-module
2 |
3 | ⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️
4 | # IMPORTANT UPDATE
5 | This repo is deprecated. For a newer and improved model, implemented in PyTorch, please refer to this [repo](https://github.com/filipbasara0/simple-object-detection/blob/main/README.md#carla-traffic-lights).
6 |
7 | ⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️
8 |
9 |
10 | 
11 |
12 | ## About
13 |
14 | Module for detecting traffic lights in the [CARLA autonomous driving simulator](http://carla.org/) (version: 0.8.4).
15 | Built upon and inspired by https://github.com/experiencor/keras-yolo2.
16 | Instructions and more traffic light detection examples can be found below.
17 |
 18 | - This module is used along with several other [modules](https://github.com/affinis-lab) to implement our version of imitation learning in the CARLA simulator. Results of the [core](https://github.com/affinis-lab/core) module can be found in its [repository](https://github.com/affinis-lab/core).
 19 | 
 20 | - The object detection model is based on Tiny YOLOv2.
 21 | 
 22 | - Training started from YOLOv2 weights pretrained on COCO.
 23 | 
 24 | - The model was first trained on the LISA traffic light detection dataset (~5800 images) and then on a dataset I collected in the CARLA simulator (~1800 images).
25 |
26 | ## CARLA dataset and model
27 |
 28 | - The dataset I collected in the CARLA simulator can be found [here](https://drive.google.com/drive/folders/1TXkPLWlNgauPhQnKEoPDZsx7Px1MD9n_?usp=sharing), and its annotations [here](https://github.com/affinis-lab/traffic-light-detection-module/blob/master/dataset/carla_all.csv).
 29 | 
 30 | - **Important note** - several images in the dataset are left out of the annotations because their bounding boxes are too small (the traffic lights are too far away). Objects with xmax < 15 are also filtered out when the dataset is loaded. In total around 70-80 of the ~1800 images are left out, so the impact is minor.
31 |
32 | - Pretrained model can be found [here](https://drive.google.com/file/d/1FVb6b6axN2WAYePv0_zLyiWDois7PgMZ/view?usp=sharing).
33 |
34 |
35 | ## Instructions
36 | - To train:
 37 |   - In the [config](https://github.com/affinis-lab/traffic-light-detection-module/blob/master/config.json) file set _train_ -> _enabled_ to **true**
 38 |   - Put your annotations file in the **dataset** folder
 39 |   - In the [config](https://github.com/affinis-lab/traffic-light-detection-module/blob/master/config.json) file set _train_ -> _annot_file_name_ to the name of your annotations file
 40 |   - Put your images in the **dataset/images** folder
 41 |   - If necessary, adjust the parameters in [config](https://github.com/affinis-lab/traffic-light-detection-module/blob/master/config.json) according to your problem/dataset
 42 |   - Run `python main.py -c config.json`
43 |
44 | - To evaluate:
 45 |   - In the [config](https://github.com/affinis-lab/traffic-light-detection-module/blob/master/config.json) file set _train_ -> _enabled_ to **false**
 46 |   - Put your annotations file (listing the images to evaluate on) in the **dataset** folder
 47 |   - In the [config](https://github.com/affinis-lab/traffic-light-detection-module/blob/master/config.json) file set _train_ -> _annot_file_name_ to the name of that annotations file
 48 |   - Put your images in the **dataset/images** folder
 49 |   - If necessary, adjust the parameters in [config](https://github.com/affinis-lab/traffic-light-detection-module/blob/master/config.json) according to your problem/dataset
 50 |   - Run `python main.py -c config.json`
51 |
52 | - To generate anchors:
 53 |   - Run `python generate_anchors.py -c config.json` (the number of anchors can be set with `-a`, default 2)
54 |
55 | - Soon to be added:
 56 |   - Real-time traffic light detection GIFs
57 |
58 | ## Examples
 59 | - Several example predictions; more can be found in the [out folder](https://github.com/affinis-lab/traffic-light-detection-module/tree/master/out)
60 |
61 | 
62 | 
63 | 
64 | 
65 | 
66 | 
67 | 
68 |
--------------------------------------------------------------------------------
/generate_anchors.py:
--------------------------------------------------------------------------------
1 | import random
2 | import argparse
3 | import numpy as np
4 | import cv2
5 | import os
6 | import json
7 |
8 | from preprocessing import load_carla_data
9 |
10 |
11 | argparser = argparse.ArgumentParser()
12 |
13 | argparser.add_argument(
14 | '-c',
15 | '--conf',
16 | default='config.json',
17 | help='path to configuration file')
18 |
19 | argparser.add_argument(
20 | '-a',
21 | '--anchors',
22 | default=2,
23 | help='number of anchors to use')
24 |
25 |
26 | BASE_DIR = os.path.dirname(__file__)
27 | IMAGES_DIR = os.path.join(BASE_DIR, 'dataset', 'images')
28 |
29 |
30 | def IOU(ann, centroids):
31 | w, h = ann
32 | similarities = []
33 |
34 | for centroid in centroids:
35 | c_w, c_h = centroid
36 |
37 | if c_w >= w and c_h >= h:
38 | similarity = w*h/(c_w*c_h)
39 | elif c_w >= w and c_h <= h:
40 | similarity = w*c_h/(w*h + (c_w-w)*c_h)
41 | elif c_w <= w and c_h >= h:
42 | similarity = c_w*h/(w*h + c_w*(c_h-h))
43 | else: #means both w,h are bigger than c_w and c_h respectively
44 | similarity = (c_w*c_h)/(w*h)
45 | similarities.append(similarity) # will become (k,) shape
46 |
47 | return np.array(similarities)
48 |
49 |
50 | def avg_IOU(anns, centroids):
51 | n,d = anns.shape
52 | sum = 0.
53 |
54 | for i in range(anns.shape[0]):
55 | sum+= max(IOU(anns[i], centroids))
56 |
57 | return sum/n
58 |
59 |
60 | def print_anchors(centroids):
61 | anchors = centroids.copy()
62 |
63 | widths = anchors[:, 0]
64 | sorted_indices = np.argsort(widths)
65 |
66 | r = "anchors: ["
67 | for i in sorted_indices[:-1]:
68 | r += '%0.2f,%0.2f, ' % (anchors[i,0], anchors[i,1])
69 |
70 | r += '%0.2f,%0.2f' % (anchors[sorted_indices[-1:],0], anchors[sorted_indices[-1:],1])
71 | r += "]"
72 |
73 | print(r)
74 |
75 |
76 | def run_kmeans(ann_dims, anchor_num):
77 | ann_num = ann_dims.shape[0]
78 | prev_assignments = np.ones(ann_num)*(-1)
79 | iterations = 0
80 | old_distances = np.zeros((ann_num, anchor_num))
81 |
82 | indices = [random.randrange(ann_dims.shape[0]) for i in range(anchor_num)]
83 | centroids = ann_dims[indices]
84 | anchor_dim = ann_dims.shape[1]
85 |
86 | while True:
87 | distances = []
88 | iterations += 1
89 | for i in range(ann_num):
90 | d = 1 - IOU(ann_dims[i], centroids)
91 | distances.append(d)
92 | distances = np.array(distances) # distances.shape = (ann_num, anchor_num)
93 |
94 | print("iteration {}: dists = {}".format(iterations, np.sum(np.abs(old_distances-distances))))
95 |
96 | #assign samples to centroids
97 | assignments = np.argmin(distances,axis=1)
98 |
99 | if (assignments == prev_assignments).all():
100 | return centroids
101 |
102 | #calculate new centroids
103 | centroid_sums=np.zeros((anchor_num, anchor_dim), np.float)
104 | for i in range(ann_num):
105 | centroid_sums[assignments[i]]+=ann_dims[i]
106 | for j in range(anchor_num):
107 | centroids[j] = centroid_sums[j]/(np.sum(assignments==j) + 1e-6)
108 |
109 | prev_assignments = assignments.copy()
110 | old_distances = distances.copy()
111 |
112 |
113 | def main(args):
114 | config_path = args.conf
115 | num_anchors = args.anchors
116 |
117 | with open(config_path) as config_buffer:
118 | config = json.loads(config_buffer.read())
119 |
120 | data = load_carla_data(os.path.join(BASE_DIR, 'dataset',config['train']['annot_file_name']), config['model']['classes'])
121 |
122 | print(len(data))
123 |
124 | np.random.shuffle(data)
125 |
126 | grid_w = config['model']['grid_w']
127 | grid_h = config['model']['grid_h']
128 |
129 | # run k_mean to find the anchors
130 | annotation_dims = []
131 | for image in data:
132 | img = cv2.imread(os.path.join(IMAGES_DIR,image['image_path']))
133 | h, w, c = img.shape
134 | cell_w = w/grid_w
135 | cell_h = h/grid_h
136 |
137 | for obj in image['object']:
138 | relative_w = (float(obj['xmax']) - float(obj['xmin']))/cell_w
139 | relatice_h = (float(obj["ymax"]) - float(obj['ymin']))/cell_h
140 | annotation_dims.append(tuple(map(float, (relative_w,relatice_h))))
141 |
142 | annotation_dims = np.array(annotation_dims)
143 | centroids = run_kmeans(annotation_dims, num_anchors)
144 |
145 | # write anchors to file
146 | print('\naverage IOU for', num_anchors, 'anchors:', '%0.2f' % avg_IOU(annotation_dims, centroids))
147 | print_anchors(centroids)
148 |
149 |
150 | if __name__ == '__main__':
151 | args = argparser.parse_args()
152 | main(args)
--------------------------------------------------------------------------------
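The clustering above uses 1 - IOU as the distance, so the resulting centroids are typical (width, height) box shapes expressed in grid-cell units, and the printed line can be pasted into the "anchors" field of config.json. A toy sketch (made-up box dimensions, not from the real dataset) showing the expected flow and output format:

import numpy as np
from generate_anchors import run_kmeans, print_anchors

# hypothetical (width, height) pairs already converted to grid-cell units
ann_dims = np.array([[0.2, 0.7], [0.3, 0.9], [0.8, 2.0], [0.9, 2.3]])

centroids = run_kmeans(ann_dims, anchor_num=2)
print_anchors(centroids)   # prints something like: anchors: [0.25,0.80, 0.85,2.15]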
/postprocessing.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import cv2
3 |
4 |
5 | def bbox_iou(box1, box2):
6 | intersect_w = interval_overlap([box1.xmin, box1.xmax], [box2.xmin, box2.xmax])
7 | intersect_h = interval_overlap([box1.ymin, box1.ymax], [box2.ymin, box2.ymax])
8 |
9 | intersect = intersect_w * intersect_h
10 |
11 | w1, h1 = box1.xmax - box1.xmin, box1.ymax - box1.ymin
12 | w2, h2 = box2.xmax - box2.xmin, box2.ymax - box2.ymin
13 |
14 | union = w1 * h1 + w2 * h2 - intersect
15 |
16 | return float(intersect) / union
17 |
18 |
19 | def decode_netout(netout, anchors, nb_class, obj_threshold=0.3, nms_threshold=0.3):
20 | grid_h, grid_w, nb_box = netout.shape[:3]
21 | #grid_h, grid_w, nb_box = config['models']['traffic_light_module']['grid_h'], GRID_W, BOX
22 | boxes = []
23 |
24 | # decode the output by the network
25 | netout[..., 4] = _sigmoid(netout[..., 4])
26 | netout[..., 5:] = netout[..., 4][..., np.newaxis] * _softmax(netout[..., 5:])
27 | netout[..., 5:] *= netout[..., 5:] > obj_threshold
28 |
29 | for row in range(grid_h):
30 | for col in range(grid_w):
31 | for b in range(nb_box):
 32 |                 # element 4 is the objectness confidence; elements 5 onwards are the class probabilities
33 | classes = netout[row, col, b, 5:]
34 |
35 | if np.sum(classes) > 0:
36 | # first 4 elements are x, y, w, and h
37 | x, y, w, h = netout[row, col, b, :4]
38 |
39 | x = (col + _sigmoid(x)) / grid_w # center position, unit: image width
40 | y = (row + _sigmoid(y)) / grid_h # center position, unit: image height
41 |
42 | w = anchors[2 * b + 0] * np.exp(w) / grid_w # unit: image width
43 | h = anchors[2 * b + 1] * np.exp(h) / grid_h # unit: image height
44 | confidence = netout[row, col, b, 4]
45 |
46 | box = BoundBox(x - w / 2, y - h / 2, x + w / 2, y + h / 2, confidence, classes)
47 |
48 | boxes.append(box)
49 |
50 | # suppress non-maximal boxes
51 | for c in range(nb_class):
52 | sorted_indices = list(reversed(np.argsort([box.classes[c] for box in boxes])))
53 |
54 | for i in range(len(sorted_indices)):
55 | index_i = sorted_indices[i]
56 |
57 | if boxes[index_i].classes[c] == 0:
58 | continue
59 | else:
60 | for j in range(i + 1, len(sorted_indices)):
61 | index_j = sorted_indices[j]
62 |
63 | if bbox_iou(boxes[index_i], boxes[index_j]) >= nms_threshold:
64 | boxes[index_j].classes[c] = 0
65 |
 66 |     # remove the boxes whose score is below obj_threshold
67 | boxes = [box for box in boxes if box.get_score() > obj_threshold]
68 |
69 | boxes = sorted(boxes, key=lambda box: box.get_score(), reverse=True)
70 | if len(boxes) > 0: boxes = [boxes[0]]
71 | return boxes
72 |
73 |
74 | def draw_boxes(image, boxes, labels):
75 | image_h, image_w, _ = image.shape
76 |
77 | for box in boxes:
78 |
 79 |         if box.xmin > 1 or box.xmax > 1 or box.ymin > 1 or box.ymax > 1:  # box coordinates are normalized to [0, 1]
 80 |             continue
81 |
82 | if (box.xmin<0): box.xmin = 0
83 | if (box.ymin < 0): box.ymin = 0
84 | if (box.xmax < 0): box.xmax = 0
85 | if (box.ymax < 0): box.ymax = 0
86 |
87 | xmin = int(box.xmin * image_w)
88 | ymin = int(box.ymin * image_h)
89 | xmax = int(box.xmax * image_w)
90 | ymax = int(box.ymax * image_h)
91 |
92 | label = labels[box.get_label()]
93 |
94 | r_color = 0
95 | g_color = 0
96 |
97 | if label== "go":
98 | g_color = 255
99 | else:
100 | r_color = 255
101 |
102 | cv2.rectangle(image, (xmin, ymin), (xmax, ymax), (0, g_color, r_color), 3)
103 | cv2.putText(image,
104 | label + ' ' + str(round(box.get_score(),4)),
105 | (xmin, ymin - 13),
106 | cv2.FONT_HERSHEY_SIMPLEX,
107 | 1e-3 * image_h,
108 | (0, g_color, r_color), 2)
109 |
110 | return image
111 |
112 |
113 | def compute_overlap(a, b):
114 | """
115 | Code originally from https://github.com/rbgirshick/py-faster-rcnn.
116 | Parameters
117 | ----------
118 | a: (N, 4) ndarray of float
119 | b: (K, 4) ndarray of float
120 | Returns
121 | -------
122 | overlaps: (N, K) ndarray of overlap between boxes and query_boxes
123 | """
124 | area = (b[:, 2] - b[:, 0]) * (b[:, 3] - b[:, 1])
125 |
126 | iw = np.minimum(np.expand_dims(a[:, 2], axis=1), b[:, 2]) - np.maximum(np.expand_dims(a[:, 0], 1), b[:, 0])
127 | ih = np.minimum(np.expand_dims(a[:, 3], axis=1), b[:, 3]) - np.maximum(np.expand_dims(a[:, 1], 1), b[:, 1])
128 |
129 | iw = np.maximum(iw, 0)
130 | ih = np.maximum(ih, 0)
131 |
132 | ua = np.expand_dims((a[:, 2] - a[:, 0]) * (a[:, 3] - a[:, 1]), axis=1) + area - iw * ih
133 |
134 | ua = np.maximum(ua, np.finfo(float).eps)
135 |
136 | intersection = iw * ih
137 |
138 | return intersection / ua
139 |
140 |
141 | def interval_overlap(interval_a, interval_b):
142 | x1, x2 = interval_a
143 | x3, x4 = interval_b
144 |
145 | if x3 < x1:
146 | if x4 < x1:
147 | return 0
148 | else:
149 | return min(x2, x4) - x1
150 | else:
151 | if x2 < x3:
152 | return 0
153 | else:
154 | return min(x2, x4) - x3
155 |
156 |
157 | def compute_ap(recall, precision):
158 | """ Compute the average precision, given the recall and precision curves.
159 | Code originally from https://github.com/rbgirshick/py-faster-rcnn.
160 | # Arguments
161 | recall: The recall curve (list).
162 | precision: The precision curve (list).
163 | # Returns
164 | The average precision as computed in py-faster-rcnn.
165 | """
166 | # correct AP calculation
167 | # first append sentinel values at the end
168 | mrec = np.concatenate(([0.], recall, [1.]))
169 | mpre = np.concatenate(([0.], precision, [0.]))
170 |
171 | # compute the precision envelope
172 | for i in range(mpre.size - 1, 0, -1):
173 | mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i])
174 |
175 | # to calculate area under PR curve, look for points
176 | # where X axis (recall) changes value
177 | i = np.where(mrec[1:] != mrec[:-1])[0]
178 |
179 | # and sum (\Delta recall) * prec
180 | ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1])
181 | return ap
182 |
183 |
184 | def _sigmoid(x):
185 | return 1. / (1. + np.exp(-x))
186 |
187 |
188 | def _softmax(x, axis=-1, t=-100.):
189 | x = x - np.max(x)
190 |
191 | if np.min(x) < t:
192 | x = x / np.min(x) * t
193 |
194 | e_x = np.exp(x)
195 |
196 | return e_x / e_x.sum(axis, keepdims=True)
197 |
198 |
199 | class BoundBox:
200 | def __init__(self, xmin, ymin, xmax, ymax, c=None, classes=None):
201 | self.xmin = xmin
202 | self.ymin = ymin
203 | self.xmax = xmax
204 | self.ymax = ymax
205 |
206 | self.c = c
207 | self.classes = classes
208 |
209 | self.label = -1
210 | self.score = -1
211 |
212 | def get_label(self):
213 | if self.label == -1:
214 | self.label = np.argmax(self.classes)
215 |
216 | return self.label
217 |
218 | def get_score(self):
219 | if self.score == -1:
220 | self.score = self.classes[self.get_label()]
221 |
222 | return self.score
--------------------------------------------------------------------------------
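To make the per-cell arithmetic in decode_netout concrete, here is a worked sketch for a single cell and anchor. The anchor values and grid size come from config.json; the cell indices and raw outputs (tx, ty, tw, th) are made up for illustration:

import numpy as np

grid_w = grid_h = 13
anchors = [0.24, 0.79, 0.80, 2.12]
row, col, b = 6, 7, 1                 # hypothetical cell and anchor index
tx, ty, tw, th = 0.1, -0.3, 0.2, 0.4  # hypothetical raw network outputs

sigmoid = lambda v: 1.0 / (1.0 + np.exp(-v))

x = (col + sigmoid(tx)) / grid_w                  # box center, fraction of image width
y = (row + sigmoid(ty)) / grid_h                  # box center, fraction of image height
w = anchors[2 * b + 0] * np.exp(tw) / grid_w      # box width, fraction of image width
h = anchors[2 * b + 1] * np.exp(th) / grid_h      # box height, fraction of image height

# draw_boxes later multiplies these fractions by the original image size
print(x - w / 2, y - h / 2, x + w / 2, y + h / 2)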
/utils.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import keras
3 | import cv2
4 | import copy
5 | import os
6 | from imgaug import augmenters as iaa
7 | from sklearn.preprocessing import LabelEncoder
8 |
9 | from postprocessing import interval_overlap
10 |
11 |
12 | BASE_DIR = os.path.dirname(__file__)
13 | IMAGES_DIR = os.path.join(BASE_DIR, 'dataset', 'images')
14 |
15 |
16 | def bbox_iou(box1, box2):
17 | # 0 ,1 ,2 ,3
18 | # xmin,ymin,xmax,ymax
19 | intersect_w = interval_overlap([box1[0], box1[2]], [box2[0], box2[2]])
20 | intersect_h = interval_overlap([box1[1], box1[3]], [box2[1], box2[3]])
21 |
22 | intersect = intersect_w * intersect_h
23 |
24 | w1, h1 = box1[2] - box1[0], box1[3] - box1[1]
25 | w2, h2 = box2[2] - box2[0], box2[3] - box2[1]
26 |
27 | union = w1 * h1 + w2 * h2 - intersect
28 |
29 | return float(intersect) / union
30 |
31 |
32 | class BatchGenerator(keras.utils.Sequence):
33 | 'Generates data for Keras'
34 | def __init__(self, config, dataset, shuffle=True, jitter = True):
35 | 'Initialization'
36 | self.config = config
37 | self.dataset = dataset
38 |
39 | self.image_h = config['model']['image_h']
40 | self.image_w = config['model']['image_w']
41 | self.n_channels = 3
42 |
43 | self.grid_h = config['model']['grid_h']
44 | self.grid_w = config['model']['grid_w']
45 |
46 | self.n_classes = config['model']['num_classes']
47 | self.labels = config['model']['classes']
48 |
49 | self.batch_size = config['train']['batch_size']
50 | self.max_obj = config['model']['max_obj']
51 |
52 | self.shuffle = shuffle
53 | self.jitter = jitter
54 |
55 | self.nb_anchors = int(len(config['model']['anchors']) / 2)
56 |
57 | self.anchors = [[0, 0, config['model']['anchors'][2 * i], config['model']['anchors'][2 * i + 1]] for i in
58 | range(int(len(config['model']['anchors']) // 2))]
59 |
60 | self.on_epoch_end()
61 |
62 | sometimes = lambda aug: iaa.Sometimes(0.5, aug)
63 |
64 | self.aug_pipe = iaa.Sequential(
65 | [
66 | # apply the following augmenters to most images
67 | # iaa.Fliplr(0.5), # horizontally flip 50% of all images
68 | # iaa.Flipud(0.2), # vertically flip 20% of all images
69 | # sometimes(iaa.Crop(percent=(0, 0.1))), # crop images by 0-10% of their height/width
70 | #sometimes(iaa.Affine(
71 | # scale={"x": (0.8, 1.2), "y": (0.8, 1.2)}, # scale images to 80-120% of their size, individually per axis
72 | # translate_percent={"x": (-0.2, 0.2), "y": (-0.2, 0.2)}, # translate by -20 to +20 percent (per axis)
73 | # rotate=(-5, 5), # rotate by -45 to +45 degrees
74 | # shear=(-5, 5), # shear by -16 to +16 degrees
75 | # order=[0, 1], # use nearest neighbour or bilinear interpolation (fast)
76 | # cval=(0, 255), # if mode is constant, use a cval between 0 and 255
77 | # mode=ia.ALL # use any of scikit-image's warping modes (see 2nd image from the top for examples)
78 | #)),
79 | # execute 0 to 5 of the following (less important) augmenters per image
80 | # don't execute all of them, as that would often be way too strong
81 | iaa.SomeOf((0, 3),
82 | [
83 | # sometimes(iaa.Superpixels(p_replace=(0, 1.0), n_segments=(20, 200))), # convert images into their superpixel representation
84 | iaa.OneOf([
85 | iaa.GaussianBlur((0, 3.0)), # blur images with a sigma between 0 and 3.0
86 | iaa.AverageBlur(k=(2, 7)),
87 | # blur image using local means with kernel sizes between 2 and 7
88 | iaa.MedianBlur(k=(3, 11)),
89 | # blur image using local medians with kernel sizes between 2 and 7
90 | ]),
91 | iaa.Sharpen(alpha=(0, 1.0), lightness=(0.75, 1.5)), # sharpen images
92 | # iaa.Emboss(alpha=(0, 1.0), strength=(0, 2.0)), # emboss images
93 | # search either for all edges or for directed edges
94 | # sometimes(iaa.OneOf([
95 | # iaa.EdgeDetect(alpha=(0, 0.7)),
96 | # iaa.DirectedEdgeDetect(alpha=(0, 0.7), direction=(0.0, 1.0)),
97 | # ])),
98 | iaa.AdditiveGaussianNoise(loc=0, scale=(0.0, 0.05 * 255), per_channel=0.5),
99 | # add gaussian noise to images
100 | iaa.OneOf([
101 | iaa.Dropout((0.01, 0.1), per_channel=0.5), # randomly remove up to 10% of the pixels
102 | # iaa.CoarseDropout((0.03, 0.15), size_percent=(0.02, 0.05), per_channel=0.2),
103 | ]),
104 | # iaa.Invert(0.05, per_channel=True), # invert color channels
105 | iaa.Add((-10, 10), per_channel=0.5),
106 | # change brightness of images (by -10 to 10 of original value)
107 | iaa.Multiply((0.5, 1.5), per_channel=0.5),
108 | # change brightness of images (50-150% of original value)
109 | iaa.ContrastNormalization((0.5, 2.0), per_channel=0.5), # improve or worsen the contrast
110 | # iaa.Grayscale(alpha=(0.0, 1.0)),
111 | # sometimes(iaa.ElasticTransformation(alpha=(0.5, 3.5), sigma=0.25)), # move pixels locally around (with random strengths)
112 | # sometimes(iaa.PiecewiseAffine(scale=(0.01, 0.05))) # sometimes move parts of the image around
113 | ],
114 | random_order=True
115 | )
116 | ],
117 | random_order=True
118 | )
119 |
120 |
121 | def __len__(self):
122 | 'Denotes the number of batches per epoch'
123 | return int(np.ceil(float(len(self.dataset)) / self.batch_size))
124 |
125 |
126 | def __getitem__(self, index):
127 | 'Generate one batch of data'
128 |
129 | '''
130 | l_bound = index*self.config['BATCH_SIZE']
131 | r_bound = (index+1)*self.config['BATCH_SIZE']
132 |
133 | if r_bound > len(self.images):
134 | r_bound = len(self.images)
135 | l_bound = r_bound - self.config['BATCH_SIZE']
136 | '''
137 |
138 | le = LabelEncoder()
139 | le.fit_transform(self.labels)
140 |
141 | x_batch = np.zeros((self.batch_size, self.image_h, self.image_w, self.n_channels))
142 | b_batch = np.zeros((self.batch_size, 1, 1, 1, self.max_obj, 4))
143 |
144 | y_batch = np.zeros((self.batch_size, self.grid_h, self.grid_w, self.nb_anchors, 4 + 1 + self.num_classes())) # desired network output
145 |
146 | #current_batch = self.dataset[l_bound:r_bound]
147 | current_batch = self.dataset[index * self.batch_size:(index + 1) * self.batch_size]
148 |
149 | instance_num = 0
150 |
151 | for instance in current_batch:
152 | img, object_annotations = self.prep_image_and_annot(instance, jitter=self.jitter)
153 |
154 | obj_num = 0
155 |
156 | # center of the bounding box is divided with the image width/height and grid width/height
157 | # to get the coordinates relative to a single element of a grid
158 | for obj in object_annotations:
159 | if obj['xmax'] > obj['xmin'] and obj['ymax'] > obj['ymin'] and obj['class'] in self.labels:
 160 |                     center_x = .5 * (obj['xmin'] + obj['xmax'])  # x coordinate of the bounding box center
161 | center_x = center_x / (float(self.image_w) / self.grid_w) # scaled to the grid unit (a value between 0 and GRID_W-1)
 162 |                     center_y = .5 * (obj['ymin'] + obj['ymax'])  # y coordinate of the bounding box center
163 | center_y = center_y / (float(self.image_h) / self.grid_h) # scaled to the grid unit (a value between 0 and GRID_H-1)
164 |
165 | grid_x = int(np.floor(center_x)) # assigns the object to the matching
166 | grid_y = int(np.floor(center_y)) # grid element according to (center_x, center_y)
167 |
168 | if grid_x < self.grid_w and grid_y < self.grid_h:
169 | center_w = (obj['xmax'] - obj['xmin']) / (float(self.image_w) / self.grid_w)
170 | center_h = (obj['ymax'] - obj['ymin']) / (float(self.image_h) / self.grid_h)
171 |
172 | box = [center_x, center_y, center_w, center_h]
173 |
174 | # find the anchor that best predicts this box
175 | best_anchor = -1
176 | max_iou = -1
177 |
178 | shifted_box = [0, 0, center_w, center_h]
179 |
180 | for i in range(len(self.anchors)):
181 | anchor = self.anchors[i]
182 | iou = bbox_iou(shifted_box, anchor)
183 |
184 | if max_iou < iou:
185 | best_anchor = i
186 | max_iou = iou
187 |
188 | classes = [0, 0]
189 |
190 | obj_label = int(le.transform([obj['class']]))
191 |
192 | if obj_label == 0:
193 | classes[0] = 1
194 | else:
195 | classes[1] = 1
196 |
 197 |                     # normalize into the batch without reassigning img, so images
 198 |                     # with several objects are not divided by 255 more than once
 199 |                     x_batch[instance_num] = self.normalize(img)
200 |
201 | b_batch[instance_num, 0, 0, 0, obj_num] = box
202 | y_batch[instance_num, grid_y, grid_x, best_anchor] = [box[0], box[1], box[2], box[3], 1.0, classes[0], classes[1]]
203 |
204 | obj_num += 1
205 | obj_num %= self.max_obj
206 |
207 | instance_num += 1
208 |
209 | return [x_batch, b_batch], y_batch
210 |
211 |
212 | def prep_image_and_annot(self, dataset_instance, jitter):
213 | image_path = dataset_instance['image_path']
214 | image = self.load_image(os.path.join(IMAGES_DIR,image_path))
215 |
216 | h, w, c = image.shape
217 |
218 | if jitter:
219 | image = self.aug_pipe.augment_image(image)
220 |
221 | # resize the image to standard size
 222 |         image = cv2.resize(image, (self.image_w, self.image_h))  # cv2.resize expects (width, height)
223 |
224 | object_annotations = copy.deepcopy(dataset_instance['object'])
225 | for obj in object_annotations:
226 | for attr in ['xmin', 'xmax']:
227 | obj[attr] = int(obj[attr] * float(self.image_w) / w)
228 | obj[attr] = max(min(obj[attr], self.image_w), 0)
229 |
230 | for attr in ['ymin', 'ymax']:
231 | obj[attr] = int(obj[attr] * float(self.image_h) / h)
232 | obj[attr] = max(min(obj[attr], self.image_h), 0)
233 |
234 | return image, object_annotations
235 |
236 |
237 | def on_epoch_end(self):
238 | 'Updates indexes after each epoch'
239 | if self.shuffle:
240 | np.random.shuffle(self.dataset)
241 |
242 |
243 | def load_image(self, path):
244 | img = cv2.imread(os.path.join(IMAGES_DIR, path))
245 |
246 | try:
247 | if len(img.shape) == 3:
248 | img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
249 |
250 | else:
251 | img = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB)
252 | except:
253 | print(path)
254 |
255 | return img
256 |
257 |
258 | def load_annotation(self, i):
259 | annots = []
260 |
261 | for obj in self.dataset[i]['object']:
262 | annot = [obj['xmin'], obj['ymin'], obj['xmax'], obj['ymax'], self.labels.index(obj['class'])]
263 | annots += [annot]
264 |
265 | if len(annots) == 0: annots = [[]]
266 |
267 | return np.array(annots)
268 |
269 |
270 | def normalize(self, image):
271 | return image/255.
272 |
273 |
274 | def num_classes(self):
275 | return len(self.labels)
276 |
277 |
278 | def size(self):
279 | return len(self.dataset)
--------------------------------------------------------------------------------
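To make the ground-truth encoding in BatchGenerator.__getitem__ concrete, a worked sketch with made-up pixel coordinates and the 416x416 image / 13x13 grid settings from config.json (the class order [go, stop] follows config.json and the LabelEncoder used above):

import numpy as np

image_w = image_h = 416
grid_w = grid_h = 13

xmin, ymin, xmax, ymax = 200, 120, 230, 180   # hypothetical "stop" light box in pixels

center_x = 0.5 * (xmin + xmax) / (image_w / grid_w)   # 215 / 32 = 6.72 grid units
center_y = 0.5 * (ymin + ymax) / (image_h / grid_h)   # 150 / 32 = 4.69 grid units
center_w = (xmax - xmin) / (image_w / grid_w)         # 30 / 32 = 0.94 grid units wide
center_h = (ymax - ymin) / (image_h / grid_h)         # 60 / 32 = 1.88 grid units tall

grid_x, grid_y = int(np.floor(center_x)), int(np.floor(center_y))   # cell (6, 4)

# y_batch[instance, grid_y, grid_x, best_anchor] =
#   [x, y, w, h, objectness, class "go", class "stop"]
target = [center_x, center_y, center_w, center_h, 1.0, 0.0, 1.0]
print(grid_x, grid_y, target)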
/yolo.py:
--------------------------------------------------------------------------------
1 | from keras.models import Model, load_model
2 | from keras.layers import Reshape, Lambda, Conv2D, Input, MaxPooling2D, BatchNormalization
3 | from keras.layers.advanced_activations import LeakyReLU
4 | from keras.callbacks import EarlyStopping, ModelCheckpoint, TensorBoard, Callback
5 | from keras.optimizers import SGD, Adam, RMSprop
6 | import tensorflow as tf
7 | import os
8 | import numpy as np
9 |
10 | from postprocessing import decode_netout, interval_overlap, compute_overlap, compute_ap
11 | from preprocessing import load_image_predict, load_carla_data
12 | from utils import BatchGenerator
13 |
14 |
15 | BASE_DIR = os.path.dirname(__file__)
16 | ANNOT_DIR = os.path.join(BASE_DIR, 'dataset')
17 |
18 |
19 | class TinyYoloFeature:
20 | """Tiny yolo feature extractor"""
21 | def __init__(self, input_size, config):
22 | input_image = Input(shape=(input_size, input_size, 3))
23 |
24 | # Layer 1
25 | x = Conv2D(16, (3,3), strides=(1,1), padding='same', name='conv_1', use_bias=False)(input_image)
26 | x = BatchNormalization(name='norm_1')(x)
27 | x = LeakyReLU(alpha=0.1)(x)
28 | x = MaxPooling2D(pool_size=(2, 2))(x)
29 |
30 | # Layer 2 - 5
31 | for i in range(0,4):
32 | x = Conv2D(32*(2**i), (3,3), strides=(1,1), padding='same', name='conv_' + str(i+2), use_bias=False)(x)
33 | x = BatchNormalization(name='norm_' + str(i+2))(x)
34 | x = LeakyReLU(alpha=0.1)(x)
35 | x = MaxPooling2D(pool_size=(2, 2))(x)
36 |
37 | # Layer 6
38 | x = Conv2D(512, (3,3), strides=(1,1), padding='same', name='conv_6', use_bias=False)(x)
39 | x = BatchNormalization(name='norm_6')(x)
40 | x = LeakyReLU(alpha=0.1)(x)
41 | x = MaxPooling2D(pool_size=(2, 2), strides=(1,1), padding='same')(x)
42 |
43 | # Layer 7
44 | x = Conv2D(1024, (3,3), strides=(1,1), padding='same', name='conv_' + str(7), use_bias=False)(x)
45 | x = BatchNormalization(name='norm_' + str(7))(x)
46 | x = LeakyReLU(alpha=0.1)(x)
47 |
48 | # Layer 8
49 | x = Conv2D(512, (3, 3), strides=(1, 1), padding='same', name='conv_' + str(8), use_bias=False)(x)
50 | x = BatchNormalization(name='norm_' + str(8))(x)
51 | x = LeakyReLU(alpha=0.1, name = 'last')(x)
52 |
53 | self.feature_extractor = Model(input_image, x)
54 |
55 |
56 | pretrained = load_model('checkpoints\\' + config['model']['saved_model_name'], custom_objects={'custom_loss': dummy_loss, 'tf': tf})
57 | pretrained = pretrained.get_layer('model_1')
58 |
59 | idx = 0
60 | for layer in self.feature_extractor.layers:
61 | print(layer.name)
62 | layer.set_weights(pretrained.get_layer(index=idx).get_weights())
63 | idx += 1
64 |
65 | frozen = [1, 2, 3, 4, 5, 6, 7]
66 |
67 | for l in frozen:
68 | self.feature_extractor.get_layer("conv_" + str(l)).trainable = False
69 | self.feature_extractor.get_layer("norm_" + str(l)).trainable = False
70 |
71 | self.feature_extractor.summary()
72 |
73 |
74 | class YOLO(object):
75 | def __init__(self, config):
76 |
77 | self.config = config
78 |
79 | self.image_h = config['model']['image_h']
80 | self.image_w = config['model']['image_w']
81 |
82 | self.grid_h, self.grid_w = config['model']['grid_h'], config['model']['grid_w']
83 |
84 | self.labels = config['model']['classes']
85 | self.nb_class = int(len(self.labels))
86 | self.nb_box = int(len(config['model']['anchors'])/2)
87 | self.class_wt = np.ones(self.nb_class, dtype='float32')
88 | self.anchors = config['model']['anchors']
89 |
90 | self.max_box_per_image = config['model']['max_obj']
91 | self.batch_size = config['train']['batch_size']
92 |
93 | self.object_scale = config['model']['obj_scale']
94 | self.no_object_scale = config['model']['no_obj_scale']
95 | self.coord_scale = config['model']['coord_scale']
96 | self.class_scale = config['model']['class_scale']
97 |
98 | self.obj_thresh = config['model']['obj_thresh']
99 | self.nms_thresh = config['model']['nms_thresh']
100 |
101 | self.warmup_batches = config['train']['warmup_batches']
102 | self.debug = config['train']['debug']
103 |
104 | ##########################
105 | # Make the model
106 | ##########################
107 |
108 | # make the feature extractor layers
109 | input_image = Input(shape=(self.image_h, self.image_w, 3))
110 | self.true_boxes = Input(shape=(1, 1, 1, self.max_box_per_image, 4))
111 |
112 | self.feature_extractor = TinyYoloFeature(self.image_h, config).feature_extractor
113 | features = self.feature_extractor(input_image)
114 |
115 | # Object detection layer
116 | output = Conv2D(self.nb_box * (4 + 1 + self.nb_class),
117 | (1, 1), strides=(1, 1),
118 | padding='same',
119 | name='DetectionLayer',
120 | activation='linear',
121 | kernel_initializer='lecun_normal')(features)
122 |
123 | output = Reshape((self.grid_h, self.grid_w, self.nb_box, 4 + 1 + self.nb_class))(output)
 124 |         output = Lambda(lambda args: args[0])([output, self.true_boxes])  # passes the detections through; true_boxes is attached only so the custom loss can use it
125 |
126 | self.model = Model([input_image, self.true_boxes], output)
127 | self.model.summary()
128 |
129 | pretrained = load_model('checkpoints\\' + config['model']['saved_model_name'], custom_objects={'custom_loss': self.custom_loss, 'tf': tf})
130 | self.model.get_layer('DetectionLayer').set_weights(
131 | pretrained.get_layer('DetectionLayer').get_weights())
132 |
133 |
134 | def load_weights(self, model_path):
135 | model = load_model(model_path, custom_objects={'custom_loss': self.custom_loss, 'tf': tf})
136 |
137 | idx = 0
138 | for layer in self.model.layers:
139 | layer.set_weights(model.get_layer(index=idx).get_weights())
140 | idx += 1
141 |
142 |
143 | def predict(self, image_path):
144 | image = load_image_predict(image_path, self.image_h, self.image_w)
145 |
146 | dummy_array = np.zeros((1, 1, 1, 1, self.max_box_per_image, 4))
147 | netout = self.model.predict([image, dummy_array])[0]
148 |
149 | boxes = decode_netout(netout=netout, anchors = self.anchors, nb_class=self.nb_class,
150 | obj_threshold=self.obj_thresh, nms_threshold=self.nms_thresh)
151 | return boxes
152 |
153 |
154 | def train(self):
155 | data = load_carla_data(os.path.join(ANNOT_DIR, self.config['train']['annot_file_name']), self.config['model']['classes'])
156 |
157 | np.random.shuffle(data)
158 |
159 | train_instances, validation_instances = data[:1655], data[1655:]
160 |
161 | np.random.shuffle(train_instances)
162 | np.random.shuffle(validation_instances)
163 |
164 | train_generator = BatchGenerator(self.config, train_instances, jitter=True)
165 | validation_generator = BatchGenerator(self.config, validation_instances, jitter=False)
166 |
167 | checkpoint = ModelCheckpoint(
168 | 'checkpoints\\model.{epoch:02d}-{val_loss:.2f}.h5',
169 | monitor='val_loss',
170 | verbose=1,
171 | save_best_only=True,
172 | mode='auto',
173 | period=1
174 | )
175 |
176 | checkpoint_all = ModelCheckpoint(
177 | 'checkpoints\\all_models.{epoch:02d}-{loss:.2f}.h5',
178 | monitor='loss',
179 | verbose=1,
180 | save_best_only=True,
181 | mode='auto',
182 | period=1
183 | )
184 |
185 | # optimizer = RMSprop(lr=1e-3,rho=0.9, epsilon=1e-08, decay=0.0)
186 | # optimizer = Adam(lr=1e-3, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0)
187 | optimizer = SGD(lr=1e-5, momentum=0.9, decay=0.0005)
188 |
189 | self.model.compile(loss=self.custom_loss, optimizer=optimizer) #, metrics=['accuracy'])
190 |
191 | self.model.summary()
192 |
193 | history = self.model.fit_generator(generator=train_generator,
194 | steps_per_epoch=len(train_generator),
195 | epochs=self.config['train']['nb_epochs'],
196 | verbose=1,
197 | validation_data=validation_generator,
198 | validation_steps=len(validation_generator),
199 | callbacks=[checkpoint, checkpoint_all],# map_evaluator_cb], # checkpoint, tensorboard
200 | max_queue_size=10,
201 | workers=3
202 | )
203 |
204 |
205 | def evaluate(self):
206 | data = load_carla_data(os.path.join(ANNOT_DIR, self.config['train']['annot_file_name']),
207 | self.config['model']['classes'])
208 |
209 | np.random.shuffle(data)
210 |
211 | validation_instances = data#[1400:]
212 |
213 | validation_generator = BatchGenerator(self.config, validation_instances, jitter=False)
214 |
215 | map_evaluator_cb = self.MAP_evaluation(self, validation_generator,
216 | save_best=True,
217 | save_name='checkpoints\\best-mAP.h5',
218 | # os.path.join(BASE_DIR,'best_mAP\\weights.{epoch:02d}-{val_loss:.2f}.h5'),
219 | tensorboard=None,
220 | iou_threshold=0.4)
221 |
222 | self.model.compile(loss=self.custom_loss, optimizer=SGD(lr=1e-5, momentum=0.9, decay=0.0005))
223 |
224 | self.model.summary()
225 |
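    | # run a single pass over the validation generator so that the mAP callback
    | # fires in on_epoch_end; no separate evaluation loop is implemented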
226 | history = self.model.fit_generator(generator=validation_generator,
227 | steps_per_epoch=len(validation_generator),
228 | epochs=1,
229 | verbose=1,
230 | callbacks=[map_evaluator_cb]
231 | )
232 |
233 |
234 | def normalize(self, image):
235 | return image / 255.
236 |
237 |
238 | def custom_loss(self, y_true, y_pred):
239 | mask_shape = tf.shape(y_true)[:4]
240 |
241 | cell_x = tf.to_float(
242 | tf.reshape(tf.tile(tf.range(self.grid_w), [self.grid_h]), (1, self.grid_h, self.grid_w, 1, 1)))
243 | cell_y = tf.transpose(cell_x, (0, 2, 1, 3, 4))
244 |
245 | cell_grid = tf.tile(tf.concat([cell_x, cell_y], -1), [self.batch_size, 1, 1, self.nb_box, 1])
246 |
247 | coord_mask = tf.zeros(mask_shape)
248 | conf_mask = tf.zeros(mask_shape)
249 | class_mask = tf.zeros(mask_shape)
250 |
251 | seen = tf.Variable(0.)
252 | total_loss = tf.Variable(0.)
253 | total_recall = tf.Variable(0.)
254 | total_boxes = tf.Variable(self.grid_h*self.grid_w*self.nb_box*self.batch_size)
255 |
256 | """
257 | Adjust prediction
258 | """
259 | ### adjust x and y
260 | pred_box_xy = tf.sigmoid(y_pred[..., :2]) + cell_grid
261 |
262 | ### adjust w and h
263 | pred_box_wh = tf.exp(y_pred[..., 2:4]) * np.reshape(self.anchors, [1, 1, 1, self.nb_box, 2])
264 |
265 | ### adjust confidence
266 | pred_box_conf = tf.sigmoid(y_pred[..., 4])
267 |
268 | ### adjust class probabilities
269 | pred_box_class = y_pred[..., 5:]
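    | # Summary of the decoding above (YOLOv2-style):
    | #   b_xy = sigmoid(t_xy) + cell offset,  b_wh = anchor_wh * exp(t_wh),
    | #   objectness = sigmoid(t_o); class scores stay as raw logits for the
    | #   softmax cross-entropy used further below.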
270 |
271 | """
272 | Adjust ground truth
273 | """
274 | ### adjust x and y
275 | true_box_xy = y_true[..., 0:2] # relative position to the containing cell
276 |
277 | ### adjust w and h
278 | true_box_wh = y_true[..., 2:4] # number of cells across, horizontally and vertically
279 |
280 | ### adjust confidence
281 | true_wh_half = true_box_wh / 2.
282 | true_mins = true_box_xy - true_wh_half
283 | true_maxes = true_box_xy + true_wh_half
284 |
285 | pred_wh_half = pred_box_wh / 2.
286 | pred_mins = pred_box_xy - pred_wh_half
287 | pred_maxes = pred_box_xy + pred_wh_half
288 |
289 | intersect_mins = tf.maximum(pred_mins, true_mins)
290 | intersect_maxes = tf.minimum(pred_maxes, true_maxes)
291 | intersect_wh = tf.maximum(intersect_maxes - intersect_mins, 0.)
292 | intersect_areas = intersect_wh[..., 0] * intersect_wh[..., 1]
293 |
294 | true_areas = true_box_wh[..., 0] * true_box_wh[..., 1]
295 | pred_areas = pred_box_wh[..., 0] * pred_box_wh[..., 1]
296 |
297 | union_areas = pred_areas + true_areas - intersect_areas
298 | iou_scores = tf.truediv(intersect_areas, union_areas)
299 |
300 | true_box_conf = iou_scores * y_true[..., 4]
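    | # IoU = intersection / union between the decoded prediction and the ground
    | # truth in each cell; the confidence target is that IoU, zeroed out wherever
    | # y_true[..., 4] indicates that no object is present.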
301 |
302 | ### adjust class probabilities
303 | true_box_class = tf.argmax(y_true[..., 5:], -1)
304 |
305 | """
306 | Determine the masks
307 | """
308 | ### coordinate mask: simply the position of the ground truth boxes (the predictors)
309 | coord_mask = tf.expand_dims(y_true[..., 4], axis=-1) * self.coord_scale
310 |
311 | ### confidence mask: penalize the responsible predictors and penalize boxes with low IoU
312 | # penalize the confidence of boxes whose best IoU with every ground truth box is below 0.5
313 | true_xy = self.true_boxes[..., 0:2]
314 | true_wh = self.true_boxes[..., 2:4]
315 |
316 | true_wh_half = true_wh / 2.
317 | true_mins = true_xy - true_wh_half
318 | true_maxes = true_xy + true_wh_half
319 |
320 | pred_xy = tf.expand_dims(pred_box_xy, 4)
321 | pred_wh = tf.expand_dims(pred_box_wh, 4)
322 |
323 | pred_wh_half = pred_wh / 2.
324 | pred_mins = pred_xy - pred_wh_half
325 | pred_maxes = pred_xy + pred_wh_half
326 |
327 | intersect_mins = tf.maximum(pred_mins, true_mins)
328 | intersect_maxes = tf.minimum(pred_maxes, true_maxes)
329 | intersect_wh = tf.maximum(intersect_maxes - intersect_mins, 0.)
330 | intersect_areas = intersect_wh[..., 0] * intersect_wh[..., 1]
331 |
332 | true_areas = true_wh[..., 0] * true_wh[..., 1]
333 | pred_areas = pred_wh[..., 0] * pred_wh[..., 1]
334 |
335 | union_areas = pred_areas + true_areas - intersect_areas
336 | iou_scores = tf.truediv(intersect_areas, union_areas)
337 |
338 | best_ious = tf.reduce_max(iou_scores, axis=4)
339 | #conf_mask = conf_mask + tf.to_float(best_ious < 0.5) * (1 - y_true[..., 4]) * self.no_object_scale
340 |
341 | # penalize the confidence of the boxes that are responsible for the corresponding ground truth boxes
342 | #conf_mask = conf_mask + y_true[..., 4] * self.object_scale
343 |
344 | conf_mask_neg = tf.to_float(best_ious < 0.50) * (1 - y_true[..., 4]) * self.no_object_scale
345 | conf_mask_pos = y_true[..., 4] * self.object_scale
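    | # conf_mask_neg selects anchors that hold no object and whose best IoU with
    | # every ground-truth box (from self.true_boxes) stays below 0.5, weighted by
    | # no_object_scale; conf_mask_pos selects the anchors responsible for a
    | # ground-truth box, weighted by object_scale.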
346 |
347 | ### class mask: simply the position of the ground truth boxes (the predictors)
348 | class_mask = y_true[..., 4] * tf.gather(self.class_wt, true_box_class) * self.class_scale
349 |
350 | """
351 | Warm-up training
352 | """
353 | no_boxes_mask = tf.to_float(coord_mask < self.coord_scale / 2.)
354 | seen = tf.assign_add(seen, 1.)
355 |
356 | true_box_xy, true_box_wh, coord_mask = tf.cond(tf.less(seen, self.warmup_batches + 1),
357 | lambda: [true_box_xy + (0.5 + cell_grid) * no_boxes_mask,
358 | true_box_wh + tf.ones_like(true_box_wh) * \
359 | np.reshape(self.anchors, [1, 1, 1, self.nb_box, 2]) * \
360 | no_boxes_mask,
361 | tf.ones_like(coord_mask)],
362 | lambda: [true_box_xy,
363 | true_box_wh,
364 | coord_mask])
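    | # Warm-up: for the first warmup_batches batches, cells without a ground-truth
    | # box are given dummy targets (the cell centre and the raw anchor size) and the
    | # coordinate mask is set to one everywhere, so every predictor receives a
    | # coordinate gradient early in training.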
365 |
366 | """
367 | Finalize the loss
368 | """
369 | nb_coord_box = tf.reduce_sum(tf.to_float(coord_mask > 0.0))
370 | #nb_conf_box = tf.reduce_sum(tf.to_float(conf_mask > 0.0))
371 | nb_conf_box_neg = tf.reduce_sum(tf.to_float(conf_mask_neg > 0.0))
372 | nb_conf_box_pos = tf.subtract(tf.to_float(total_boxes), nb_conf_box_neg) #tf.reduce_sum(tf.to_float(conf_mask_pos > 0.0))
373 | nb_class_box = tf.reduce_sum(tf.to_float(class_mask > 0.0))
374 |
375 | true_box_wh = tf.sqrt(true_box_wh)
376 | pred_box_wh = tf.sqrt(pred_box_wh)
377 |
378 | loss_xy = tf.reduce_sum(tf.square(true_box_xy - pred_box_xy) * coord_mask) / (nb_coord_box + 1e-6) / 2.
379 | loss_wh = tf.reduce_sum(tf.square(true_box_wh - pred_box_wh) * coord_mask) / (nb_coord_box + 1e-6) / 2.
380 | loss_conf_neg = tf.reduce_sum(tf.square(true_box_conf - pred_box_conf) * conf_mask_neg) / (nb_conf_box_neg + 1e-6) / 2.
381 | loss_conf_pos = tf.reduce_sum(tf.square(true_box_conf - pred_box_conf) * conf_mask_pos) / (nb_conf_box_pos + 1e-6) / 2.
382 | loss_conf = loss_conf_neg + loss_conf_pos
383 | #loss_conf = tf.reduce_sum(tf.square(true_box_conf - pred_box_conf) * conf_mask) / (nb_conf_box + 1e-6) / 2.
384 | loss_class = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=true_box_class, logits=pred_box_class)
385 | loss_class = tf.reduce_sum(loss_class * class_mask) / (nb_class_box + 1e-6)
386 |
387 | loss = tf.cond(tf.less(seen, self.warmup_batches + 1),
388 | lambda: loss_xy + loss_wh + loss_conf + loss_class + 10,
389 | lambda: loss_xy + loss_wh + loss_conf + loss_class)
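    | # The total loss is the sum of the coordinate, size, confidence and class terms;
    | # a constant offset of 10 is added only while warm-up is still active
    | # (presumably so warm-up batches are easy to spot in the logs).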
390 |
391 | if self.debug:
392 | nb_true_box = tf.reduce_sum(y_true[..., 4])
393 | nb_pred_box = tf.reduce_sum(tf.to_float(true_box_conf > 0.32) * tf.to_float(pred_box_conf > 0.3))
394 |
395 | current_recall = nb_pred_box / (nb_true_box + 1e-6)
396 | total_recall = tf.assign_add(total_recall, current_recall)
397 |
398 | total_loss = tf.assign_add(total_loss, loss)
399 |
400 | loss = tf.Print(loss, [loss_xy], message='\nLoss XY \t', summarize=1000)
401 | loss = tf.Print(loss, [loss_wh], message='Loss WH \t', summarize=1000)
402 | loss = tf.Print(loss, [nb_conf_box_neg], message='Nb Conf Box Negative \t', summarize=1000)
403 | loss = tf.Print(loss, [nb_conf_box_pos], message='Nb Conf Box Positive \t', summarize=1000)
404 | loss = tf.Print(loss, [loss_conf_neg], message='Loss Conf Negative \t', summarize=1000)
405 | loss = tf.Print(loss, [loss_conf_pos], message='Loss Conf Positive \t', summarize=1000)
406 | loss = tf.Print(loss, [loss_conf], message='Loss Conf \t', summarize=1000)
407 | loss = tf.Print(loss, [loss_class], message='Loss Class \t', summarize=1000)
408 | loss = tf.Print(loss, [loss], message='Total Loss \t', summarize=1000)
409 | loss = tf.Print(loss, [total_loss / seen], message='Average Loss \t', summarize=1000)
410 | loss = tf.Print(loss, [nb_pred_box], message='Number of pred boxes \t', summarize=1000)
411 | loss = tf.Print(loss, [nb_true_box], message='Number of true boxes \t', summarize=1000)
412 | loss = tf.Print(loss, [current_recall], message='Current Recall \t', summarize=1000)
413 | loss = tf.Print(loss, [total_recall / seen], message='Average Recall \t', summarize=1000)
414 |
415 |
416 | return loss
417 |
418 |
419 | class MAP_evaluation(Callback):
420 | """ Evaluate a given dataset using a given model.
421 | code originally from https://github.com/fizyr/keras-retinanet
422 | # Arguments
423 | generator : The generator that represents the dataset to evaluate.
424 | model : The model to evaluate.
425 | iou_threshold : The IoU threshold above which a detection is considered a true positive.
426 | score_threshold : The score confidence threshold to use for detections.
427 | save_path : The path to save images with visualized detections to.
428 | # Returns
429 | A dict mapping class names to mAP scores.
430 | """
431 |
432 | def __init__(self,
433 | yolo,
434 | generator,
435 | iou_threshold=0.5,
436 | score_threshold=0.3,
437 | save_path=None,
438 | period=1,
439 | save_best=False,
440 | save_name=None,
441 | tensorboard=None):
442 |
443 | self.yolo = yolo
444 | self.generator = generator
445 | self.iou_threshold = iou_threshold
446 | self.save_path = save_path
447 | self.period = period
448 | self.save_best = save_best
449 | self.save_name = save_name
450 | self.tensorboard = tensorboard
451 |
452 | self.bestMap = 0
453 |
454 | self.model = self.yolo.model
455 |
456 | if not isinstance(self.tensorboard, TensorBoard) and self.tensorboard is not None:
457 | raise ValueError("Tensorboard object must be an instance of keras.callbacks.TensorBoard")
458 |
459 |
460 | def on_epoch_end(self, epoch, logs={}):
461 | print(epoch)
462 | # note: self.period is currently unused - the evaluation runs at the end of every epoch
463 | mAP, average_precisions = self.evaluate_mAP()
464 | print('\n')
465 | for label, average_precision in average_precisions.items():
466 | print(self.yolo.labels[label], '{:.4f}'.format(average_precision))
467 | print('mAP: {:.4f}'.format(mAP))
468 |
469 | if self.save_best and self.save_name is not None and mAP > self.bestMap:
470 | print(
471 | "mAP improved from {} to {}, saving model to {}.".format(self.bestMap, mAP, self.save_name))
472 | self.bestMap = mAP
473 | print(self.save_name)
474 | self.model.save(self.save_name)
475 | self.model.save_weights('checkpoints\\best-mAP.h5')
476 | else:
477 | print("mAP did not improve from {}.".format(self.bestMap))
478 |
479 | if self.tensorboard is not None and self.tensorboard.writer is not None:
480 | import tensorflow as tf
481 | summary = tf.Summary()
482 | summary_value = summary.value.add()
483 | summary_value.simple_value = mAP
484 | summary_value.tag = "val_mAP"
485 | self.tensorboard.writer.add_summary(summary, epoch)
486 |
487 |
488 | def evaluate_mAP(self):
489 | average_precisions = self._calc_avg_precisions()
490 | mAP = sum(average_precisions.values()) / len(average_precisions)
491 |
492 | return mAP, average_precisions
493 |
494 |
495 | def _calc_avg_precisions(self):
496 | # gather all detections and annotations
497 | all_detections = [[None for i in range(self.generator.num_classes())] for j in
498 | range(self.generator.size())]
499 | all_annotations = [[None for i in range(self.generator.num_classes())] for j in
500 | range(self.generator.size())]
501 |
502 | for i in range(self.generator.size()):
503 | raw_image = self.generator.load_image(self.generator.dataset[i]['image_path'])
504 | raw_height, raw_width, _ = raw_image.shape
505 | # make the boxes and the labels
506 | pred_boxes = self.yolo.predict(os.path.join(ANNOT_DIR, 'images', self.generator.dataset[i]['image_path']))
507 |
508 | score = np.array([box.score for box in pred_boxes])
509 | pred_labels = np.array([box.label for box in pred_boxes])
510 |
511 | if len(pred_boxes) > 0:
512 | pred_boxes = np.array([[box.xmin * raw_width, box.ymin * raw_height, box.xmax * raw_width,
513 | box.ymax * raw_height, box.score] for box in pred_boxes])
514 | else:
515 | pred_boxes = np.array([[]])
516 |
517 | # sort the boxes and the labels according to scores
518 | score_sort = np.argsort(-score)
519 | pred_labels = pred_labels[score_sort]
520 | pred_boxes = pred_boxes[score_sort]
521 |
522 | # copy detections to all_detections
523 | for label in range(self.generator.num_classes()):
524 | all_detections[i][label] = pred_boxes[pred_labels == label, :]
525 |
526 | annotations = self.generator.load_annotation(i)
527 |
528 | # copy detections to all_annotations
529 | for label in range(self.generator.num_classes()):
530 | all_annotations[i][label] = annotations[annotations[:, 4] == label, :4].copy()
531 |
532 | # compute mAP by comparing all detections and all annotations
533 | average_precisions = {}
534 |
535 | for label in range(self.generator.num_classes()):
536 | false_positives = np.zeros((0,))
537 | true_positives = np.zeros((0,))
538 | scores = np.zeros((0,))
539 | num_annotations = 0.0
540 |
541 | for i in range(self.generator.size()):
542 | detections = all_detections[i][label]
543 | annotations = all_annotations[i][label]
544 | num_annotations += annotations.shape[0]
545 | detected_annotations = []
546 |
547 | for d in detections:
548 | scores = np.append(scores, d[4])
549 |
550 | if annotations.shape[0] == 0:
551 | false_positives = np.append(false_positives, 1)
552 | true_positives = np.append(true_positives, 0)
553 | continue
554 |
555 | overlaps = compute_overlap(np.expand_dims(d, axis=0), annotations)
556 | assigned_annotation = np.argmax(overlaps, axis=1)
557 | max_overlap = overlaps[0, assigned_annotation]
558 |
559 | if max_overlap >= self.iou_threshold and assigned_annotation not in detected_annotations:
560 | false_positives = np.append(false_positives, 0)
561 | true_positives = np.append(true_positives, 1)
562 | detected_annotations.append(assigned_annotation)
563 | else:
564 | false_positives = np.append(false_positives, 1)
565 | true_positives = np.append(true_positives, 0)
566 |
567 | # no annotations -> AP for this class is 0 (is this correct?)
568 | if num_annotations == 0:
569 | average_precisions[label] = 0
570 | continue
571 |
572 | # sort by score
573 | indices = np.argsort(-scores)
574 | false_positives = false_positives[indices]
575 | true_positives = true_positives[indices]
576 |
577 | # compute false positives and true positives
578 | false_positives = np.cumsum(false_positives)
579 | true_positives = np.cumsum(true_positives)
580 |
581 | # compute recall and precision
582 | recall = true_positives / num_annotations
583 | precision = true_positives / np.maximum(true_positives + false_positives, np.finfo(np.float64).eps)
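    | # cumulative precision/recall over the score-sorted detections:
    | #   recall    = TP / num_annotations
    | #   precision = TP / (TP + FP)
    | # compute_ap then turns this precision-recall curve into the average precision.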
584 |
585 | # compute average precision
586 | average_precision = compute_ap(recall, precision)
587 | average_precisions[label] = average_precision
588 |
589 | return average_precisions
590 |
591 |
592 | def dummy_loss(y_true, y_pred):
593 | return tf.sqrt(tf.reduce_sum(y_pred))
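594 | 
595 | 
596 | # Illustrative sketch (not used by the module): a pure-Python version of the
597 | # intersection-over-union arithmetic that custom_loss performs tensor-wise and
598 | # that compute_overlap applies during mAP evaluation.  The function name and
599 | # the (xmin, ymin, xmax, ymax) box format are assumptions made for this
600 | # example only.
601 | def box_iou_example(box_a, box_b):
602 |     inter_w = max(0., min(box_a[2], box_b[2]) - max(box_a[0], box_b[0]))
603 |     inter_h = max(0., min(box_a[3], box_b[3]) - max(box_a[1], box_b[1]))
604 |     intersection = inter_w * inter_h
605 |     union = ((box_a[2] - box_a[0]) * (box_a[3] - box_a[1])
606 |              + (box_b[2] - box_b[0]) * (box_b[3] - box_b[1])
607 |              - intersection)
608 |     return intersection / union if union > 0 else 0.
609 | 
610 | # Example: box_iou_example((0, 0, 2, 2), (1, 1, 3, 3)) == 1 / 7 ≈ 0.143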
--------------------------------------------------------------------------------