├── out ├── .gitkeep ├── out1.png ├── out2.png ├── out3.png ├── out4.png ├── out5.png ├── out6.png ├── out7.png ├── out8.png ├── out9.png ├── out10.png ├── out11.png ├── out12.png ├── out13.png ├── out14.png ├── out15.png └── out16.png ├── dataset └── images │ └── .gitkeep ├── test_images ├── .gitkeep ├── test (1).png ├── test (10).png ├── test (11).png ├── test (12).png ├── test (13).png ├── test (14).png ├── test (15).png ├── test (16).png ├── test (2).png ├── test (3).png ├── test (4).png ├── test (5).png ├── test (6).png ├── test (7).png ├── test (8).png └── test (9).png ├── main.py ├── config.json ├── predict.py ├── detect_test_images.py ├── .gitignore ├── preprocessing.py ├── grab_screen.py ├── README.md ├── generate_anchors.py ├── postprocessing.py ├── utils.py └── yolo.py /out/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /dataset/images/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /test_images/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /out/out1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/affinis-lab/traffic-light-detection-module/HEAD/out/out1.png -------------------------------------------------------------------------------- /out/out2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/affinis-lab/traffic-light-detection-module/HEAD/out/out2.png -------------------------------------------------------------------------------- /out/out3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/affinis-lab/traffic-light-detection-module/HEAD/out/out3.png -------------------------------------------------------------------------------- /out/out4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/affinis-lab/traffic-light-detection-module/HEAD/out/out4.png -------------------------------------------------------------------------------- /out/out5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/affinis-lab/traffic-light-detection-module/HEAD/out/out5.png -------------------------------------------------------------------------------- /out/out6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/affinis-lab/traffic-light-detection-module/HEAD/out/out6.png -------------------------------------------------------------------------------- /out/out7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/affinis-lab/traffic-light-detection-module/HEAD/out/out7.png -------------------------------------------------------------------------------- /out/out8.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/affinis-lab/traffic-light-detection-module/HEAD/out/out8.png -------------------------------------------------------------------------------- /out/out9.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/affinis-lab/traffic-light-detection-module/HEAD/out/out9.png -------------------------------------------------------------------------------- /out/out10.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/affinis-lab/traffic-light-detection-module/HEAD/out/out10.png -------------------------------------------------------------------------------- /out/out11.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/affinis-lab/traffic-light-detection-module/HEAD/out/out11.png -------------------------------------------------------------------------------- /out/out12.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/affinis-lab/traffic-light-detection-module/HEAD/out/out12.png -------------------------------------------------------------------------------- /out/out13.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/affinis-lab/traffic-light-detection-module/HEAD/out/out13.png -------------------------------------------------------------------------------- /out/out14.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/affinis-lab/traffic-light-detection-module/HEAD/out/out14.png -------------------------------------------------------------------------------- /out/out15.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/affinis-lab/traffic-light-detection-module/HEAD/out/out15.png -------------------------------------------------------------------------------- /out/out16.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/affinis-lab/traffic-light-detection-module/HEAD/out/out16.png -------------------------------------------------------------------------------- /test_images/test (1).png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/affinis-lab/traffic-light-detection-module/HEAD/test_images/test (1).png -------------------------------------------------------------------------------- /test_images/test (10).png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/affinis-lab/traffic-light-detection-module/HEAD/test_images/test (10).png -------------------------------------------------------------------------------- /test_images/test (11).png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/affinis-lab/traffic-light-detection-module/HEAD/test_images/test (11).png -------------------------------------------------------------------------------- /test_images/test (12).png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/affinis-lab/traffic-light-detection-module/HEAD/test_images/test (12).png -------------------------------------------------------------------------------- /test_images/test (13).png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/affinis-lab/traffic-light-detection-module/HEAD/test_images/test (13).png -------------------------------------------------------------------------------- /test_images/test (14).png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/affinis-lab/traffic-light-detection-module/HEAD/test_images/test (14).png -------------------------------------------------------------------------------- /test_images/test (15).png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/affinis-lab/traffic-light-detection-module/HEAD/test_images/test (15).png -------------------------------------------------------------------------------- /test_images/test (16).png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/affinis-lab/traffic-light-detection-module/HEAD/test_images/test (16).png -------------------------------------------------------------------------------- /test_images/test (2).png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/affinis-lab/traffic-light-detection-module/HEAD/test_images/test (2).png -------------------------------------------------------------------------------- /test_images/test (3).png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/affinis-lab/traffic-light-detection-module/HEAD/test_images/test (3).png -------------------------------------------------------------------------------- /test_images/test (4).png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/affinis-lab/traffic-light-detection-module/HEAD/test_images/test (4).png -------------------------------------------------------------------------------- /test_images/test (5).png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/affinis-lab/traffic-light-detection-module/HEAD/test_images/test (5).png -------------------------------------------------------------------------------- /test_images/test (6).png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/affinis-lab/traffic-light-detection-module/HEAD/test_images/test (6).png -------------------------------------------------------------------------------- /test_images/test (7).png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/affinis-lab/traffic-light-detection-module/HEAD/test_images/test (7).png -------------------------------------------------------------------------------- /test_images/test (8).png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/affinis-lab/traffic-light-detection-module/HEAD/test_images/test (8).png -------------------------------------------------------------------------------- /test_images/test (9).png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/affinis-lab/traffic-light-detection-module/HEAD/test_images/test (9).png -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | import 
argparse 2 | import json 3 | 4 | from yolo import YOLO 5 | 6 | 7 | def main(args): 8 | config_path = args.conf 9 | 10 | with open(config_path) as config_buffer: 11 | config = json.loads(config_buffer.read()) 12 | 13 | yolo = YOLO(config) 14 | if config['train']['enabled']: 15 | #yolo.load_weights(config['model']['saved_model_name']) 16 | yolo.train() 17 | else: 18 | yolo.evaluate() 19 | 20 | 21 | if __name__ == "__main__": 22 | arg_parser = argparse.ArgumentParser( 23 | description='Train and validate autonomous car module') 24 | 25 | arg_parser.add_argument( 26 | '-c', 27 | '--conf', 28 | help='path to the configuration file') 29 | 30 | main(arg_parser.parse_args()) 31 | -------------------------------------------------------------------------------- /config.json: -------------------------------------------------------------------------------- 1 | { 2 | "model" : 3 | { 4 | "image_h": 416, 5 | "image_w": 416, 6 | "anchors": [0.24,0.79, 0.80,2.12], 7 | "max_obj": 5, 8 | "grid_h": 13, 9 | "grid_w": 13, 10 | "num_classes": 2, 11 | "classes": ["go", "stop"], 12 | "obj_thresh": 0.20, 13 | "nms_thresh": 0.01, 14 | "saved_model_name": "model.09-2.01.h5", 15 | "obj_scale": 85, 16 | "no_obj_scale": 75, 17 | "coord_scale": 70, 18 | "class_scale": 70 19 | }, 20 | 21 | "plot_model": true, 22 | 23 | "train": { 24 | "enabled": false, 25 | "annot_file_name": "carla_all.csv", 26 | "batch_size": 8, 27 | "learning_rate": 1e-4, 28 | "nb_epochs": 500, 29 | "warmup_batches": 0, 30 | "debug": true 31 | } 32 | } -------------------------------------------------------------------------------- /predict.py: -------------------------------------------------------------------------------- 1 | from keras.models import load_model 2 | import os 3 | import numpy as np 4 | 5 | from yolo import YOLO, dummy_loss 6 | from preprocessing import load_image_predict 7 | from postprocessing import decode_netout 8 | 9 | 10 | BASE_DIR = os.path.dirname(os.path.abspath(__file__)) 11 | 12 | 13 | def get_model(config): 14 | model = YOLO( 15 | config =config 16 | ) 17 | model.load_weights(os.path.join(BASE_DIR, config['model']['saved_model_name'])) 18 | return model 19 | 20 | 21 | def get_model_from_file(config): 22 | path = os.path.join(BASE_DIR, 'checkpoints', config['model']['saved_model_name']) 23 | model = load_model(path, custom_objects={'custom_loss': dummy_loss}) 24 | return model 25 | 26 | 27 | def predict_with_model_from_file(config, model, image_path): 28 | image = load_image_predict(image_path, config['model']['image_h'], config['model']['image_w']) 29 | 30 | dummy_array = np.zeros((1, 1, 1, 1, config['model']['max_obj'], 4)) 31 | netout = model.predict([image, dummy_array])[0] 32 | 33 | boxes = decode_netout(netout=netout, anchors=config['model']['anchors'], 34 | nb_class=config['model']['num_classes'], 35 | obj_threshold=config['model']['obj_thresh'], 36 | nms_threshold=config['model']['nms_thresh']) 37 | return boxes -------------------------------------------------------------------------------- /detect_test_images.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import argparse 4 | import cv2 5 | 6 | from postprocessing import draw_boxes 7 | from predict import predict_with_model_from_file, get_model_from_file 8 | 9 | 10 | BASE_DIR = os.path.dirname(__file__) 11 | TEST_IMAGES_DIR = os.path.join(BASE_DIR, 'test_images') 12 | OUT_IMAGES_DIR = os.path.join(BASE_DIR, 'out') 13 | 14 | 15 | def detect_on_test_images(config): 16 | model = 
get_model_from_file(config) 17 | 18 | all_images = [f for f in os.listdir(TEST_IMAGES_DIR) if os.path.isfile(os.path.join(TEST_IMAGES_DIR, f))] 19 | img_num = 1 20 | for image_name in all_images: 21 | image_path = os.path.join(TEST_IMAGES_DIR, image_name) 22 | 23 | netout = predict_with_model_from_file(config, model, image_path) 24 | plt_image = draw_boxes(cv2.imread(image_path), netout, config['model']['classes']) 25 | 26 | #cv2.imshow('demo', plt_image) 27 | #cv2.waitKey(0) 28 | 29 | cv2.imwrite(os.path.join(OUT_IMAGES_DIR, 'out' + str(img_num) + '.png'), plt_image) 30 | img_num += 1 31 | 32 | 33 | def main(args): 34 | config_path = args.conf 35 | 36 | with open(config_path) as config_buffer: 37 | config = json.loads(config_buffer.read()) 38 | 39 | detect_on_test_images(config) 40 | 41 | 42 | if __name__ == '__main__': 43 | argparser = argparse.ArgumentParser() 44 | 45 | argparser.add_argument( 46 | '-c', 47 | '--conf', 48 | default='config.json', 49 | help='Path to configuration file') 50 | 51 | args = argparser.parse_args() 52 | main(args) -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | .idea 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | dataset/images/* 30 | !dataset/images/.gitkeep 31 | *.h5 32 | trainval.json 33 | checkpoints/* 34 | 35 | # PyInstaller 36 | # Usually these files are written by a python script from a template 37 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
38 | *.manifest 39 | *.spec 40 | 41 | # Installer logs 42 | pip-log.txt 43 | pip-delete-this-directory.txt 44 | 45 | # Unit test / coverage reports 46 | htmlcov/ 47 | .tox/ 48 | .coverage 49 | .coverage.* 50 | .cache 51 | nosetests.xml 52 | coverage.xml 53 | *.cover 54 | .hypothesis/ 55 | .pytest_cache/ 56 | 57 | # Translations 58 | *.mo 59 | *.pot 60 | 61 | # Django stuff: 62 | *.log 63 | local_settings.py 64 | db.sqlite3 65 | 66 | # Flask stuff: 67 | instance/ 68 | .webassets-cache 69 | 70 | # Scrapy stuff: 71 | .scrapy 72 | 73 | # Sphinx documentation 74 | docs/_build/ 75 | 76 | # PyBuilder 77 | target/ 78 | 79 | # Jupyter Notebook 80 | .ipynb_checkpoints 81 | 82 | # pyenv 83 | .python-version 84 | 85 | # celery beat schedule file 86 | celerybeat-schedule 87 | 88 | # SageMath parsed files 89 | *.sage.py 90 | 91 | # Environments 92 | .env 93 | .venv 94 | env/ 95 | venv/ 96 | ENV/ 97 | env.bak/ 98 | venv.bak/ 99 | 100 | # Spyder project settings 101 | .spyderproject 102 | .spyproject 103 | 104 | # Rope project settings 105 | .ropeproject 106 | 107 | # mkdocs documentation 108 | /site 109 | 110 | # mypy 111 | .mypy_cache/ 112 | -------------------------------------------------------------------------------- /preprocessing.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import pandas as pd 3 | import numpy as np 4 | import os 5 | from sklearn.preprocessing import LabelEncoder 6 | 7 | 8 | def load_image_predict(image_path, image_h, image_w): 9 | image = cv2.imread(image_path) 10 | image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) 11 | image = cv2.resize(image, (image_h, image_w)) 12 | image = image/255 13 | image = np.expand_dims(image, 0) 14 | 15 | return image 16 | 17 | 18 | def load_carla_data(path, labels): 19 | le = LabelEncoder() 20 | le.fit_transform(labels) 21 | 22 | data = pd.read_csv(path, delimiter=",", header=None) 23 | 24 | dataset = {} 25 | 26 | objects_omitted = 0 27 | red = 0 28 | green = 0 29 | for record in data[1:][data.columns[:7]].values: 30 | tokens = record[5].split(",") 31 | 32 | xmin, ymin, xmax, ymax = float(tokens[1].split(":")[1]), float(tokens[2].split(":")[1]),\ 33 | float(tokens[3].split(":")[1]), float(tokens[4].split(":")[1].replace("}", "")) 34 | 35 | #omit small images 36 | if xmax < 15: 37 | objects_omitted += 1 38 | continue 39 | 40 | xmax += xmin 41 | ymax += ymin 42 | 43 | if "stop" in record[6]: 44 | obj_class = "stop" 45 | red += 1 46 | else: 47 | obj_class = "go" 48 | green += 1 49 | 50 | obj = {} 51 | obj['xmin'], obj['ymin'], obj['xmax'], obj['ymax'], obj['class'] = xmin, ymin, xmax, ymax, obj_class 52 | 53 | image_path = record[0] 54 | 55 | #image_path = os.path.join("images", image_path) 56 | 57 | if image_path in dataset: 58 | dataset[image_path].append(obj) 59 | else: 60 | dataset[image_path] = [obj] 61 | 62 | print("Objects omitted", objects_omitted) 63 | print("Red light: ", red) 64 | print("Green light: ", green) 65 | 66 | instances = [] 67 | 68 | for key in dataset.keys(): 69 | inst = {} 70 | 71 | inst['image_path'] = key 72 | inst['object'] = dataset[key] 73 | 74 | instances.append(inst) 75 | 76 | return instances 77 | 78 | 79 | def load_image(path): 80 | img = cv2.imread(os.path.join(path)) 81 | 82 | if len(img.shape) == 3: 83 | img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) 84 | 85 | if len(img.shape) == 1: 86 | img = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB) 87 | 88 | return img -------------------------------------------------------------------------------- /grab_screen.py: 
-------------------------------------------------------------------------------- 1 | import copy 2 | import json 3 | import os 4 | import argparse 5 | import cv2 6 | import numpy as np 7 | import win32gui, win32ui, win32con, win32api 8 | import tensorflow as tf 9 | from keras.models import load_model 10 | from mss import mss 11 | 12 | from yolo import dummy_loss 13 | from postprocessing import decode_netout, draw_boxes 14 | 15 | 16 | BASE_DIR = os.path.dirname(__file__) 17 | 18 | 19 | def grab_screen_slower(region=None): 20 | hwin = win32gui.GetDesktopWindow() 21 | 22 | if region: 23 | left, top, x2, y2 = region 24 | width = x2 - left + 1 25 | height = y2 - top + 1 26 | else: 27 | width = win32api.GetSystemMetrics(win32con.SM_CXVIRTUALSCREEN) 28 | height = win32api.GetSystemMetrics(win32con.SM_CYVIRTUALSCREEN) 29 | left = win32api.GetSystemMetrics(win32con.SM_XVIRTUALSCREEN) 30 | top = win32api.GetSystemMetrics(win32con.SM_YVIRTUALSCREEN) 31 | 32 | hwindc = win32gui.GetWindowDC(hwin) 33 | srcdc = win32ui.CreateDCFromHandle(hwindc) 34 | memdc = srcdc.CreateCompatibleDC() 35 | bmp = win32ui.CreateBitmap() 36 | bmp.CreateCompatibleBitmap(srcdc, width, height) 37 | memdc.SelectObject(bmp) 38 | memdc.BitBlt((0, 0), (width, height), srcdc, (left, top), win32con.SRCCOPY) 39 | 40 | signedIntsArray = bmp.GetBitmapBits(True) 41 | img = np.fromstring(signedIntsArray, dtype='uint8') 42 | img.shape = (height, width, 4) 43 | 44 | srcdc.DeleteDC() 45 | memdc.DeleteDC() 46 | win32gui.ReleaseDC(hwin, hwindc) 47 | win32gui.DeleteObject(bmp.GetHandle()) 48 | 49 | 50 | return cv2.cvtColor(img, cv2.COLOR_BGRA2RGB) 51 | 52 | 53 | def grab_and_broadcast_screen(config): 54 | paused = False 55 | 56 | mon = {'top': 10, 'left': 10, 'width': 750, 'height': 680} 57 | sct = mss() 58 | 59 | dummy_array = np.zeros((1, 1, 1, 1, config['model']['max_obj'], 4)) 60 | 61 | model = load_model("checkpoints/traffic-light-model.h5", 62 | custom_objects={'custom_loss': dummy_loss, 'tf': tf}) 63 | 64 | frame_num = 0 65 | while (True): 66 | 67 | if not paused: 68 | screen = np.array(sct.grab(mon)) 69 | screen = cv2.cvtColor(screen, cv2.COLOR_BGR2RGB) 70 | 71 | plt_image = copy.deepcopy(screen) 72 | 73 | screen = cv2.resize(screen, (config['model']['image_h'], config['model']['image_w'])) 74 | screen = screen / 255. 
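# same preprocessing as load_image_predict in preprocessing.py: resize to the network
# input size, scale pixel values to [0, 1], then add a batch dimension below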
75 | screen = np.expand_dims(screen, 0) 76 | 77 | netout = model.predict([screen, dummy_array])[0] 78 | 79 | boxes = decode_netout(netout, 80 | obj_threshold=config['model']['obj_thresh'], 81 | nms_threshold=config['model']['nms_thresh'], 82 | anchors=config['model']['anchors'], 83 | nb_class=config['model']['num_classes']) 84 | 85 | plt_image = draw_boxes(plt_image, boxes, labels=config['model']['classes']) 86 | cv2.imshow('window', cv2.cvtColor(plt_image, cv2.COLOR_BGR2RGB)) 87 | 88 | key = cv2.waitKey(10) & 0xff 89 | if key == 27: 90 | cv2.destroyAllWindows() 91 | break 92 | 93 | 94 | #save images when o or O is pressed 95 | if key == ord('o') or key == ord('O'): 96 | cv2.imwrite(os.path.join(BASE_DIR, 'out', str(frame_num)+".png"), cv2.cvtColor(plt_image, cv2.COLOR_BGR2RGB)) 97 | frame_num += 1 98 | 99 | 100 | def main(args): 101 | config_path = args.conf 102 | 103 | with open(config_path) as config_buffer: 104 | config = json.loads(config_buffer.read()) 105 | 106 | grab_and_broadcast_screen(config) 107 | 108 | 109 | if __name__ == '__main__': 110 | argparser = argparse.ArgumentParser() 111 | 112 | argparser.add_argument( 113 | '-c', 114 | '--conf', 115 | default='config.json', 116 | help='Path to configuration file') 117 | 118 | args = argparser.parse_args() 119 | main(args) -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # traffic-light-detection-module 2 | 3 | ⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️ 4 | # IMPORTANT UPDATE 5 | This repo is deprecated. For a newer and improved model, implemented in PyTorch, please refer to this [repo](https://github.com/filipbasara0/simple-object-detection/blob/main/README.md#carla-traffic-lights). 6 | 7 | ⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️ 8 | 9 | 10 | ![out(2).png](https://github.com/affinis-lab/traffic-light-detection-module/blob/master/out/out2.png) 11 | 12 | ## About 13 | 14 | Module for detecting traffic lights in the [CARLA autonomous driving simulator](http://carla.org/) (version: 0.8.4).
15 | Built upon and inspired by https://github.com/experiencor/keras-yolo2.
16 | Instructions and more traffic light detection examples can be found below.
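For a quick check from Python, here is a minimal inference sketch built on the helpers already in this repo (`predict.py` and `postprocessing.py`); it assumes a trained checkpoint, named as `saved_model_name` in `config.json`, has been placed in a `checkpoints` folder next to the scripts:

```python
import json
import cv2

from predict import get_model_from_file, predict_with_model_from_file
from postprocessing import draw_boxes

with open('config.json') as f:
    config = json.load(f)

# loads checkpoints/<saved_model_name> with the training loss stubbed out
model = get_model_from_file(config)

# boxes are BoundBox objects with coordinates normalized to [0, 1]
image_path = 'test_images/test (1).png'
boxes = predict_with_model_from_file(config, model, image_path)

image = draw_boxes(cv2.imread(image_path), boxes, config['model']['classes'])
cv2.imwrite('out/example.png', image)
```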
17 | 18 | - This module is used along with several other [modules](https://github.com/affinis-lab) to implement our version of imitation learning in the CARLA simulator. Results of the [core](https://github.com/affinis-lab/core) module can be found in this [repository](https://github.com/affinis-lab/core). 19 | 20 | - The object detection model is based on Tiny YOLOv2 21 | 22 | - Training started from YOLOv2 COCO pretrained weights 23 | 24 | - The model was first trained on the LISA traffic light detection dataset (~5800 images), and after that on the dataset I collected from the CARLA simulator (~1800 images). 25 | 26 | ## CARLA dataset and model 27 | 28 | - The dataset I collected in the CARLA simulator can be found [here](https://drive.google.com/drive/folders/1TXkPLWlNgauPhQnKEoPDZsx7Px1MD9n_?usp=sharing); the annotations can be found [here](https://github.com/affinis-lab/traffic-light-detection-module/blob/master/dataset/carla_all.csv). 29 | 30 | - **Important note** - several images in the dataset are left out of the annotations because their bounding boxes are too small (the traffic lights are too far away). I also filtered out all annotated objects with xmax < 15 when loading the dataset. Around 70-80 out of ~1800 images are left out, so it isn't that problematic. 31 | 32 | - The pretrained model can be found [here](https://drive.google.com/file/d/1FVb6b6axN2WAYePv0_zLyiWDois7PgMZ/view?usp=sharing). 33 | 34 | 35 | ## Instructions 36 | - To train: 37 | - In the [config](https://github.com/affinis-lab/traffic-light-detection-module/blob/master/config.json) file set _train_ -> _enabled_ to **true** 38 | - Put your annotations file in the **dataset** folder 39 | - In the [config](https://github.com/affinis-lab/traffic-light-detection-module/blob/master/config.json) file set _train_ -> _annot_file_name_ to the name of your annotations file 40 | - Put your images in the **dataset/images** folder 41 | - If necessary, adjust the parameters in [config](https://github.com/affinis-lab/traffic-light-detection-module/blob/master/config.json) according to your problem/dataset 42 | - Run main.py with **-c config.json** 43 | 44 | - To evaluate: 45 | - In the [config](https://github.com/affinis-lab/traffic-light-detection-module/blob/master/config.json) file set _train_ -> _enabled_ to **false** 46 | - Put your annotations file in the **evaluation** folder 47 | - In the [config](https://github.com/affinis-lab/traffic-light-detection-module/blob/master/config.json) file set _train_ -> _annot_file_name_ to the name of the annotations file containing the images for evaluation 48 | - Put your images in the **evaluation/images** folder 49 | - If necessary, adjust the parameters in [config](https://github.com/affinis-lab/traffic-light-detection-module/blob/master/config.json) according to your problem/dataset 50 | - Run main.py with **-c config.json** 51 | 52 | - To generate anchors: 53 | - Run generate_anchors.py with **-c config.json** 54 | 55 | - Soon to be added: 56 | - Real-time traffic light detection GIFs 57 | 58 | ## Examples 59 | - Several example predictions; more can be found in the [out folder](https://github.com/affinis-lab/traffic-light-detection-module/tree/master/out) 60 | 61 | ![out(11).png](https://github.com/affinis-lab/traffic-light-detection-module/blob/master/out/out11.png) 62 | ![out(12).png](https://github.com/affinis-lab/traffic-light-detection-module/blob/master/out/out12.png) 63 | ![out(6).png](https://github.com/affinis-lab/traffic-light-detection-module/blob/master/out/out6.png) 64 | 
![out(7).png](https://github.com/affinis-lab/traffic-light-detection-module/blob/master/out/out7.png) 65 | ![out(14).png](https://github.com/affinis-lab/traffic-light-detection-module/blob/master/out/out14.png) 66 | ![out(15).png](https://github.com/affinis-lab/traffic-light-detection-module/blob/master/out/out15.png) 67 | ![out(4).png](https://github.com/affinis-lab/traffic-light-detection-module/blob/master/out/out4.png) 68 | -------------------------------------------------------------------------------- /generate_anchors.py: -------------------------------------------------------------------------------- 1 | import random 2 | import argparse 3 | import numpy as np 4 | import cv2 5 | import os 6 | import json 7 | 8 | from preprocessing import load_carla_data 9 | 10 | 11 | argparser = argparse.ArgumentParser() 12 | 13 | argparser.add_argument( 14 | '-c', 15 | '--conf', 16 | default='config.json', 17 | help='path to configuration file') 18 | 19 | argparser.add_argument( 20 | '-a', 21 | '--anchors', 22 | default=2, 23 | help='number of anchors to use') 24 | 25 | 26 | BASE_DIR = os.path.dirname(__file__) 27 | IMAGES_DIR = os.path.join(BASE_DIR, 'dataset', 'images') 28 | 29 | 30 | def IOU(ann, centroids): 31 | w, h = ann 32 | similarities = [] 33 | 34 | for centroid in centroids: 35 | c_w, c_h = centroid 36 | 37 | if c_w >= w and c_h >= h: 38 | similarity = w*h/(c_w*c_h) 39 | elif c_w >= w and c_h <= h: 40 | similarity = w*c_h/(w*h + (c_w-w)*c_h) 41 | elif c_w <= w and c_h >= h: 42 | similarity = c_w*h/(w*h + c_w*(c_h-h)) 43 | else: #means both w,h are bigger than c_w and c_h respectively 44 | similarity = (c_w*c_h)/(w*h) 45 | similarities.append(similarity) # will become (k,) shape 46 | 47 | return np.array(similarities) 48 | 49 | 50 | def avg_IOU(anns, centroids): 51 | n,d = anns.shape 52 | sum = 0. 
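# avg_IOU: average, over all annotation boxes, of the best IoU against any centroid;
# printed in main() as a quick measure of how well the chosen anchors cover the data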
53 | 54 | for i in range(anns.shape[0]): 55 | sum+= max(IOU(anns[i], centroids)) 56 | 57 | return sum/n 58 | 59 | 60 | def print_anchors(centroids): 61 | anchors = centroids.copy() 62 | 63 | widths = anchors[:, 0] 64 | sorted_indices = np.argsort(widths) 65 | 66 | r = "anchors: [" 67 | for i in sorted_indices[:-1]: 68 | r += '%0.2f,%0.2f, ' % (anchors[i,0], anchors[i,1]) 69 | 70 | r += '%0.2f,%0.2f' % (anchors[sorted_indices[-1:],0], anchors[sorted_indices[-1:],1]) 71 | r += "]" 72 | 73 | print(r) 74 | 75 | 76 | def run_kmeans(ann_dims, anchor_num): 77 | ann_num = ann_dims.shape[0] 78 | prev_assignments = np.ones(ann_num)*(-1) 79 | iterations = 0 80 | old_distances = np.zeros((ann_num, anchor_num)) 81 | 82 | indices = [random.randrange(ann_dims.shape[0]) for i in range(anchor_num)] 83 | centroids = ann_dims[indices] 84 | anchor_dim = ann_dims.shape[1] 85 | 86 | while True: 87 | distances = [] 88 | iterations += 1 89 | for i in range(ann_num): 90 | d = 1 - IOU(ann_dims[i], centroids) 91 | distances.append(d) 92 | distances = np.array(distances) # distances.shape = (ann_num, anchor_num) 93 | 94 | print("iteration {}: dists = {}".format(iterations, np.sum(np.abs(old_distances-distances)))) 95 | 96 | #assign samples to centroids 97 | assignments = np.argmin(distances,axis=1) 98 | 99 | if (assignments == prev_assignments).all(): 100 | return centroids 101 | 102 | #calculate new centroids 103 | centroid_sums=np.zeros((anchor_num, anchor_dim), np.float) 104 | for i in range(ann_num): 105 | centroid_sums[assignments[i]]+=ann_dims[i] 106 | for j in range(anchor_num): 107 | centroids[j] = centroid_sums[j]/(np.sum(assignments==j) + 1e-6) 108 | 109 | prev_assignments = assignments.copy() 110 | old_distances = distances.copy() 111 | 112 | 113 | def main(args): 114 | config_path = args.conf 115 | num_anchors = args.anchors 116 | 117 | with open(config_path) as config_buffer: 118 | config = json.loads(config_buffer.read()) 119 | 120 | data = load_carla_data(os.path.join(BASE_DIR, 'dataset',config['train']['annot_file_name']), config['model']['classes']) 121 | 122 | print(len(data)) 123 | 124 | np.random.shuffle(data) 125 | 126 | grid_w = config['model']['grid_w'] 127 | grid_h = config['model']['grid_h'] 128 | 129 | # run k_mean to find the anchors 130 | annotation_dims = [] 131 | for image in data: 132 | img = cv2.imread(os.path.join(IMAGES_DIR,image['image_path'])) 133 | h, w, c = img.shape 134 | cell_w = w/grid_w 135 | cell_h = h/grid_h 136 | 137 | for obj in image['object']: 138 | relative_w = (float(obj['xmax']) - float(obj['xmin']))/cell_w 139 | relatice_h = (float(obj["ymax"]) - float(obj['ymin']))/cell_h 140 | annotation_dims.append(tuple(map(float, (relative_w,relatice_h)))) 141 | 142 | annotation_dims = np.array(annotation_dims) 143 | centroids = run_kmeans(annotation_dims, num_anchors) 144 | 145 | # write anchors to file 146 | print('\naverage IOU for', num_anchors, 'anchors:', '%0.2f' % avg_IOU(annotation_dims, centroids)) 147 | print_anchors(centroids) 148 | 149 | 150 | if __name__ == '__main__': 151 | args = argparser.parse_args() 152 | main(args) -------------------------------------------------------------------------------- /postprocessing.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cv2 3 | 4 | 5 | def bbox_iou(box1, box2): 6 | intersect_w = interval_overlap([box1.xmin, box1.xmax], [box2.xmin, box2.xmax]) 7 | intersect_h = interval_overlap([box1.ymin, box1.ymax], [box2.ymin, box2.ymax]) 8 | 9 | intersect = 
intersect_w * intersect_h 10 | 11 | w1, h1 = box1.xmax - box1.xmin, box1.ymax - box1.ymin 12 | w2, h2 = box2.xmax - box2.xmin, box2.ymax - box2.ymin 13 | 14 | union = w1 * h1 + w2 * h2 - intersect 15 | 16 | return float(intersect) / union 17 | 18 | 19 | def decode_netout(netout, anchors, nb_class, obj_threshold=0.3, nms_threshold=0.3): 20 | grid_h, grid_w, nb_box = netout.shape[:3] 21 | #grid_h, grid_w, nb_box = config['models']['traffic_light_module']['grid_h'], GRID_W, BOX 22 | boxes = [] 23 | 24 | # decode the output by the network 25 | netout[..., 4] = _sigmoid(netout[..., 4]) 26 | netout[..., 5:] = netout[..., 4][..., np.newaxis] * _softmax(netout[..., 5:]) 27 | netout[..., 5:] *= netout[..., 5:] > obj_threshold 28 | 29 | for row in range(grid_h): 30 | for col in range(grid_w): 31 | for b in range(nb_box): 32 | # from 4th element onwards are confidence and class classes 33 | classes = netout[row, col, b, 5:] 34 | 35 | if np.sum(classes) > 0: 36 | # first 4 elements are x, y, w, and h 37 | x, y, w, h = netout[row, col, b, :4] 38 | 39 | x = (col + _sigmoid(x)) / grid_w # center position, unit: image width 40 | y = (row + _sigmoid(y)) / grid_h # center position, unit: image height 41 | 42 | w = anchors[2 * b + 0] * np.exp(w) / grid_w # unit: image width 43 | h = anchors[2 * b + 1] * np.exp(h) / grid_h # unit: image height 44 | confidence = netout[row, col, b, 4] 45 | 46 | box = BoundBox(x - w / 2, y - h / 2, x + w / 2, y + h / 2, confidence, classes) 47 | 48 | boxes.append(box) 49 | 50 | # suppress non-maximal boxes 51 | for c in range(nb_class): 52 | sorted_indices = list(reversed(np.argsort([box.classes[c] for box in boxes]))) 53 | 54 | for i in range(len(sorted_indices)): 55 | index_i = sorted_indices[i] 56 | 57 | if boxes[index_i].classes[c] == 0: 58 | continue 59 | else: 60 | for j in range(i + 1, len(sorted_indices)): 61 | index_j = sorted_indices[j] 62 | 63 | if bbox_iou(boxes[index_i], boxes[index_j]) >= nms_threshold: 64 | boxes[index_j].classes[c] = 0 65 | 66 | # remove the boxes which are less likely than a obj_threshold 67 | boxes = [box for box in boxes if box.get_score() > obj_threshold] 68 | 69 | boxes = sorted(boxes, key=lambda box: box.get_score(), reverse=True) 70 | if len(boxes) > 0: boxes = [boxes[0]] 71 | return boxes 72 | 73 | 74 | def draw_boxes(image, boxes, labels): 75 | image_h, image_w, _ = image.shape 76 | 77 | for box in boxes: 78 | 79 | if box.xmin>image_w or box.xmax>image_w or box.ymin>image_h or box.ymax>image_h: 80 | continue 81 | 82 | if (box.xmin<0): box.xmin = 0 83 | if (box.ymin < 0): box.ymin = 0 84 | if (box.xmax < 0): box.xmax = 0 85 | if (box.ymax < 0): box.ymax = 0 86 | 87 | xmin = int(box.xmin * image_w) 88 | ymin = int(box.ymin * image_h) 89 | xmax = int(box.xmax * image_w) 90 | ymax = int(box.ymax * image_h) 91 | 92 | label = labels[box.get_label()] 93 | 94 | r_color = 0 95 | g_color = 0 96 | 97 | if label== "go": 98 | g_color = 255 99 | else: 100 | r_color = 255 101 | 102 | cv2.rectangle(image, (xmin, ymin), (xmax, ymax), (0, g_color, r_color), 3) 103 | cv2.putText(image, 104 | label + ' ' + str(round(box.get_score(),4)), 105 | (xmin, ymin - 13), 106 | cv2.FONT_HERSHEY_SIMPLEX, 107 | 1e-3 * image_h, 108 | (0, g_color, r_color), 2) 109 | 110 | return image 111 | 112 | 113 | def compute_overlap(a, b): 114 | """ 115 | Code originally from https://github.com/rbgirshick/py-faster-rcnn. 
116 | Parameters 117 | ---------- 118 | a: (N, 4) ndarray of float 119 | b: (K, 4) ndarray of float 120 | Returns 121 | ------- 122 | overlaps: (N, K) ndarray of overlap between boxes and query_boxes 123 | """ 124 | area = (b[:, 2] - b[:, 0]) * (b[:, 3] - b[:, 1]) 125 | 126 | iw = np.minimum(np.expand_dims(a[:, 2], axis=1), b[:, 2]) - np.maximum(np.expand_dims(a[:, 0], 1), b[:, 0]) 127 | ih = np.minimum(np.expand_dims(a[:, 3], axis=1), b[:, 3]) - np.maximum(np.expand_dims(a[:, 1], 1), b[:, 1]) 128 | 129 | iw = np.maximum(iw, 0) 130 | ih = np.maximum(ih, 0) 131 | 132 | ua = np.expand_dims((a[:, 2] - a[:, 0]) * (a[:, 3] - a[:, 1]), axis=1) + area - iw * ih 133 | 134 | ua = np.maximum(ua, np.finfo(float).eps) 135 | 136 | intersection = iw * ih 137 | 138 | return intersection / ua 139 | 140 | 141 | def interval_overlap(interval_a, interval_b): 142 | x1, x2 = interval_a 143 | x3, x4 = interval_b 144 | 145 | if x3 < x1: 146 | if x4 < x1: 147 | return 0 148 | else: 149 | return min(x2, x4) - x1 150 | else: 151 | if x2 < x3: 152 | return 0 153 | else: 154 | return min(x2, x4) - x3 155 | 156 | 157 | def compute_ap(recall, precision): 158 | """ Compute the average precision, given the recall and precision curves. 159 | Code originally from https://github.com/rbgirshick/py-faster-rcnn. 160 | # Arguments 161 | recall: The recall curve (list). 162 | precision: The precision curve (list). 163 | # Returns 164 | The average precision as computed in py-faster-rcnn. 165 | """ 166 | # correct AP calculation 167 | # first append sentinel values at the end 168 | mrec = np.concatenate(([0.], recall, [1.])) 169 | mpre = np.concatenate(([0.], precision, [0.])) 170 | 171 | # compute the precision envelope 172 | for i in range(mpre.size - 1, 0, -1): 173 | mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i]) 174 | 175 | # to calculate area under PR curve, look for points 176 | # where X axis (recall) changes value 177 | i = np.where(mrec[1:] != mrec[:-1])[0] 178 | 179 | # and sum (\Delta recall) * prec 180 | ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1]) 181 | return ap 182 | 183 | 184 | def _sigmoid(x): 185 | return 1. / (1. 
+ np.exp(-x)) 186 | 187 | 188 | def _softmax(x, axis=-1, t=-100.): 189 | x = x - np.max(x) 190 | 191 | if np.min(x) < t: 192 | x = x / np.min(x) * t 193 | 194 | e_x = np.exp(x) 195 | 196 | return e_x / e_x.sum(axis, keepdims=True) 197 | 198 | 199 | class BoundBox: 200 | def __init__(self, xmin, ymin, xmax, ymax, c=None, classes=None): 201 | self.xmin = xmin 202 | self.ymin = ymin 203 | self.xmax = xmax 204 | self.ymax = ymax 205 | 206 | self.c = c 207 | self.classes = classes 208 | 209 | self.label = -1 210 | self.score = -1 211 | 212 | def get_label(self): 213 | if self.label == -1: 214 | self.label = np.argmax(self.classes) 215 | 216 | return self.label 217 | 218 | def get_score(self): 219 | if self.score == -1: 220 | self.score = self.classes[self.get_label()] 221 | 222 | return self.score -------------------------------------------------------------------------------- /utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import keras 3 | import cv2 4 | import copy 5 | import os 6 | from imgaug import augmenters as iaa 7 | from sklearn.preprocessing import LabelEncoder 8 | 9 | from postprocessing import interval_overlap 10 | 11 | 12 | BASE_DIR = os.path.dirname(__file__) 13 | IMAGES_DIR = os.path.join(BASE_DIR, 'dataset', 'images') 14 | 15 | 16 | def bbox_iou(box1, box2): 17 | # 0 ,1 ,2 ,3 18 | # xmin,ymin,xmax,ymax 19 | intersect_w = interval_overlap([box1[0], box1[2]], [box2[0], box2[2]]) 20 | intersect_h = interval_overlap([box1[1], box1[3]], [box2[1], box2[3]]) 21 | 22 | intersect = intersect_w * intersect_h 23 | 24 | w1, h1 = box1[2] - box1[0], box1[3] - box1[1] 25 | w2, h2 = box2[2] - box2[0], box2[3] - box2[1] 26 | 27 | union = w1 * h1 + w2 * h2 - intersect 28 | 29 | return float(intersect) / union 30 | 31 | 32 | class BatchGenerator(keras.utils.Sequence): 33 | 'Generates data for Keras' 34 | def __init__(self, config, dataset, shuffle=True, jitter = True): 35 | 'Initialization' 36 | self.config = config 37 | self.dataset = dataset 38 | 39 | self.image_h = config['model']['image_h'] 40 | self.image_w = config['model']['image_w'] 41 | self.n_channels = 3 42 | 43 | self.grid_h = config['model']['grid_h'] 44 | self.grid_w = config['model']['grid_w'] 45 | 46 | self.n_classes = config['model']['num_classes'] 47 | self.labels = config['model']['classes'] 48 | 49 | self.batch_size = config['train']['batch_size'] 50 | self.max_obj = config['model']['max_obj'] 51 | 52 | self.shuffle = shuffle 53 | self.jitter = jitter 54 | 55 | self.nb_anchors = int(len(config['model']['anchors']) / 2) 56 | 57 | self.anchors = [[0, 0, config['model']['anchors'][2 * i], config['model']['anchors'][2 * i + 1]] for i in 58 | range(int(len(config['model']['anchors']) // 2))] 59 | 60 | self.on_epoch_end() 61 | 62 | sometimes = lambda aug: iaa.Sometimes(0.5, aug) 63 | 64 | self.aug_pipe = iaa.Sequential( 65 | [ 66 | # apply the following augmenters to most images 67 | # iaa.Fliplr(0.5), # horizontally flip 50% of all images 68 | # iaa.Flipud(0.2), # vertically flip 20% of all images 69 | # sometimes(iaa.Crop(percent=(0, 0.1))), # crop images by 0-10% of their height/width 70 | #sometimes(iaa.Affine( 71 | # scale={"x": (0.8, 1.2), "y": (0.8, 1.2)}, # scale images to 80-120% of their size, individually per axis 72 | # translate_percent={"x": (-0.2, 0.2), "y": (-0.2, 0.2)}, # translate by -20 to +20 percent (per axis) 73 | # rotate=(-5, 5), # rotate by -45 to +45 degrees 74 | # shear=(-5, 5), # shear by -16 to +16 degrees 75 | # order=[0, 1], # use 
nearest neighbour or bilinear interpolation (fast) 76 | # cval=(0, 255), # if mode is constant, use a cval between 0 and 255 77 | # mode=ia.ALL # use any of scikit-image's warping modes (see 2nd image from the top for examples) 78 | #)), 79 | # execute 0 to 5 of the following (less important) augmenters per image 80 | # don't execute all of them, as that would often be way too strong 81 | iaa.SomeOf((0, 3), 82 | [ 83 | # sometimes(iaa.Superpixels(p_replace=(0, 1.0), n_segments=(20, 200))), # convert images into their superpixel representation 84 | iaa.OneOf([ 85 | iaa.GaussianBlur((0, 3.0)), # blur images with a sigma between 0 and 3.0 86 | iaa.AverageBlur(k=(2, 7)), 87 | # blur image using local means with kernel sizes between 2 and 7 88 | iaa.MedianBlur(k=(3, 11)), 89 | # blur image using local medians with kernel sizes between 2 and 7 90 | ]), 91 | iaa.Sharpen(alpha=(0, 1.0), lightness=(0.75, 1.5)), # sharpen images 92 | # iaa.Emboss(alpha=(0, 1.0), strength=(0, 2.0)), # emboss images 93 | # search either for all edges or for directed edges 94 | # sometimes(iaa.OneOf([ 95 | # iaa.EdgeDetect(alpha=(0, 0.7)), 96 | # iaa.DirectedEdgeDetect(alpha=(0, 0.7), direction=(0.0, 1.0)), 97 | # ])), 98 | iaa.AdditiveGaussianNoise(loc=0, scale=(0.0, 0.05 * 255), per_channel=0.5), 99 | # add gaussian noise to images 100 | iaa.OneOf([ 101 | iaa.Dropout((0.01, 0.1), per_channel=0.5), # randomly remove up to 10% of the pixels 102 | # iaa.CoarseDropout((0.03, 0.15), size_percent=(0.02, 0.05), per_channel=0.2), 103 | ]), 104 | # iaa.Invert(0.05, per_channel=True), # invert color channels 105 | iaa.Add((-10, 10), per_channel=0.5), 106 | # change brightness of images (by -10 to 10 of original value) 107 | iaa.Multiply((0.5, 1.5), per_channel=0.5), 108 | # change brightness of images (50-150% of original value) 109 | iaa.ContrastNormalization((0.5, 2.0), per_channel=0.5), # improve or worsen the contrast 110 | # iaa.Grayscale(alpha=(0.0, 1.0)), 111 | # sometimes(iaa.ElasticTransformation(alpha=(0.5, 3.5), sigma=0.25)), # move pixels locally around (with random strengths) 112 | # sometimes(iaa.PiecewiseAffine(scale=(0.01, 0.05))) # sometimes move parts of the image around 113 | ], 114 | random_order=True 115 | ) 116 | ], 117 | random_order=True 118 | ) 119 | 120 | 121 | def __len__(self): 122 | 'Denotes the number of batches per epoch' 123 | return int(np.ceil(float(len(self.dataset)) / self.batch_size)) 124 | 125 | 126 | def __getitem__(self, index): 127 | 'Generate one batch of data' 128 | 129 | ''' 130 | l_bound = index*self.config['BATCH_SIZE'] 131 | r_bound = (index+1)*self.config['BATCH_SIZE'] 132 | 133 | if r_bound > len(self.images): 134 | r_bound = len(self.images) 135 | l_bound = r_bound - self.config['BATCH_SIZE'] 136 | ''' 137 | 138 | le = LabelEncoder() 139 | le.fit_transform(self.labels) 140 | 141 | x_batch = np.zeros((self.batch_size, self.image_h, self.image_w, self.n_channels)) 142 | b_batch = np.zeros((self.batch_size, 1, 1, 1, self.max_obj, 4)) 143 | 144 | y_batch = np.zeros((self.batch_size, self.grid_h, self.grid_w, self.nb_anchors, 4 + 1 + self.num_classes())) # desired network output 145 | 146 | #current_batch = self.dataset[l_bound:r_bound] 147 | current_batch = self.dataset[index * self.batch_size:(index + 1) * self.batch_size] 148 | 149 | instance_num = 0 150 | 151 | for instance in current_batch: 152 | img, object_annotations = self.prep_image_and_annot(instance, jitter=self.jitter) 153 | 154 | obj_num = 0 155 | 156 | # center of the bounding box is divided with the image 
width/height and grid width/height 157 | # to get the coordinates relative to a single element of a grid 158 | for obj in object_annotations: 159 | if obj['xmax'] > obj['xmin'] and obj['ymax'] > obj['ymin'] and obj['class'] in self.labels: 160 | center_x = .5 * (obj['xmin'] + obj['xmax']) # center of the lower side of the bb (by x axis) 161 | center_x = center_x / (float(self.image_w) / self.grid_w) # scaled to the grid unit (a value between 0 and GRID_W-1) 162 | center_y = .5 * (obj['ymin'] + obj['ymax']) # center of the lower side (by y axis) 163 | center_y = center_y / (float(self.image_h) / self.grid_h) # scaled to the grid unit (a value between 0 and GRID_H-1) 164 | 165 | grid_x = int(np.floor(center_x)) # assigns the object to the matching 166 | grid_y = int(np.floor(center_y)) # grid element according to (center_x, center_y) 167 | 168 | if grid_x < self.grid_w and grid_y < self.grid_h: 169 | center_w = (obj['xmax'] - obj['xmin']) / (float(self.image_w) / self.grid_w) 170 | center_h = (obj['ymax'] - obj['ymin']) / (float(self.image_h) / self.grid_h) 171 | 172 | box = [center_x, center_y, center_w, center_h] 173 | 174 | # find the anchor that best predicts this box 175 | best_anchor = -1 176 | max_iou = -1 177 | 178 | shifted_box = [0, 0, center_w, center_h] 179 | 180 | for i in range(len(self.anchors)): 181 | anchor = self.anchors[i] 182 | iou = bbox_iou(shifted_box, anchor) 183 | 184 | if max_iou < iou: 185 | best_anchor = i 186 | max_iou = iou 187 | 188 | classes = [0, 0] 189 | 190 | obj_label = int(le.transform([obj['class']])) 191 | 192 | if obj_label == 0: 193 | classes[0] = 1 194 | else: 195 | classes[1] = 1 196 | 197 | img = self.normalize(img) 198 | 199 | x_batch[instance_num] = img 200 | 201 | b_batch[instance_num, 0, 0, 0, obj_num] = box 202 | y_batch[instance_num, grid_y, grid_x, best_anchor] = [box[0], box[1], box[2], box[3], 1.0, classes[0], classes[1]] 203 | 204 | obj_num += 1 205 | obj_num %= self.max_obj 206 | 207 | instance_num += 1 208 | 209 | return [x_batch, b_batch], y_batch 210 | 211 | 212 | def prep_image_and_annot(self, dataset_instance, jitter): 213 | image_path = dataset_instance['image_path'] 214 | image = self.load_image(os.path.join(IMAGES_DIR,image_path)) 215 | 216 | h, w, c = image.shape 217 | 218 | if jitter: 219 | image = self.aug_pipe.augment_image(image) 220 | 221 | # resize the image to standard size 222 | image = cv2.resize(image, (self.image_h, self.image_w)) 223 | 224 | object_annotations = copy.deepcopy(dataset_instance['object']) 225 | for obj in object_annotations: 226 | for attr in ['xmin', 'xmax']: 227 | obj[attr] = int(obj[attr] * float(self.image_w) / w) 228 | obj[attr] = max(min(obj[attr], self.image_w), 0) 229 | 230 | for attr in ['ymin', 'ymax']: 231 | obj[attr] = int(obj[attr] * float(self.image_h) / h) 232 | obj[attr] = max(min(obj[attr], self.image_h), 0) 233 | 234 | return image, object_annotations 235 | 236 | 237 | def on_epoch_end(self): 238 | 'Updates indexes after each epoch' 239 | if self.shuffle: 240 | np.random.shuffle(self.dataset) 241 | 242 | 243 | def load_image(self, path): 244 | img = cv2.imread(os.path.join(IMAGES_DIR, path)) 245 | 246 | try: 247 | if len(img.shape) == 3: 248 | img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) 249 | 250 | else: 251 | img = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB) 252 | except: 253 | print(path) 254 | 255 | return img 256 | 257 | 258 | def load_annotation(self, i): 259 | annots = [] 260 | 261 | for obj in self.dataset[i]['object']: 262 | annot = [obj['xmin'], obj['ymin'], obj['xmax'], 
obj['ymax'], self.labels.index(obj['class'])] 263 | annots += [annot] 264 | 265 | if len(annots) == 0: annots = [[]] 266 | 267 | return np.array(annots) 268 | 269 | 270 | def normalize(self, image): 271 | return image/255. 272 | 273 | 274 | def num_classes(self): 275 | return len(self.labels) 276 | 277 | 278 | def size(self): 279 | return len(self.dataset) -------------------------------------------------------------------------------- /yolo.py: -------------------------------------------------------------------------------- 1 | from keras.models import Model, load_model 2 | from keras.layers import Reshape, Lambda, Conv2D, Input, MaxPooling2D, BatchNormalization 3 | from keras.layers.advanced_activations import LeakyReLU 4 | from keras.callbacks import EarlyStopping, ModelCheckpoint, TensorBoard, Callback 5 | from keras.optimizers import SGD, Adam, RMSprop 6 | import tensorflow as tf 7 | import os 8 | import numpy as np 9 | 10 | from postprocessing import decode_netout, interval_overlap, compute_overlap, compute_ap 11 | from preprocessing import load_image_predict, load_carla_data 12 | from utils import BatchGenerator 13 | 14 | 15 | BASE_DIR = os.path.dirname(__file__) 16 | ANNOT_DIR = os.path.join(BASE_DIR, 'dataset') 17 | 18 | 19 | class TinyYoloFeature: 20 | """Tiny yolo feature extractor""" 21 | def __init__(self, input_size, config): 22 | input_image = Input(shape=(input_size, input_size, 3)) 23 | 24 | # Layer 1 25 | x = Conv2D(16, (3,3), strides=(1,1), padding='same', name='conv_1', use_bias=False)(input_image) 26 | x = BatchNormalization(name='norm_1')(x) 27 | x = LeakyReLU(alpha=0.1)(x) 28 | x = MaxPooling2D(pool_size=(2, 2))(x) 29 | 30 | # Layer 2 - 5 31 | for i in range(0,4): 32 | x = Conv2D(32*(2**i), (3,3), strides=(1,1), padding='same', name='conv_' + str(i+2), use_bias=False)(x) 33 | x = BatchNormalization(name='norm_' + str(i+2))(x) 34 | x = LeakyReLU(alpha=0.1)(x) 35 | x = MaxPooling2D(pool_size=(2, 2))(x) 36 | 37 | # Layer 6 38 | x = Conv2D(512, (3,3), strides=(1,1), padding='same', name='conv_6', use_bias=False)(x) 39 | x = BatchNormalization(name='norm_6')(x) 40 | x = LeakyReLU(alpha=0.1)(x) 41 | x = MaxPooling2D(pool_size=(2, 2), strides=(1,1), padding='same')(x) 42 | 43 | # Layer 7 44 | x = Conv2D(1024, (3,3), strides=(1,1), padding='same', name='conv_' + str(7), use_bias=False)(x) 45 | x = BatchNormalization(name='norm_' + str(7))(x) 46 | x = LeakyReLU(alpha=0.1)(x) 47 | 48 | # Layer 8 49 | x = Conv2D(512, (3, 3), strides=(1, 1), padding='same', name='conv_' + str(8), use_bias=False)(x) 50 | x = BatchNormalization(name='norm_' + str(8))(x) 51 | x = LeakyReLU(alpha=0.1, name = 'last')(x) 52 | 53 | self.feature_extractor = Model(input_image, x) 54 | 55 | 56 | pretrained = load_model('checkpoints\\' + config['model']['saved_model_name'], custom_objects={'custom_loss': dummy_loss, 'tf': tf}) 57 | pretrained = pretrained.get_layer('model_1') 58 | 59 | idx = 0 60 | for layer in self.feature_extractor.layers: 61 | print(layer.name) 62 | layer.set_weights(pretrained.get_layer(index=idx).get_weights()) 63 | idx += 1 64 | 65 | frozen = [1, 2, 3, 4, 5, 6, 7] 66 | 67 | for l in frozen: 68 | self.feature_extractor.get_layer("conv_" + str(l)).trainable = False 69 | self.feature_extractor.get_layer("norm_" + str(l)).trainable = False 70 | 71 | self.feature_extractor.summary() 72 | 73 | 74 | class YOLO(object): 75 | def __init__(self, config): 76 | 77 | self.config = config 78 | 79 | self.image_h = config['model']['image_h'] 80 | self.image_w = config['model']['image_w'] 81 | 82 | 
self.grid_h, self.grid_w = config['model']['grid_h'], config['model']['grid_w'] 83 | 84 | self.labels = config['model']['classes'] 85 | self.nb_class = int(len(self.labels)) 86 | self.nb_box = int(len(config['model']['anchors'])/2) 87 | self.class_wt = np.ones(self.nb_class, dtype='float32') 88 | self.anchors = config['model']['anchors'] 89 | 90 | self.max_box_per_image = config['model']['max_obj'] 91 | self.batch_size = config['train']['batch_size'] 92 | 93 | self.object_scale = config['model']['obj_scale'] 94 | self.no_object_scale = config['model']['no_obj_scale'] 95 | self.coord_scale = config['model']['coord_scale'] 96 | self.class_scale = config['model']['class_scale'] 97 | 98 | self.obj_thresh = config['model']['obj_thresh'] 99 | self.nms_thresh = config['model']['nms_thresh'] 100 | 101 | self.warmup_batches = config['train']['warmup_batches'] 102 | self.debug = config['train']['debug'] 103 | 104 | ########################## 105 | # Make the model 106 | ########################## 107 | 108 | # make the feature extractor layers 109 | input_image = Input(shape=(self.image_h, self.image_w, 3)) 110 | self.true_boxes = Input(shape=(1, 1, 1, self.max_box_per_image, 4)) 111 | 112 | self.feature_extractor = TinyYoloFeature(self.image_h, config).feature_extractor 113 | features = self.feature_extractor(input_image) 114 | 115 | # Object detection layer 116 | output = Conv2D(self.nb_box * (4 + 1 + self.nb_class), 117 | (1, 1), strides=(1, 1), 118 | padding='same', 119 | name='DetectionLayer', 120 | activation='linear', 121 | kernel_initializer='lecun_normal')(features) 122 | 123 | output = Reshape((self.grid_h, self.grid_w, self.nb_box, 4 + 1 + self.nb_class))(output) 124 | output = Lambda(lambda args: args[0])([output, self.true_boxes]) 125 | 126 | self.model = Model([input_image, self.true_boxes], output) 127 | self.model.summary() 128 | 129 | pretrained = load_model('checkpoints\\' + config['model']['saved_model_name'], custom_objects={'custom_loss': self.custom_loss, 'tf': tf}) 130 | self.model.get_layer('DetectionLayer').set_weights( 131 | pretrained.get_layer('DetectionLayer').get_weights()) 132 | 133 | 134 | def load_weights(self, model_path): 135 | model = load_model(model_path, custom_objects={'custom_loss': self.custom_loss, 'tf': tf}) 136 | 137 | idx = 0 138 | for layer in self.model.layers: 139 | layer.set_weights(model.get_layer(index=idx).get_weights()) 140 | idx += 1 141 | 142 | 143 | def predict(self, image_path): 144 | image = load_image_predict(image_path, self.image_h, self.image_w) 145 | 146 | dummy_array = np.zeros((1, 1, 1, 1, self.max_box_per_image, 4)) 147 | netout = self.model.predict([image, dummy_array])[0] 148 | 149 | boxes = decode_netout(netout=netout, anchors = self.anchors, nb_class=self.nb_class, 150 | obj_threshold=self.obj_thresh, nms_threshold=self.nms_thresh) 151 | return boxes 152 | 153 | 154 | def train(self): 155 | data = load_carla_data(os.path.join(ANNOT_DIR, self.config['train']['annot_file_name']), self.config['model']['classes']) 156 | 157 | np.random.shuffle(data) 158 | 159 | train_instances, validation_instances = data[:1655], data[1655:] 160 | 161 | np.random.shuffle(train_instances) 162 | np.random.shuffle(validation_instances) 163 | 164 | train_generator = BatchGenerator(self.config, train_instances, jitter=True) 165 | validation_generator = BatchGenerator(self.config, validation_instances, jitter=False) 166 | 167 | checkpoint = ModelCheckpoint( 168 | 'checkpoints\\model.{epoch:02d}-{val_loss:.2f}.h5', 169 | monitor='val_loss', 170 | verbose=1, 
171 | save_best_only=True, 172 | mode='auto', 173 | period=1 174 | ) 175 | 176 | checkpoint_all = ModelCheckpoint( 177 | 'checkpoints\\all_models.{epoch:02d}-{loss:.2f}.h5', 178 | monitor='loss', 179 | verbose=1, 180 | save_best_only=True, 181 | mode='auto', 182 | period=1 183 | ) 184 | 185 | # optimizer = RMSprop(lr=1e-3,rho=0.9, epsilon=1e-08, decay=0.0) 186 | # optimizer = Adam(lr=1e-3, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0) 187 | optimizer = SGD(lr=1e-5, momentum=0.9, decay=0.0005) 188 | 189 | self.model.compile(loss=self.custom_loss, optimizer=optimizer) #, metrics=['accuracy']) 190 | 191 | self.model.summary() 192 | 193 | history = self.model.fit_generator(generator=train_generator, 194 | steps_per_epoch=len(train_generator), 195 | epochs=self.config['train']['nb_epochs'], 196 | verbose=1, 197 | validation_data=validation_generator, 198 | validation_steps=len(validation_generator), 199 | callbacks=[checkpoint, checkpoint_all],# map_evaluator_cb], # checkpoint, tensorboard 200 | max_queue_size=10, 201 | workers=3 202 | ) 203 | 204 | 205 | def evaluate(self): 206 | data = load_carla_data(os.path.join(ANNOT_DIR, self.config['train']['annot_file_name']), 207 | self.config['model']['classes']) 208 | 209 | np.random.shuffle(data) 210 | 211 | validation_instances = data#[1400:] 212 | 213 | validation_generator = BatchGenerator(self.config, validation_instances, jitter=False) 214 | 215 | map_evaluator_cb = self.MAP_evaluation(self, validation_generator, 216 | save_best=True, 217 | save_name='checkpoints\\best-mAP.h5', 218 | # os.path.join(BASE_DIR,'best_mAP\\weights.{epoch:02d}-{val_loss:.2f}.h5'), 219 | tensorboard=None, 220 | iou_threshold=0.4) 221 | 222 | self.model.compile(loss=self.custom_loss, optimizer=SGD(lr=1e-5, momentum=0.9, decay=0.0005)) 223 | 224 | self.model.summary() 225 | 226 | history = self.model.fit_generator(generator=validation_generator, 227 | steps_per_epoch=len(validation_generator), 228 | epochs=1, 229 | verbose=1, 230 | callbacks=[map_evaluator_cb] 231 | ) 232 | 233 | 234 | def normalize(self, image): 235 | return image / 255. 236 | 237 | 238 | def custom_loss(self, y_true, y_pred): 239 | mask_shape = tf.shape(y_true)[:4] 240 | 241 | cell_x = tf.to_float( 242 | tf.reshape(tf.tile(tf.range(self.grid_w), [self.grid_h]), (1, self.grid_h, self.grid_w, 1, 1))) 243 | cell_y = tf.transpose(cell_x, (0, 2, 1, 3, 4)) 244 | 245 | cell_grid = tf.tile(tf.concat([cell_x, cell_y], -1), [self.batch_size, 1, 1, self.nb_box, 1]) 246 | 247 | coord_mask = tf.zeros(mask_shape) 248 | conf_mask = tf.zeros(mask_shape) 249 | class_mask = tf.zeros(mask_shape) 250 | 251 | seen = tf.Variable(0.) 252 | total_loss = tf.Variable(0.) 253 | total_recall = tf.Variable(0.) 
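# total_boxes: number of predicted boxes per batch (grid cells x anchors x batch size);
# used further down to derive the positive-confidence normalizer as total_boxes - nb_conf_box_neg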
254 | total_boxes = tf.Variable(self.grid_h*self.grid_w*self.nb_box*self.batch_size) 255 | 256 | """ 257 | Adjust prediction 258 | """ 259 | ### adjust x and y 260 | pred_box_xy = tf.sigmoid(y_pred[..., :2]) + cell_grid 261 | 262 | ### adjust w and h 263 | pred_box_wh = tf.exp(y_pred[..., 2:4]) * np.reshape(self.anchors, [1, 1, 1, self.nb_box, 2]) 264 | 265 | ### adjust confidence 266 | pred_box_conf = tf.sigmoid(y_pred[..., 4]) 267 | 268 | ### adjust class probabilities 269 | pred_box_class = y_pred[..., 5:] 270 | 271 | """ 272 | Adjust ground truth 273 | """ 274 | ### adjust x and y 275 | true_box_xy = y_true[..., 0:2] # relative position to the containing cell 276 | 277 | ### adjust w and h 278 | true_box_wh = y_true[..., 2:4] # number of cells across, horizontally and vertically 279 | 280 | ### adjust confidence 281 | true_wh_half = true_box_wh / 2. 282 | true_mins = true_box_xy - true_wh_half 283 | true_maxes = true_box_xy + true_wh_half 284 | 285 | pred_wh_half = pred_box_wh / 2. 286 | pred_mins = pred_box_xy - pred_wh_half 287 | pred_maxes = pred_box_xy + pred_wh_half 288 | 289 | intersect_mins = tf.maximum(pred_mins, true_mins) 290 | intersect_maxes = tf.minimum(pred_maxes, true_maxes) 291 | intersect_wh = tf.maximum(intersect_maxes - intersect_mins, 0.) 292 | intersect_areas = intersect_wh[..., 0] * intersect_wh[..., 1] 293 | 294 | true_areas = true_box_wh[..., 0] * true_box_wh[..., 1] 295 | pred_areas = pred_box_wh[..., 0] * pred_box_wh[..., 1] 296 | 297 | union_areas = pred_areas + true_areas - intersect_areas 298 | iou_scores = tf.truediv(intersect_areas, union_areas) 299 | 300 | true_box_conf = iou_scores * y_true[..., 4] 301 | 302 | ### adjust class probabilities 303 | true_box_class = tf.argmax(y_true[..., 5:], -1) 304 | 305 | """ 306 | Determine the masks 307 | """ 308 | ### coordinate mask: simply the position of the ground truth boxes (the predictors) 309 | coord_mask = tf.expand_dims(y_true[..., 4], axis=-1) * self.coord_scale 310 | 311 | ### confidence mask: penalize responsible predictors + penalize boxes with low IOU 312 | # penalize the confidence of boxes whose best IOU with any ground truth box is below 0.5 313 | true_xy = self.true_boxes[..., 0:2] 314 | true_wh = self.true_boxes[..., 2:4] 315 | 316 | true_wh_half = true_wh / 2. 317 | true_mins = true_xy - true_wh_half 318 | true_maxes = true_xy + true_wh_half 319 | 320 | pred_xy = tf.expand_dims(pred_box_xy, 4) 321 | pred_wh = tf.expand_dims(pred_box_wh, 4) 322 | 323 | pred_wh_half = pred_wh / 2. 324 | pred_mins = pred_xy - pred_wh_half 325 | pred_maxes = pred_xy + pred_wh_half 326 | 327 | intersect_mins = tf.maximum(pred_mins, true_mins) 328 | intersect_maxes = tf.minimum(pred_maxes, true_maxes) 329 | intersect_wh = tf.maximum(intersect_maxes - intersect_mins, 0.)
330 | intersect_areas = intersect_wh[..., 0] * intersect_wh[..., 1] 331 | 332 | true_areas = true_wh[..., 0] * true_wh[..., 1] 333 | pred_areas = pred_wh[..., 0] * pred_wh[..., 1] 334 | 335 | union_areas = pred_areas + true_areas - intersect_areas 336 | iou_scores = tf.truediv(intersect_areas, union_areas) 337 | 338 | best_ious = tf.reduce_max(iou_scores, axis=4) 339 | #conf_mask = conf_mask + tf.to_float(best_ious < 0.5) * (1 - y_true[..., 4]) * self.no_object_scale 340 | 341 | # penalize the confidence of the boxes which are responsible for the corresponding ground truth box 342 | #conf_mask = conf_mask + y_true[..., 4] * self.object_scale 343 | 344 | conf_mask_neg = tf.to_float(best_ious < 0.50) * (1 - y_true[..., 4]) * self.no_object_scale 345 | conf_mask_pos = y_true[..., 4] * self.object_scale 346 | 347 | ### class mask: simply the position of the ground truth boxes (the predictors) 348 | class_mask = y_true[..., 4] * tf.gather(self.class_wt, true_box_class) * self.class_scale 349 | 350 | """ 351 | Warm-up training 352 | """ 353 | no_boxes_mask = tf.to_float(coord_mask < self.coord_scale / 2.) 354 | seen = tf.assign_add(seen, 1.) 355 | 356 | true_box_xy, true_box_wh, coord_mask = tf.cond(tf.less(seen, self.warmup_batches + 1), 357 | lambda: [true_box_xy + (0.5 + cell_grid) * no_boxes_mask, 358 | true_box_wh + tf.ones_like(true_box_wh) * \ 359 | np.reshape(self.anchors, [1, 1, 1, self.nb_box, 2]) * \ 360 | no_boxes_mask, 361 | tf.ones_like(coord_mask)], 362 | lambda: [true_box_xy, 363 | true_box_wh, 364 | coord_mask]) 365 | 366 | """ 367 | Finalize the loss 368 | """ 369 | nb_coord_box = tf.reduce_sum(tf.to_float(coord_mask > 0.0)) 370 | #nb_conf_box = tf.reduce_sum(tf.to_float(conf_mask > 0.0)) 371 | nb_conf_box_neg = tf.reduce_sum(tf.to_float(conf_mask_neg > 0.0)) 372 | nb_conf_box_pos = tf.subtract(tf.to_float(total_boxes), nb_conf_box_neg) #tf.reduce_sum(tf.to_float(conf_mask_pos > 0.0)) 373 | nb_class_box = tf.reduce_sum(tf.to_float(class_mask > 0.0)) 374 | 375 | true_box_wh = tf.sqrt(true_box_wh) 376 | pred_box_wh = tf.sqrt(pred_box_wh) 377 | 378 | loss_xy = tf.reduce_sum(tf.square(true_box_xy - pred_box_xy) * coord_mask) / (nb_coord_box + 1e-6) / 2. 379 | loss_wh = tf.reduce_sum(tf.square(true_box_wh - pred_box_wh) * coord_mask) / (nb_coord_box + 1e-6) / 2. 380 | loss_conf_neg = tf.reduce_sum(tf.square(true_box_conf - pred_box_conf) * conf_mask_neg) / (nb_conf_box_neg + 1e-6) / 2. 381 | loss_conf_pos = tf.reduce_sum(tf.square(true_box_conf - pred_box_conf) * conf_mask_pos) / (nb_conf_box_pos + 1e-6) / 2. 382 | loss_conf = loss_conf_neg + loss_conf_pos 383 | #loss_conf = tf.reduce_sum(tf.square(true_box_conf - pred_box_conf) * conf_mask) / (nb_conf_box + 1e-6) / 2.
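# Classification loss below: sparse softmax cross-entropy over the class logits,
# kept only for the anchors responsible for a ground truth box (class_mask > 0)
# and normalized by the number of such boxes.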
384 | loss_class = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=true_box_class, logits=pred_box_class) 385 | loss_class = tf.reduce_sum(loss_class * class_mask) / (nb_class_box + 1e-6) 386 | 387 | loss = tf.cond(tf.less(seen, self.warmup_batches + 1), 388 | lambda: loss_xy + loss_wh + loss_conf + loss_class + 10, 389 | lambda: loss_xy + loss_wh + loss_conf + loss_class) 390 | 391 | if self.debug: 392 | nb_true_box = tf.reduce_sum(y_true[..., 4]) 393 | nb_pred_box = tf.reduce_sum(tf.to_float(true_box_conf > 0.32) * tf.to_float(pred_box_conf > 0.3)) 394 | 395 | current_recall = nb_pred_box / (nb_true_box + 1e-6) 396 | total_recall = tf.assign_add(total_recall, current_recall) 397 | 398 | total_loss = tf.assign_add(total_loss, loss) 399 | 400 | loss = tf.Print(loss, [loss_xy], message='\nLoss XY \t', summarize=1000) 401 | loss = tf.Print(loss, [loss_wh], message='Loss WH \t', summarize=1000) 402 | loss = tf.Print(loss, [nb_conf_box_neg], message='Nb Conf Box Negative \t', summarize=1000) 403 | loss = tf.Print(loss, [nb_conf_box_pos], message='Nb Conf Box Positive \t', summarize=1000) 404 | loss = tf.Print(loss, [loss_conf_neg], message='Loss Conf Negative \t', summarize=1000) 405 | loss = tf.Print(loss, [loss_conf_pos], message='Loss Conf Positive \t', summarize=1000) 406 | loss = tf.Print(loss, [loss_conf], message='Loss Conf \t', summarize=1000) 407 | loss = tf.Print(loss, [loss_class], message='Loss Class \t', summarize=1000) 408 | loss = tf.Print(loss, [loss], message='Total Loss \t', summarize=1000) 409 | loss = tf.Print(loss, [total_loss / seen], message='Average Loss \t', summarize=1000) 410 | loss = tf.Print(loss, [nb_pred_box], message='Number of pred boxes \t', summarize=1000) 411 | loss = tf.Print(loss, [nb_true_box], message='Number of true boxes \t', summarize=1000) 412 | loss = tf.Print(loss, [current_recall], message='Current Recall \t', summarize=1000) 413 | loss = tf.Print(loss, [total_recall / seen], message='Average Recall \t', summarize=1000) 414 | 415 | 416 | return loss 417 | 418 | 419 | class MAP_evaluation(Callback): 420 | """ Evaluate a given dataset using a given model. 421 | Code originally from https://github.com/fizyr/keras-retinanet 422 | # Arguments 423 | generator : The generator that represents the dataset to evaluate. 424 | model : The model to evaluate. 425 | iou_threshold : The IoU threshold used to decide whether a detection counts as a positive or a negative. 426 | score_threshold : The confidence score threshold used to keep detections. 427 | save_path : The path to save images with visualized detections to. 428 | # Returns 429 | A dict mapping class names to mAP scores.
430 | """ 431 | 432 | def __init__(self, 433 | yolo, 434 | generator, 435 | iou_threshold=0.5, 436 | score_threshold=0.3, 437 | save_path=None, 438 | period=1, 439 | save_best=False, 440 | save_name=None, 441 | tensorboard=None): 442 | 443 | self.yolo = yolo 444 | self.generator = generator 445 | self.iou_threshold = iou_threshold 446 | self.save_path = save_path 447 | self.period = period 448 | self.save_best = save_best 449 | self.save_name = save_name 450 | self.tensorboard = tensorboard 451 | 452 | self.bestMap = 0 453 | 454 | self.model = self.yolo.model 455 | 456 | if not isinstance(self.tensorboard, TensorBoard) and self.tensorboard is not None: 457 | raise ValueError("Tensorboard object must be a instance from keras.callbacks.TensorBoard") 458 | 459 | 460 | def on_epoch_end(self, epoch, logs={}): 461 | print(epoch) 462 | #% self.period == 0 and self.period != 0: 463 | mAP, average_precisions = self.evaluate_mAP() 464 | print('\n') 465 | for label, average_precision in average_precisions.items(): 466 | print(self.yolo.labels[label], '{:.4f}'.format(average_precision)) 467 | print('mAP: {:.4f}'.format(mAP)) 468 | 469 | if self.save_best and self.save_name is not None and mAP > self.bestMap: 470 | print( 471 | "mAP improved from {} to {}, saving model to {}.".format(self.bestMap, mAP, self.save_name)) 472 | self.bestMap = mAP 473 | print(self.save_name) 474 | self.model.save(self.save_name) 475 | self.model.save_weights('checkpoints\\best-mAP.h5') 476 | else: 477 | print("mAP did not improve from {}.".format(self.bestMap)) 478 | 479 | if self.tensorboard is not None and self.tensorboard.writer is not None: 480 | import tensorflow as tf 481 | summary = tf.Summary() 482 | summary_value = summary.value.add() 483 | summary_value.simple_value = mAP 484 | summary_value.tag = "val_mAP" 485 | self.tensorboard.writer.add_summary(summary, epoch) 486 | 487 | 488 | def evaluate_mAP(self): 489 | average_precisions = self._calc_avg_precisions() 490 | mAP = sum(average_precisions.values()) / len(average_precisions) 491 | 492 | return mAP, average_precisions 493 | 494 | 495 | def _calc_avg_precisions(self): 496 | # gather all detections and annotations 497 | all_detections = [[None for i in range(self.generator.num_classes())] for j in 498 | range(self.generator.size())] 499 | all_annotations = [[None for i in range(self.generator.num_classes())] for j in 500 | range(self.generator.size())] 501 | 502 | for i in range(self.generator.size()): 503 | raw_image = self.generator.load_image(self.generator.dataset[i]['image_path']) 504 | raw_height, raw_width, _ = raw_image.shape 505 | # make the boxes and the labels 506 | pred_boxes = self.yolo.predict(os.path.join(ANNOT_DIR, 'images', self.generator.dataset[i]['image_path'])) 507 | 508 | score = np.array([box.score for box in pred_boxes]) 509 | pred_labels = np.array([box.label for box in pred_boxes]) 510 | 511 | if len(pred_boxes) > 0: 512 | pred_boxes = np.array([[box.xmin * raw_width, box.ymin * raw_height, box.xmax * raw_width, 513 | box.ymax * raw_height, box.score] for box in pred_boxes]) 514 | else: 515 | pred_boxes = np.array([[]]) 516 | 517 | # sort the boxes and the labels according to scores 518 | score_sort = np.argsort(-score) 519 | pred_labels = pred_labels[score_sort] 520 | pred_boxes = pred_boxes[score_sort] 521 | 522 | # copy detections to all_detections 523 | for label in range(self.generator.num_classes()): 524 | all_detections[i][label] = pred_boxes[pred_labels == label, :] 525 | 526 | annotations = self.generator.load_annotation(i) 527 
| 528 | # copy ground truth annotations to all_annotations 529 | for label in range(self.generator.num_classes()): 530 | all_annotations[i][label] = annotations[annotations[:, 4] == label, :4].copy() 531 | 532 | # compute mAP by comparing all detections and all annotations 533 | average_precisions = {} 534 | 535 | for label in range(self.generator.num_classes()): 536 | false_positives = np.zeros((0,)) 537 | true_positives = np.zeros((0,)) 538 | scores = np.zeros((0,)) 539 | num_annotations = 0.0 540 | 541 | for i in range(self.generator.size()): 542 | detections = all_detections[i][label] 543 | annotations = all_annotations[i][label] 544 | num_annotations += annotations.shape[0] 545 | detected_annotations = [] 546 | 547 | for d in detections: 548 | scores = np.append(scores, d[4]) 549 | 550 | if annotations.shape[0] == 0: 551 | false_positives = np.append(false_positives, 1) 552 | true_positives = np.append(true_positives, 0) 553 | continue 554 | 555 | overlaps = compute_overlap(np.expand_dims(d, axis=0), annotations) 556 | assigned_annotation = np.argmax(overlaps, axis=1) 557 | max_overlap = overlaps[0, assigned_annotation] 558 | 559 | if max_overlap >= self.iou_threshold and assigned_annotation not in detected_annotations: 560 | false_positives = np.append(false_positives, 0) 561 | true_positives = np.append(true_positives, 1) 562 | detected_annotations.append(assigned_annotation) 563 | else: 564 | false_positives = np.append(false_positives, 1) 565 | true_positives = np.append(true_positives, 0) 566 | 567 | # no annotations -> AP for this class is 0 (is this correct?) 568 | if num_annotations == 0: 569 | average_precisions[label] = 0 570 | continue 571 | 572 | # sort by score 573 | indices = np.argsort(-scores) 574 | false_positives = false_positives[indices] 575 | true_positives = true_positives[indices] 576 | 577 | # compute false positives and true positives 578 | false_positives = np.cumsum(false_positives) 579 | true_positives = np.cumsum(true_positives) 580 | 581 | # compute recall and precision 582 | recall = true_positives / num_annotations 583 | precision = true_positives / np.maximum(true_positives + false_positives, np.finfo(np.float64).eps) 584 | 585 | # compute average precision 586 | average_precision = compute_ap(recall, precision) 587 | average_precisions[label] = average_precision 588 | 589 | return average_precisions 590 | 591 | 592 | def dummy_loss(y_true, y_pred): 593 | return tf.sqrt(tf.reduce_sum(y_pred)) --------------------------------------------------------------------------------
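For reference, a minimal, self-contained sketch (not part of the repository) of the precision/recall bookkeeping that _calc_avg_precisions performs for a single class, using made-up true/false positive flags; the project's compute_ap helper would then turn the resulting precision-recall curve into an average precision:

import numpy as np

# Toy example: four detections for one class, already sorted by score, evaluated
# against three ground truth boxes; 1 marks a true positive, 0 a false positive.
true_positives = np.array([1., 1., 0., 1.])
false_positives = 1. - true_positives
num_annotations = 3.

# Running TP/FP counts, mirroring the np.cumsum calls in _calc_avg_precisions.
tp_cum = np.cumsum(true_positives)
fp_cum = np.cumsum(false_positives)

recall = tp_cum / num_annotations
precision = tp_cum / np.maximum(tp_cum + fp_cum, np.finfo(np.float64).eps)

print(recall)     # approx. [0.33, 0.67, 0.67, 1.00]
print(precision)  # approx. [1.00, 1.00, 0.67, 0.75]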