├── .gitattributes
├── .gitignore
├── README.md
├── font
│   ├── FiraMono-Medium.otf
│   └── SIL Open Font License.txt
├── kmeansfix.py
├── main.py
├── model_data
│   ├── classes.txt.txt
│   └── yolo_anchor.txt
├── sort.py
├── train.py
├── yolo.py
└── yolo3
    ├── __init__.py
    ├── model.py
    └── utils.py

/.gitattributes:
--------------------------------------------------------------------------------
1 | # Auto detect text files and perform LF normalization
2 | * text=auto
3 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | pip-wheel-metadata/
24 | share/python-wheels/
25 | *.egg-info/
26 | .installed.cfg
27 | *.egg
28 | MANIFEST
29 |
30 | # PyInstaller
31 | # Usually these files are written by a python script from a template
32 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
33 | *.manifest
34 | *.spec
35 |
36 | # Installer logs
37 | pip-log.txt
38 | pip-delete-this-directory.txt
39 |
40 | # Unit test / coverage reports
41 | htmlcov/
42 | .tox/
43 | .nox/
44 | .coverage
45 | .coverage.*
46 | .cache
47 | nosetests.xml
48 | coverage.xml
49 | *.cover
50 | .hypothesis/
51 | .pytest_cache/
52 |
53 | # Translations
54 | *.mo
55 | *.pot
56 |
57 | # Django stuff:
58 | *.log
59 | local_settings.py
60 | db.sqlite3
61 |
62 | # Flask stuff:
63 | instance/
64 | .webassets-cache
65 |
66 | # Scrapy stuff:
67 | .scrapy
68 |
69 | # Sphinx documentation
70 | docs/_build/
71 |
72 | # PyBuilder
73 | target/
74 |
75 | # Jupyter Notebook
76 | .ipynb_checkpoints
77 |
78 | # IPython
79 | profile_default/
80 | ipython_config.py
81 |
82 | # pyenv
83 | .python-version
84 |
85 | # pipenv
86 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
87 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
88 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
89 | # install all needed dependencies.
90 | #Pipfile.lock
91 |
92 | # celery beat schedule file
93 | celerybeat-schedule
94 |
95 | # SageMath parsed files
96 | *.sage.py
97 |
98 | # Environments
99 | .env
100 | .venv
101 | env/
102 | venv/
103 | ENV/
104 | env.bak/
105 | venv.bak/
106 |
107 | # Spyder project settings
108 | .spyderproject
109 | .spyproject
110 |
111 | # Rope project settings
112 | .ropeproject
113 |
114 | # mkdocs documentation
115 | /site
116 |
117 | # mypy
118 | .mypy_cache/
119 | .dmypy.json
120 | dmypy.json
121 |
122 | # Pyre type checker
123 | .pyre/
124 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | ## Introduction
2 | A vehicle counter built on keras-YOLOv3 (https://github.com/qqwweee/keras-yolo3.git), with the SORT algorithm for tracking.
3 |
4 | The model here was trained on my own dataset of about 2300 images; training on more images should give better results.
5 |
6 | ## Quick Start
7 | 1. Use weights from the YOLO website (http://pjreddie.com/darknet/yolo/) or your own trained weights
8 | 2. Change the coordinates of the two counting lines (`line1` and `line2`) in main.py to match your own video; a short sketch of this step follows the font license below
9 | 3. Set the file paths (weights, classes, anchors, video) to your own paths
10 | 4. Run main.py
11 |
12 | ## A Demo of Traffic Counting: vehicle detection on traffic surveillance camera footage
13 | ### [Watch the Video](https://youtu.be/FBYR8Fy4GNo)
--------------------------------------------------------------------------------
/font/FiraMono-Medium.otf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BenBenee/Traffic-Counting-with-YOLOv3-and-SORT/f38e2f05ee05d487082b6b668da9e3f8275ab8f1/font/FiraMono-Medium.otf
--------------------------------------------------------------------------------
/font/SIL Open Font License.txt:
--------------------------------------------------------------------------------
1 | Copyright (c) 2014, Mozilla Foundation https://mozilla.org/ with Reserved Font Name Fira Mono.
2 |
3 | Copyright (c) 2014, Telefonica S.A.
4 |
5 | This Font Software is licensed under the SIL Open Font License, Version 1.1.
6 | This license is copied below, and is also available with a FAQ at: http://scripts.sil.org/OFL
7 |
8 | -----------------------------------------------------------
9 | SIL OPEN FONT LICENSE Version 1.1 - 26 February 2007
10 | -----------------------------------------------------------
11 |
12 | PREAMBLE
13 | The goals of the Open Font License (OFL) are to stimulate worldwide development of collaborative font projects, to support the font creation efforts of academic and linguistic communities, and to provide a free and open framework in which fonts may be shared and improved in partnership with others.
14 |
15 | The OFL allows the licensed fonts to be used, studied, modified and redistributed freely as long as they are not sold by themselves. The fonts, including any derivative works, can be bundled, embedded, redistributed and/or sold with any software provided that any reserved names are not used by derivative works. The fonts and derivatives, however, cannot be released under any other type of license. The requirement for fonts to remain under this license does not apply to any document created using the fonts or their derivatives.
16 |
17 | DEFINITIONS
18 | "Font Software" refers to the set of files released by the Copyright Holder(s) under this license and clearly marked as such. This may include source files, build scripts and documentation.
19 |
20 | "Reserved Font Name" refers to any names specified as such after the copyright statement(s).
21 |
22 | "Original Version" refers to the collection of Font Software components as distributed by the Copyright Holder(s).
23 |
24 | "Modified Version" refers to any derivative made by adding to, deleting, or substituting -- in part or in whole -- any of the components of the Original Version, by changing formats or by porting the Font Software to a new environment.
25 |
26 | "Author" refers to any designer, engineer, programmer, technical writer or other person who contributed to the Font Software.
27 |
28 | PERMISSION & CONDITIONS
29 | Permission is hereby granted, free of charge, to any person obtaining a copy of the Font Software, to use, study, copy, merge, embed, modify, redistribute, and sell modified and unmodified copies of the Font Software, subject to the following conditions:
30 |
31 | 1) Neither the Font Software nor any of its individual components, in Original or Modified Versions, may be sold by itself.
32 | 33 | 2) Original or Modified Versions of the Font Software may be bundled, redistributed and/or sold with any software, provided that each copy contains the above copyright notice and this license. These can be included either as stand-alone text files, human-readable headers or in the appropriate machine-readable metadata fields within text or binary files as long as those fields can be easily viewed by the user. 34 | 35 | 3) No Modified Version of the Font Software may use the Reserved Font Name(s) unless explicit written permission is granted by the corresponding Copyright Holder. This restriction only applies to the primary font name as presented to the users. 36 | 37 | 4) The name(s) of the Copyright Holder(s) or the Author(s) of the Font Software shall not be used to promote, endorse or advertise any Modified Version, except to acknowledge the contribution(s) of the Copyright Holder(s) and the Author(s) or with their explicit written permission. 38 | 39 | 5) The Font Software, modified or unmodified, in part or in whole, must be distributed entirely under this license, and must not be distributed under any other license. The requirement for fonts to remain under this license does not apply to any document created using the Font Software. 40 | 41 | TERMINATION 42 | This license becomes null and void if any of the above conditions are not met. 43 | 44 | DISCLAIMER 45 | THE FONT SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO ANY WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF COPYRIGHT, PATENT, TRADEMARK, OR OTHER RIGHT. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, INCLUDING ANY GENERAL, SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF THE USE OR INABILITY TO USE THE FONT SOFTWARE OR FROM OTHER DEALINGS IN THE FONT SOFTWARE. 
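A note on Quick Start step 2 in the README above: the two counting lines in main.py are plain pixel-coordinate segments, picked by eye from a frame of your own footage. A minimal sketch of that step; the video path is a placeholder, and the coordinates shown are the ones main.py currently uses:

```python
# Sketch: preview counting-line placement on your own video.
# 'my_video.mp4' is a placeholder path; the coordinates are main.py's defaults.
import cv2

cap = cv2.VideoCapture('my_video.mp4')
ok, frame = cap.read()
assert ok, 'could not read a frame'
print('frame size (w, h):', frame.shape[1], frame.shape[0])

# Each line is [(x1, y1), (x2, y2)] in pixel coordinates of the frame.
line1 = [(494, 462), (811, 462)]
line2 = [(976, 630), (1677, 822)]

cv2.line(frame, line1[0], line1[1], (0, 255, 255), 2)
cv2.line(frame, line2[0], line2[1], (0, 255, 255), 2)
cv2.imwrite('lines_preview.jpg', frame)  # inspect, adjust, repeat
cap.release()
```

A vehicle is counted when the segment joining its current and previous box centres crosses one of these lines, so place each line across the lanes you want counted.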
--------------------------------------------------------------------------------
/kmeansfix.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 |
4 | class YOLO_Kmeans:
5 |
6 |     def __init__(self, cluster_number, filename):
7 |         self.cluster_number = cluster_number
8 |         self.filename = filename  # use the path passed in rather than a hard-coded one
9 |
10 |     def iou(self, boxes, clusters):  # 1 box -> k clusters
11 |         n = boxes.shape[0]
12 |         k = self.cluster_number
13 |
14 |         box_area = boxes[:, 0] * boxes[:, 1]
15 |         box_area = box_area.repeat(k)
16 |         box_area = np.reshape(box_area, (n, k))
17 |
18 |         cluster_area = clusters[:, 0] * clusters[:, 1]
19 |         cluster_area = np.tile(cluster_area, [1, n])
20 |         cluster_area = np.reshape(cluster_area, (n, k))
21 |
22 |         box_w_matrix = np.reshape(boxes[:, 0].repeat(k), (n, k))
23 |         cluster_w_matrix = np.reshape(np.tile(clusters[:, 0], (1, n)), (n, k))
24 |         min_w_matrix = np.minimum(cluster_w_matrix, box_w_matrix)
25 |
26 |         box_h_matrix = np.reshape(boxes[:, 1].repeat(k), (n, k))
27 |         cluster_h_matrix = np.reshape(np.tile(clusters[:, 1], (1, n)), (n, k))
28 |         min_h_matrix = np.minimum(cluster_h_matrix, box_h_matrix)
29 |         inter_area = np.multiply(min_w_matrix, min_h_matrix)
30 |
31 |         result = inter_area / (box_area + cluster_area - inter_area)
32 |         return result
33 |
34 |     def avg_iou(self, boxes, clusters):
35 |         accuracy = np.mean([np.max(self.iou(boxes, clusters), axis=1)])
36 |         return accuracy
37 |
38 |     def kmeans(self, boxes, k, dist=np.median):
39 |         box_number = boxes.shape[0]
40 |         distances = np.empty((box_number, k))
41 |         last_nearest = np.zeros((box_number,))
42 |         np.random.seed()
43 |         clusters = boxes[np.random.choice(
44 |             box_number, k, replace=False)]  # init k clusters
45 |         while True:
46 |
47 |             distances = 1 - self.iou(boxes, clusters)
48 |
49 |             current_nearest = np.argmin(distances, axis=1)
50 |             if (last_nearest == current_nearest).all():
51 |                 break  # clusters won't change
52 |             for cluster in range(k):
53 |                 clusters[cluster] = dist(  # update clusters
54 |                     boxes[current_nearest == cluster], axis=0)
55 |
56 |             last_nearest = current_nearest
57 |
58 |         return clusters
59 |
60 |     def result2txt(self, data):
61 |         f = open("F:/model_data/yolo_anchor.txt", 'w')
62 |         row = np.shape(data)[0]
63 |         for i in range(row):
64 |             if i == 0:
65 |                 x_y = "%d,%d" % (data[i][0], data[i][1])
66 |             else:
67 |                 x_y = ", %d,%d" % (data[i][0], data[i][1])
68 |             f.write(x_y)
69 |         f.close()
70 |
71 |     def txt2boxes(self):
72 |         f = open(self.filename, 'r')
73 |         lines = f.readlines()
74 |         dataSet = []
75 |         for i in range(len(lines)):  # iterate over every annotation line, including the last
76 |             infos = lines[i].split()
77 |             for j in range(2, len(infos)):  # note: standard keras-yolo3 annotations put boxes from index 1; starting at 2 skips the first box on each line
78 |                 width = int(infos[j].split(",")[2]) - int(infos[j].split(",")[0])
79 |                 height = int(infos[j].split(",")[3]) - int(infos[j].split(",")[1])
80 |                 dataSet.append([width, height])
81 |         result = np.array(dataSet)
82 |         f.close()
83 |         return result
84 |
85 |     def txt2clusters(self):
86 |         all_boxes = self.txt2boxes()
87 |         result = self.kmeans(all_boxes, k=self.cluster_number)
88 |         result = result[np.lexsort(result.T[0, None])]
89 |         self.result2txt(result)
90 |         print("K anchors:\n {}".format(result))
91 |         print("Accuracy: {:.2f}%".format(
92 |             self.avg_iou(all_boxes, result) * 100))
93 |
94 |
95 | if __name__ == "__main__":
96 |     cluster_number = 9
97 |     filename = "F:/annotation.txt"
98 |     kmeans = YOLO_Kmeans(cluster_number, filename)
99 |     kmeans.txt2clusters()
100 |
--------------------------------------------------------------------------------
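The script above clusters ground-truth box sizes by IoU distance and writes the result in the single-line, comma-separated format that train.py's get_anchors and yolo.py expect (compare model_data/yolo_anchor.txt below). A minimal usage sketch, assuming an annotation file in the keras-yolo3 format (image path followed by x_min,y_min,x_max,y_max,class_id boxes) and noting that result2txt still writes to its hard-coded output path:

```python
# Sketch: regenerate anchors from your own annotation file.
# 'my_annotation.txt' is a placeholder; edit the output path in result2txt first.
from kmeansfix import YOLO_Kmeans

kmeans = YOLO_Kmeans(cluster_number=9, filename='my_annotation.txt')
kmeans.txt2clusters()  # prints the anchors and their average IoU with your boxes
```

Use 9 clusters for full YOLOv3 (3 anchors at each of 3 scales) and 6 for Tiny YOLOv3; train.py and yolo.py infer which model to build from the anchor count.

/main.py: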
--------------------------------------------------------------------------------
1 | # import the necessary packages
2 | import numpy as np
3 | import argparse
4 | import imutils
5 | import time
6 | import cv2
7 | import datetime
8 | import os
9 | from yolo import YOLO
10 | from PIL import Image, ImageDraw, ImageFont
11 | from sort import *
12 | import tensorflow as tf
13 | from timeit import default_timer as timer
14 |
15 |
16 | def main(yolo):
17 |     tracker = Sort()
18 |     memory = {}
19 |     line1 = [(494, 462), (811, 462)]
20 |     line2 = [(976,630), (1677,822)]
21 |
22 |
23 |     # construct the argument parse and parse the arguments
24 |     ap = argparse.ArgumentParser()
25 |     ap.add_argument("-c", "--confidence", type=float, default=0.5,
26 |         help="minimum probability to filter weak detections")
27 |     ap.add_argument("-t", "--threshold", type=float, default=0.4,
28 |         help="threshold when applying non-maxima suppression")
29 |     args = vars(ap.parse_args())
30 |
31 |     # Return true if line segments AB and CD intersect
32 |     def intersect(A,B,C,D):
33 |         return ccw(A,C,D) != ccw(B,C,D) and ccw(A,B,C) != ccw(A,B,D)
34 |
35 |     def ccw(A,B,C):  # True if A, B, C are in counter-clockwise order (sign of the cross product)
36 |         return (C[1]-A[1]) * (B[0]-A[0]) > (B[1]-A[1]) * (C[0]-A[0])
37 |
38 |     # load the COCO class labels our YOLO model was trained on
39 |     #labelsPath = os.path.sep.join([args["yolo"], "classes.names"])
40 |     #LABELS = open(labelsPath).read().strip().split("\n")
41 |
42 |     # initialize a list of colors to represent each possible class label
43 |     np.random.seed(42)
44 |     COLORS = np.random.randint(0, 255, size=(200, 3), dtype="uint8")
45 |
46 |     # initialize the video stream, pointer to output video file, and
47 |     # frame dimensions
48 |     # Put your video path here
49 |     vs = cv2.VideoCapture('video_path.mp4')
50 |     writer = None
51 |     (W, H) = (None, None)
52 |
53 |     font = ImageFont.truetype(font='../font/FiraMono-Medium.otf', size=40)
54 |
55 |     frameIndex = 0
56 |     car = 0
57 |     motor = 0
58 |     bus = 0
59 |     truck = 0
60 |     car2 = 0
61 |     motor2 = 0
62 |     bus2 = 0
63 |     truck2 = 0
64 |     # loop over frames from the video file stream
65 |     prev_time = timer()
66 |     accum_time = 0
67 |     curr_fps = 0; fps = "FPS: 0"  # initialise fps before the loop so the putText call below always has a value
68 |     while True:
69 |         # read the next frame from the file
70 |         (grabbed, frame) = vs.read()
71 |
72 |         # if the frame was not grabbed, then we have reached the end
73 |         # of the stream
74 |         if not grabbed:
75 |             break
76 |
77 |         image = Image.fromarray(frame[...,::-1])  # BGR -> RGB for PIL
78 |         boxes, out_class, confidences, midPoint = yolo.detect_image(image)
79 |         image = np.asarray(image)
80 |
81 |         # class IDs for the detections (class names come back from detect_image)
82 |         classIDs = []
83 |         #classIDs.append(classID)
84 |
85 |         # apply non-maxima suppression to suppress weak, overlapping
86 |         # bounding boxes
87 |
88 |         idxs = cv2.dnn.NMSBoxes(boxes, confidences, 0.4, 0.2)
89 |
90 |         dets = []
91 |         if len(idxs) > 0:
92 |             # loop over the indexes we are keeping
93 |             for i in idxs.flatten():
94 |                 (x, y) = (boxes[i][0], boxes[i][1])
95 |                 (w, h) = (boxes[i][2], boxes[i][3])
96 |                 dets.append([x, y, x+w, y+h, confidences[i]])
97 |
98 |         np.set_printoptions(formatter={'float': lambda x: "{0:0.3f}".format(x)})
99 |         dets = np.asarray(dets)
100 |         tracks = tracker.update(dets)
101 |
102 |         boxes = []
103 |         indexIDs = []
104 |         c = []
105 |         previous = memory.copy()
106 |         memory = {}
107 |
108 |         for track in tracks:
109 |             boxes.append([track[0], track[1], track[2], track[3]])  # SORT returns [x1, y1, x2, y2, id]
110 |             indexIDs.append(int(track[4]))
111 |             memory[indexIDs[-1]] = boxes[-1]
112 |
113 |         if len(boxes) > 0:
114 |             i = int(0)
115 |             for box in boxes:
116 |                 # extract the bounding box coordinates
117 |                 (x, y) = (int(box[0]), int(box[1]))
118 |                 (w, h) = (int(box[2]), int(box[3]))  # note: these are the bottom-right corner (x2, y2), not width/height
119 |
120 |                 # draw a bounding box rectangle and label on the image
121 |                 # color = [int(c) for c in COLORS[classIDs[i]]]
122 |                 # cv2.rectangle(frame, (x, y), (x + w, y + h), color, 2)
123 |
124 |                 color = [int(c) for c in COLORS[indexIDs[i] % len(COLORS)]]
125 |                 cv2.rectangle(frame, (x, y), (w, h), color, 2)  # (x, y) and (w, h) are opposite corners here
126 |
127 |                 if indexIDs[i] in previous:
128 |                     previous_box = previous[indexIDs[i]]
129 |                     (x2, y2) = (int(previous_box[0]), int(previous_box[1]))
130 |                     (w2, h2) = (int(previous_box[2]), int(previous_box[3]))
131 |                     p0 = (int(x + (w-x)/2), int(y + (h-y)/2))  # centre of the current box
132 |                     p1 = (int(x2 + (w2-x2)/2), int(y2 + (h2-y2)/2))  # centre of the previous box
133 |                     cv2.line(frame, p0, p1, color, 3)
134 |
135 |                     if intersect(p0, p1, line1[0], line1[1]):
136 |                         detected_class = yolo.counter(p0, out_class, midPoint)
137 |                         if detected_class == 'car':
138 |                             car = car + 1
139 |                         elif detected_class == 'motorbike':
140 |                             motor = motor + 1
141 |                         elif detected_class == 'bus':
142 |                             bus = bus + 1
143 |                         else:
144 |                             truck = truck + 1  # note: also reached when counter() finds no match and returns None
145 |
146 |                     if intersect(p0, p1, line2[0], line2[1]):
147 |                         detected_class = yolo.counter(p0, out_class, midPoint)
148 |                         if detected_class == 'car':
149 |                             car2 = car2 + 1
150 |                         elif detected_class == 'motorbike':
151 |                             motor2 = motor2 + 1
152 |                         elif detected_class == 'bus':
153 |                             bus2 = bus2 + 1
154 |                         else:
155 |                             truck2 = truck2 + 1
156 |                 '''
157 |                 if p0[0] > 921 and p0[0] < 1440 and p0[1] < 606 and p0[1] > 437:
158 |                     p0Memory = p0
159 |                     state = True
160 |                 if
161 |                 '''
162 |                 # text = "{}: {:.4f}".format(LABELS[classIDs[i]], confidences[i])
163 |                 text = "{}".format(indexIDs[i])
164 |                 cv2.putText(frame, text, (x, y - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)
165 |                 i += 1
166 |
167 |         frame = Image.fromarray(frame)
168 |         draw = ImageDraw.Draw(frame)
169 |         draw.text((83,152), 'car %d \nmotor %d \nbus %d \ntruck %d' %(car,motor,bus,truck), fill=(255, 255, 255), font=font)
170 |         draw.text((1328,112), 'car %d \nmotor %d \nbus %d \ntruck %d' %(car2,motor2,bus2,truck2), fill=(255, 255, 255), font=font)
171 |
172 |         frame = np.asarray(frame)
173 |         # draw the two counting lines
174 |         cv2.line(frame, line1[0], line1[1], (0, 255, 255), 1)
175 |         cv2.line(frame, line2[0], line2[1], (0, 255, 255), 1)
176 |
177 |         # update the FPS counter
178 |         # counter += 1
179 |         curr_time = timer()
180 |         exec_time = curr_time - prev_time
181 |         prev_time = curr_time
182 |         accum_time = accum_time + exec_time
183 |         curr_fps = curr_fps + 1
184 |         if accum_time > 1:
185 |             accum_time = accum_time - 1
186 |             fps = "FPS: " + str(curr_fps)
187 |             curr_fps = 0
188 |         cv2.putText(frame, text=fps, org=(3, 30), fontFace=cv2.FONT_HERSHEY_SIMPLEX,
189 |             fontScale=0.8, color=(255, 0, 0), thickness=1)
190 |         # check if the video writer is None
191 |         if writer is None:
192 |             # initialize our video writer
193 |             fourcc = cv2.VideoWriter_fourcc(*"XVID")
194 |             video_fps = vs.get(cv2.CAP_PROP_FPS)
195 |             cv2.putText(frame, 'fps: %d' %(video_fps), (9,20), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0,0,255), 5)
196 |             writer = cv2.VideoWriter('output.avi', fourcc, video_fps, (frame.shape[1], frame.shape[0]), True)
197 |
198 |         # write the output frame to disk
199 |         writer.write(frame)
200 |
201 |         cv2.namedWindow("hasil", cv2.WINDOW_NORMAL)
202 |         cv2.resizeWindow("hasil", 904, 544)
203 |         cv2.imshow('hasil', frame)
204 |         cv2.waitKey(1)
205 |
206 |         # increase frame index
207 |         frameIndex += 1
208 |
209 |     # release the file pointers
210 |     print("[INFO] cleaning up...")
211 |     writer.release()
212 |     vs.release()
213 |
214 | if __name__ == '__main__':
215 |     config = tf.ConfigProto()  # TensorFlow 1.x API
216 |     config.gpu_options.allow_growth = True
217 |     tf.keras.backend.set_session(tf.Session(config=config))
218 |     main(YOLO())
219 |
--------------------------------------------------------------------------------
/model_data/classes.txt.txt:
--------------------------------------------------------------------------------
1 | car
2 | motorbike
3 | bus
4 | truck
--------------------------------------------------------------------------------
/model_data/yolo_anchor.txt:
--------------------------------------------------------------------------------
1 | 12,14, 14,26, 23,40, 24,21, 37,32, 41,61, 60,43, 81,74, 162,136
--------------------------------------------------------------------------------
/sort.py:
--------------------------------------------------------------------------------
1 | """
2 | SORT: A Simple, Online and Realtime Tracker
3 | Copyright (C) 2016 Alex Bewley alex@dynamicdetection.com
4 |
5 | This program is free software: you can redistribute it and/or modify
6 | it under the terms of the GNU General Public License as published by
7 | the Free Software Foundation, either version 3 of the License, or
8 | (at your option) any later version.
9 |
10 | This program is distributed in the hope that it will be useful,
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | GNU General Public License for more details.
14 |
15 | You should have received a copy of the GNU General Public License
16 | along with this program. If not, see http://www.gnu.org/licenses/.
17 | """
18 | from __future__ import print_function
19 |
20 | from numba import jit
21 | import numpy as np
22 | from sklearn.utils.linear_assignment_ import linear_assignment  # removed in scikit-learn >= 0.23; pin an older release or swap in scipy.optimize.linear_sum_assignment
23 | from filterpy.kalman import KalmanFilter
24 |
25 | @jit
26 | def iou(bb_test,bb_gt):
27 |   """
28 |   Computes IOU between two bboxes in the form [x1,y1,x2,y2]
29 |   """
30 |   xx1 = np.maximum(bb_test[0], bb_gt[0])
31 |   yy1 = np.maximum(bb_test[1], bb_gt[1])
32 |   xx2 = np.minimum(bb_test[2], bb_gt[2])
33 |   yy2 = np.minimum(bb_test[3], bb_gt[3])
34 |   w = np.maximum(0., xx2 - xx1)
35 |   h = np.maximum(0., yy2 - yy1)
36 |   wh = w * h
37 |   o = wh / ((bb_test[2]-bb_test[0])*(bb_test[3]-bb_test[1])
38 |     + (bb_gt[2]-bb_gt[0])*(bb_gt[3]-bb_gt[1]) - wh)
39 |   return(o)
40 |
41 | def convert_bbox_to_z(bbox):
42 |   """
43 |   Takes a bounding box in the form [x1,y1,x2,y2] and returns z in the form
44 |     [x,y,s,r] where x,y is the centre of the box and s is the scale/area and r is
45 |     the aspect ratio
46 |   """
47 |   w = bbox[2]-bbox[0]
48 |   h = bbox[3]-bbox[1]
49 |   x = bbox[0]+w/2.
50 |   y = bbox[1]+h/2.
51 |   s = w*h    #scale is just area
52 |   r = w/float(h)
53 |   return np.array([x,y,s,r]).reshape((4,1))
54 |
55 | def convert_x_to_bbox(x,score=None):
56 |   """
57 |   Takes a bounding box in the centre form [x,y,s,r] and returns it in the form
58 |     [x1,y1,x2,y2] where x1,y1 is the top left and x2,y2 is the bottom right
59 |   """
60 |   w = np.sqrt(x[2]*x[3])
61 |   h = x[2]/w
62 |   if(score==None):
63 |     return np.array([x[0]-w/2.,x[1]-h/2.,x[0]+w/2.,x[1]+h/2.]).reshape((1,4))
64 |   else:
65 |     return np.array([x[0]-w/2.,x[1]-h/2.,x[0]+w/2.,x[1]+h/2.,score]).reshape((1,5))
66 |
67 | class KalmanBoxTracker(object):
68 |   """
69 |   This class represents the internal state of individual tracked objects observed as bbox.
70 |   """
71 |   count = 0
72 |   def __init__(self,bbox):
73 |     """
74 |     Initialises a tracker using initial bounding box.
75 |     """
76 |     #define constant velocity model
77 |     self.kf = KalmanFilter(dim_x=7, dim_z=4)
78 |     self.kf.F = np.array([[1,0,0,0,1,0,0],[0,1,0,0,0,1,0],[0,0,1,0,0,0,1],[0,0,0,1,0,0,0],  [0,0,0,0,1,0,0],[0,0,0,0,0,1,0],[0,0,0,0,0,0,1]])
79 |     self.kf.H = np.array([[1,0,0,0,0,0,0],[0,1,0,0,0,0,0],[0,0,1,0,0,0,0],[0,0,0,1,0,0,0]])
80 |
81 |     self.kf.R[2:,2:] *= 10.
82 |     self.kf.P[4:,4:] *= 1000. #give high uncertainty to the unobservable initial velocities
83 |     self.kf.P *= 10.
84 |     self.kf.Q[-1,-1] *= 0.01
85 |     self.kf.Q[4:,4:] *= 0.01
86 |
87 |     self.kf.x[:4] = convert_bbox_to_z(bbox)
88 |     self.time_since_update = 0
89 |     self.id = KalmanBoxTracker.count
90 |     KalmanBoxTracker.count += 1
91 |     self.history = []
92 |     self.hits = 0
93 |     self.hit_streak = 0
94 |     self.age = 0
95 |
96 |   def update(self,bbox):
97 |     """
98 |     Updates the state vector with observed bbox.
99 |     """
100 |     self.time_since_update = 0
101 |     self.history = []
102 |     self.hits += 1
103 |     self.hit_streak += 1
104 |     self.kf.update(convert_bbox_to_z(bbox))
105 |
106 |   def predict(self):
107 |     """
108 |     Advances the state vector and returns the predicted bounding box estimate.
109 |     """
110 |     if((self.kf.x[6]+self.kf.x[2])<=0):
111 |       self.kf.x[6] *= 0.0
112 |     self.kf.predict()
113 |     self.age += 1
114 |     if(self.time_since_update>0):
115 |       self.hit_streak = 0
116 |     self.time_since_update += 1
117 |     self.history.append(convert_x_to_bbox(self.kf.x))
118 |     return self.history[-1]
119 |
120 |   def get_state(self):
121 |     """
122 |     Returns the current bounding box estimate.
123 |     """
124 |     return convert_x_to_bbox(self.kf.x)
125 |
126 | def associate_detections_to_trackers(detections,trackers,iou_threshold = 0.3):
127 |   """
128 |   Assigns detections to tracked object (both represented as bounding boxes)
129 |
130 |   Returns 3 lists of matches, unmatched_detections and unmatched_trackers
131 |   """
132 |   if(len(trackers)==0) or (len(detections)==0):
133 |     return np.empty((0,2),dtype=int), np.arange(len(detections)), np.empty((0,5),dtype=int)  # note: returns no unmatched trackers here; upstream SORT marks every tracker unmatched when detections are empty
134 |   iou_matrix = np.zeros((len(detections),len(trackers)),dtype=np.float32)
135 |
136 |   for d,det in enumerate(detections):
137 |     for t,trk in enumerate(trackers):
138 |       iou_matrix[d,t] = iou(det,trk)
139 |   matched_indices = linear_assignment(-iou_matrix)
140 |
141 |   unmatched_detections = []
142 |   for d,det in enumerate(detections):
143 |     if(d not in matched_indices[:,0]):
144 |       unmatched_detections.append(d)
145 |   unmatched_trackers = []
146 |   for t,trk in enumerate(trackers):
147 |     if(t not in matched_indices[:,1]):
148 |       unmatched_trackers.append(t)
149 |
150 |   #filter out matched with low IOU
151 |   matches = []
152 |   for m in matched_indices:
153 |     if(iou_matrix[m[0],m[1]]<iou_threshold):
154 |       unmatched_detections.append(m[0])
155 |       unmatched_trackers.append(m[1])
156 |     else:
157 |       matches.append(m.reshape(1,2))
158 |   if(len(matches)==0):
159 |     matches = np.empty((0,2),dtype=int)
160 |   else:
161 |     matches = np.concatenate(matches,axis=0)
162 |
163 |   return matches, np.array(unmatched_detections), np.array(unmatched_trackers)
164 |
165 |
166 | class Sort(object):
167 |   """
168 |   Manages a set of KalmanBoxTracker objects, matching new detections to existing tracks frame by frame.
169 |   """
170 |   def __init__(self,max_age=1,min_hits=3):
171 |     """
172 |     Sets key parameters for SORT
173 |     """
174 |     self.max_age = max_age
175 |     self.min_hits = min_hits
176 |     self.trackers = []
177 |     self.frame_count = 0
178 |
179 |   def update(self,dets):
180 |     """
181 |     Params:
182 |       dets - a numpy array of detections in the format [[x1,y1,x2,y2,score],[x1,y1,x2,y2,score],...]
183 |     Requires: this method must be called once for each frame even with empty detections.
184 |     Returns a similar array, where the last column is the object ID.
185 |
186 |     NOTE: The number of objects returned may differ from the number of detections provided.
187 |     """
188 |     self.frame_count += 1
189 |     #get predicted locations from existing trackers.
190 |     trks = np.zeros((len(self.trackers),5))
191 |     to_del = []
192 |     ret = []
193 |     for t,trk in enumerate(trks):
194 |       pos = self.trackers[t].predict()[0]
195 |       trk[:] = [pos[0], pos[1], pos[2], pos[3], 0]
196 |       if(np.any(np.isnan(pos))):
197 |         to_del.append(t)
198 |     trks = np.ma.compress_rows(np.ma.masked_invalid(trks))
199 |     for t in reversed(to_del):
200 |       self.trackers.pop(t)
201 |     matched, unmatched_dets, unmatched_trks = associate_detections_to_trackers(dets,trks)
202 |
203 |     #update matched trackers with assigned detections
204 |     for t,trk in enumerate(self.trackers):
205 |       if(t not in unmatched_trks):
206 |         d = matched[np.where(matched[:,1]==t)[0],0]
207 |         trk.update(dets[d,:][0])
208 |
209 |     #create and initialise new trackers for unmatched detections
210 |     for i in unmatched_dets:
211 |       trk = KalmanBoxTracker(dets[i,:])
212 |       self.trackers.append(trk)
213 |     i = len(self.trackers)
214 |     for trk in reversed(self.trackers):
215 |       d = trk.get_state()[0]
216 |       #emit tracks that were just updated and are past the probation period
217 |       if((trk.time_since_update < 1) and (trk.hit_streak >= self.min_hits or self.frame_count <= self.min_hits)):
218 |         ret.append(np.concatenate((d,[trk.id+1])).reshape(1,-1)) # +1 as MOT benchmark requires positive
219 |       i -= 1
220 |       #remove dead tracklet
221 |       if(trk.time_since_update > self.max_age):
222 |         self.trackers.pop(i)
223 |     if(len(ret)>0):
224 |       return np.concatenate(ret)
225 |     return np.empty((0,5))
226 |
--------------------------------------------------------------------------------
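A minimal sketch of driving the tracker above on its own; the detection arrays here are fabricated for illustration:

```python
# Sketch: feed per-frame [x1, y1, x2, y2, score] detections to SORT.
import numpy as np
from sort import Sort

tracker = Sort(max_age=1, min_hits=3)
frames = [
    np.array([[100, 100, 150, 160, 0.90]]),  # frame 1: one detection
    np.array([[104, 102, 154, 162, 0.80]]),  # frame 2: same object, shifted
    np.array([[108, 104, 158, 164, 0.85]]),  # frame 3
]
for dets in frames:
    tracks = tracker.update(dets)  # rows are [x1, y1, x2, y2, track_id]
    print(tracks)
```

The persistent track_id in the last column is what main.py keys its memory dictionary on, so each box can be paired with the same object's box from the previous frame.

/train.py:
--------------------------------------------------------------------------------
1 | """
2 | Retrain the YOLO model for your own dataset.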
3 | """
4 |
5 | import numpy as np
6 | import keras.backend as K
7 | from keras.layers import Input, Lambda
8 | from keras.models import Model
9 | from keras.optimizers import Adam
10 | from keras.callbacks import TensorBoard, ModelCheckpoint, ReduceLROnPlateau, EarlyStopping
11 | from keras.utils import plot_model
12 | import matplotlib.pyplot as plt
13 | from yolo3.model import preprocess_true_boxes, yolo_body, tiny_yolo_body, yolo_loss
14 | from yolo3.utils import get_random_data
15 |
16 |
17 | def _main():
18 |     annotation_path = 'F:/annotation.txt'
19 |     log_dir = 'F:/logs/000/'
20 |     classes_path = 'F:/model_data/coco_classes.txt'
21 |     anchors_path = 'F:/model_data/yolo_anchor.txt'
22 |     class_names = get_classes(classes_path)
23 |     num_classes = len(class_names)
24 |     anchors = get_anchors(anchors_path)
25 |
26 |     input_shape = (608,608) # multiple of 32, hw
27 |
28 |     is_tiny_version = len(anchors)==6 # default setting
29 |     if is_tiny_version:
30 |         model = create_tiny_model(input_shape, anchors, num_classes,
31 |             freeze_body=2, weights_path='tiny_yolo_weights.h5')
32 |     else:
33 |         model = create_model(input_shape, anchors, num_classes,
34 |             freeze_body=2, weights_path='F:/model_data/trained_weights_final_1214_608_3.h5') # make sure you know what you freeze
35 |
36 |     logging = TensorBoard(log_dir=log_dir)
37 |     checkpoint = ModelCheckpoint(log_dir + 'ep{epoch:03d}-loss{loss:.3f}-val_loss{val_loss:.3f}.h5',
38 |         monitor='val_loss', save_weights_only=True, save_best_only=True, period=3)
39 |     reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=3, verbose=1)
40 |     early_stopping = EarlyStopping(monitor='val_loss', min_delta=0, patience=10, verbose=1)
41 |
42 |     val_split = 0.12
43 |     with open(annotation_path) as f:
44 |         lines = f.readlines()
45 |     np.random.seed(10101)
46 |     np.random.shuffle(lines)
47 |     np.random.seed(None)
48 |     num_val = int(len(lines)*val_split)
49 |     num_train = len(lines) - num_val
50 |
51 |     # Train with frozen layers first, to get a stable loss.
52 |     # Adjust the number of epochs to your dataset. This step is enough to obtain a reasonably good model.
53 |     if True:
54 |         model.compile(optimizer=Adam(lr=1e-3), loss={
55 |             # use custom yolo_loss Lambda layer.
56 |             'yolo_loss': lambda y_true, y_pred: y_pred})
57 |
58 |         batch_size = 16
59 |         print('Train on {} samples, val on {} samples, with batch size {}.'.format(num_train, num_val, batch_size))
60 |         model.fit_generator(data_generator_wrapper(lines[:num_train], batch_size, input_shape, anchors, num_classes),
61 |                 steps_per_epoch=max(1, num_train//batch_size),
62 |                 validation_data=data_generator_wrapper(lines[num_train:], batch_size, input_shape, anchors, num_classes),
63 |                 validation_steps=max(1, num_val//batch_size),
64 |                 epochs=100,
65 |                 initial_epoch=0,
66 |                 callbacks=[logging, checkpoint])
67 |         model.save_weights(log_dir + 'trained_weights_stage_1.h5')
68 |
69 |     # Unfreeze and continue training, to fine-tune.
70 |     # Train longer if the result is not good.
71 |     if True:
72 |         for i in range(len(model.layers)):
73 |             model.layers[i].trainable = True
74 |         model.compile(optimizer=Adam(lr=1e-4), loss={'yolo_loss': lambda y_true, y_pred: y_pred}) # recompile to apply the change
75 |         print('Unfreeze all of the layers.')
76 |
77 |         batch_size = 4 # note that more GPU memory is required after unfreezing the body
78 |         print('Train on {} samples, val on {} samples, with batch size {}.'.format(num_train, num_val, batch_size))
79 |         model.fit_generator(data_generator_wrapper(lines[:num_train], batch_size, input_shape, anchors, num_classes),
80 |             steps_per_epoch=max(1, num_train//batch_size),
81 |             validation_data=data_generator_wrapper(lines[num_train:], batch_size, input_shape, anchors, num_classes),
82 |             validation_steps=max(1, num_val//batch_size),
83 |             epochs=150, # must exceed initial_epoch: stage 1 already ran to epoch 100, so 50 more epochs here
84 |             initial_epoch=100,
85 |             callbacks=[logging, checkpoint, reduce_lr, early_stopping])
86 |         model.save_weights(log_dir + 'trained_weights_final.h5')
87 |         #plot_model(model, to_file='/content/drive/My Drive/App/model_data/model.png')
88 |     # Further training if needed.
89 |
90 |
91 | def get_classes(classes_path):
92 |     '''loads the classes'''
93 |     with open(classes_path) as f:
94 |         class_names = f.readlines()
95 |     class_names = [c.strip() for c in class_names]
96 |     return class_names
97 |
98 | def get_anchors(anchors_path):
99 |     '''loads the anchors from a file'''
100 |     with open(anchors_path) as f:
101 |         anchors = f.readline()
102 |     anchors = [float(x) for x in anchors.split(',')]
103 |     return np.array(anchors).reshape(-1, 2)
104 |
105 |
106 | def create_model(input_shape, anchors, num_classes, load_pretrained=True, freeze_body=2,
107 |             weights_path='F:/model_data/trained_weights_final_1214_608_3.h5'):
108 |     '''create the training model'''
109 |     K.clear_session() # get a new session
110 |     image_input = Input(shape=(None, None, 3))
111 |     h, w = input_shape
112 |     num_anchors = len(anchors)
113 |
114 |     y_true = [Input(shape=(h//{0:32, 1:16, 2:8}[l], w//{0:32, 1:16, 2:8}[l], \
115 |         num_anchors//3, num_classes+5)) for l in range(3)]
116 |
117 |     model_body = yolo_body(image_input, num_anchors//3, num_classes)
118 |     print('Create YOLOv3 model with {} anchors and {} classes.'.format(num_anchors, num_classes))
119 |
120 |     if load_pretrained:
121 |         model_body.load_weights(weights_path, by_name=True, skip_mismatch=True)
122 |         print('Load weights {}.'.format(weights_path))
123 |         if freeze_body in [1, 2]:
124 |             # Freeze darknet53 body or freeze all but 3 output layers.
125 | num = (185, len(model_body.layers)-3)[freeze_body-1] 126 | for i in range(num): model_body.layers[i].trainable = False 127 | print('Freeze the first {} layers of total {} layers.'.format(num, len(model_body.layers))) 128 | 129 | model_loss = Lambda(yolo_loss, output_shape=(1,), name='yolo_loss', 130 | arguments={'anchors': anchors, 'num_classes': num_classes, 'ignore_thresh': 0.5})( 131 | [*model_body.output, *y_true]) 132 | model = Model([model_body.input, *y_true], model_loss) 133 | 134 | return model 135 | 136 | def create_tiny_model(input_shape, anchors, num_classes, load_pretrained=True, freeze_body=2, 137 | weights_path='model_data/tiny_yolo_weights.h5'): 138 | '''create the training model, for Tiny YOLOv3''' 139 | K.clear_session() # get a new session 140 | image_input = Input(shape=(None, None, 3)) 141 | h, w = input_shape 142 | num_anchors = len(anchors) 143 | 144 | y_true = [Input(shape=(h//{0:32, 1:16}[l], w//{0:32, 1:16}[l], \ 145 | num_anchors//2, num_classes+5)) for l in range(2)] 146 | 147 | model_body = tiny_yolo_body(image_input, num_anchors//2, num_classes) 148 | print('Create Tiny YOLOv3 model with {} anchors and {} classes.'.format(num_anchors, num_classes)) 149 | 150 | if load_pretrained: 151 | model_body.load_weights(weights_path, by_name=True, skip_mismatch=True) 152 | print('Load weights {}.'.format(weights_path)) 153 | if freeze_body in [1, 2]: 154 | # Freeze the darknet body or freeze all but 2 output layers. 155 | num = (20, len(model_body.layers)-2)[freeze_body-1] 156 | for i in range(num): model_body.layers[i].trainable = False 157 | print('Freeze the first {} layers of total {} layers.'.format(num, len(model_body.layers))) 158 | 159 | model_loss = Lambda(yolo_loss, output_shape=(1,), name='yolo_loss', 160 | arguments={'anchors': anchors, 'num_classes': num_classes, 'ignore_thresh': 0.7})( 161 | [*model_body.output, *y_true]) 162 | model = Model([model_body.input, *y_true], model_loss) 163 | 164 | return model 165 | 166 | def data_generator(annotation_lines, batch_size, input_shape, anchors, num_classes): 167 | '''data generator for fit_generator''' 168 | n = len(annotation_lines) 169 | i = 0 170 | while True: 171 | image_data = [] 172 | box_data = [] 173 | for b in range(batch_size): 174 | if i==0: 175 | np.random.shuffle(annotation_lines) 176 | image, box = get_random_data(annotation_lines[i], input_shape, random=True) 177 | image_data.append(image) 178 | box_data.append(box) 179 | i = (i+1) % n 180 | image_data = np.array(image_data) 181 | box_data = np.array(box_data) 182 | y_true = preprocess_true_boxes(box_data, input_shape, anchors, num_classes) 183 | yield [image_data, *y_true], np.zeros(batch_size) 184 | 185 | def data_generator_wrapper(annotation_lines, batch_size, input_shape, anchors, num_classes): 186 | n = len(annotation_lines) 187 | if n==0 or batch_size<=0: return None 188 | return data_generator(annotation_lines, batch_size, input_shape, anchors, num_classes) 189 | 190 | if __name__ == '__main__': 191 | _main() 192 | -------------------------------------------------------------------------------- /yolo.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Class definition of YOLO_v3 style detection model on image and video 4 | """ 5 | 6 | import colorsys 7 | import os 8 | from timeit import default_timer as timer 9 | 10 | import numpy as np 11 | from keras import backend as K 12 | from keras.models import load_model 13 | from keras.layers import Input 14 | from PIL 
import Image, ImageFont, ImageDraw
15 |
16 | from yolo3.model import yolo_eval, yolo_body, tiny_yolo_body # tiny_yolo_body is needed by generate() below
17 | from yolo3.utils import letterbox_image
18 | import os
19 | from keras.utils import multi_gpu_model
20 |
21 | class YOLO(object):
22 |     _defaults = {
23 |         "model_path": '/model_data/yourweight.h5', #Put your weight name here
24 |         "anchors_path": '/model_data/yolo_anchor.txt',
25 |         "classes_path": '/model_data/coco_classes.txt',
26 |         "score" : 0.4,
27 |         "iou" : 0.5,
28 |         "model_image_size" : (608, 608),
29 |         "gpu_num" : 1,
30 |         "class_used": ['car', 'motorbike', 'bus', 'truck']
31 |     }
32 |
33 |     @classmethod
34 |     def get_defaults(cls, n):
35 |         if n in cls._defaults:
36 |             return cls._defaults[n]
37 |         else:
38 |             return "Unrecognized attribute name '" + n + "'"
39 |
40 |     def __init__(self, **kwargs):
41 |         self.__dict__.update(self._defaults) # set up default values
42 |         self.__dict__.update(kwargs) # and update with user overrides
43 |         self.class_names = self._get_class()
44 |         self.anchors = self._get_anchors()
45 |         self.sess = K.get_session()
46 |         self.boxes, self.scores, self.classes = self.generate()
47 |
48 |     def _get_class(self):
49 |         classes_path = os.path.expanduser(self.classes_path)
50 |         with open(classes_path) as f:
51 |             class_names = f.readlines()
52 |         class_names = [c.strip() for c in class_names]
53 |         return class_names
54 |
55 |     def _get_anchors(self):
56 |         anchors_path = os.path.expanduser(self.anchors_path)
57 |         with open(anchors_path) as f:
58 |             anchors = f.readline()
59 |         anchors = [float(x) for x in anchors.split(',')]
60 |         return np.array(anchors).reshape(-1, 2)
61 |
62 |     def generate(self):
63 |         model_path = os.path.expanduser(self.model_path)
64 |         assert model_path.endswith('.h5'), 'Keras model or weights must be a .h5 file.'
65 |
66 |         # Load model, or construct model and load weights.
67 |         num_anchors = len(self.anchors)
68 |         num_classes = len(self.class_names)
69 |         is_tiny_version = num_anchors==6 # default setting
70 |         try:
71 |             self.yolo_model = load_model(model_path, compile=False)
72 |         except:
73 |             self.yolo_model = tiny_yolo_body(Input(shape=(None,None,3)), num_anchors//2, num_classes) \
74 |                 if is_tiny_version else yolo_body(Input(shape=(None,None,3)), num_anchors//3, num_classes)
75 |             self.yolo_model.load_weights(self.model_path) # make sure model, anchors and classes match
76 |         else:
77 |             assert self.yolo_model.layers[-1].output_shape[-1] == \
78 |                 num_anchors/len(self.yolo_model.output) * (num_classes + 5), \
79 |                 'Mismatch between model and given anchor and class sizes'
80 |
81 |         print('{} model, anchors, and classes loaded.'.format(model_path))
82 |
83 |         # Generate colors for drawing bounding boxes.
84 |         hsv_tuples = [(x / len(self.class_names), 1., 1.)
85 |                       for x in range(len(self.class_names))]
86 |         self.colors = list(map(lambda x: colorsys.hsv_to_rgb(*x), hsv_tuples))
87 |         self.colors = list(
88 |             map(lambda x: (int(x[0] * 255), int(x[1] * 255), int(x[2] * 255)),
89 |                 self.colors))
90 |         np.random.seed(10101)  # Fixed seed for consistent colors across runs.
91 |         np.random.shuffle(self.colors)  # Shuffle colors to decorrelate adjacent classes.
92 |         np.random.seed(None)  # Reset seed to default.
93 |
94 |         # Generate output tensor targets for filtered bounding boxes.
95 |         self.input_image_shape = K.placeholder(shape=(2, ))
96 |         if self.gpu_num>=2:
97 |             self.yolo_model = multi_gpu_model(self.yolo_model, gpus=self.gpu_num)
98 |         boxes, scores, classes = yolo_eval(self.yolo_model.output, self.anchors,
99 |                 len(self.class_names), self.input_image_shape,
100 |                 score_threshold=self.score, iou_threshold=self.iou)
101 |         return boxes, scores, classes
102 |
103 |     def detect_image(self, image):
104 |         start = timer()
105 |
106 |         #image2 = image.crop((0,0,image.width/2,image.height))
107 |         if self.model_image_size != (None, None):
108 |             assert self.model_image_size[0]%32 == 0, 'Multiples of 32 required'
109 |             assert self.model_image_size[1]%32 == 0, 'Multiples of 32 required'
110 |             boxed_image = letterbox_image(image, tuple(reversed(self.model_image_size)))
111 |         else:
112 |             new_image_size = (image.width - (image.width % 32),
113 |                               image.height - (image.height % 32))
114 |             boxed_image = letterbox_image(image, new_image_size)
115 |         image_data = np.array(boxed_image, dtype='float32')
116 |
117 |         print(image_data.shape)
118 |         image_data /= 255.
119 |         image_data = np.expand_dims(image_data, 0)  # Add batch dimension.
120 |
121 |         out_boxes, out_scores, out_classes = self.sess.run(
122 |             [self.boxes, self.scores, self.classes],
123 |             feed_dict={
124 |                 self.yolo_model.input: image_data,
125 |                 self.input_image_shape: [image.size[1], image.size[0]],
126 |                 K.learning_phase(): 0
127 |             })
128 |
129 |         print('Found {} boxes for {}'.format(len(out_boxes), 'img'))
130 |
131 |         font = ImageFont.truetype(font='/font/FiraMono-Medium.otf',
132 |                     size=np.floor(3e-2 * image.size[1] + 0.5).astype('int32'))
133 |
134 |         confidences = []
135 |         return_boxs = []
136 |         midPoint = []
137 |         detected_class = []
138 |
139 |         for i, c in reversed(list(enumerate(out_classes))):
140 |             predicted_class = self.class_names[c]
141 |             box = out_boxes[i]
142 |             score = out_scores[i]
143 |
144 |             label = '{} {:.2f}'.format(predicted_class, score)
145 |             draw = ImageDraw.Draw(image)
146 |             label_size = draw.textsize(label, font)
147 |
148 |             top, left, bottom, right = box
149 |             top = max(0, np.floor(top + 0.5).astype('int32'))
150 |             left = max(0, np.floor(left + 0.5).astype('int32'))
151 |             bottom = min(image.size[1], np.floor(bottom + 0.5).astype('int32'))
152 |             right = min(image.size[0], np.floor(right + 0.5).astype('int32'))
153 |             midBoxYOLO = [int((right+left)/2), int((top+bottom)/2)]  # midpoint of the detection, used by counter()
154 |             print(label, (left, top), (right, bottom), midBoxYOLO)
155 |             confidences.append(float(score))
156 |             midPoint.append(midBoxYOLO)
157 |             detected_class.append(predicted_class)
158 |
159 |             if top - label_size[1] >= 0:
160 |                 text_origin = np.array([left, top - label_size[1]])
161 |             else:
162 |                 text_origin = np.array([left, top + 1])
163 |
164 |             box = out_boxes[i]
165 |             x = int(box[1])
166 |             y = int(box[0])
167 |             w = int(box[3]-box[1])
168 |             h = int(box[2]-box[0])
169 |             if x < 0 :
170 |                 w = w + x
171 |                 x = 0
172 |             if y < 0 :
173 |                 h = h + y
174 |                 y = 0
175 |             return_boxs.append([x,y,w,h])  # [x, y, width, height] for cv2.dnn.NMSBoxes in main.py
176 |
177 |         return return_boxs, detected_class, confidences, midPoint
178 |
179 |     def counter(self, p0, out_class, midPoint):  # returns the class whose detection midpoint is within 7 px of p0, or None if nothing matches
180 |         for mid in midPoint:
181 |             if abs(p0[0] - mid[0]) < 7 and abs(p0[1] - mid[1]) < 7:
182 |                 i = midPoint.index(mid)
183 |                 print(mid, midPoint, p0, out_class[i])
184 |                 return out_class[i]
185 |
186 |     def close_session(self):
187 |         self.sess.close()
188 |
--------------------------------------------------------------------------------
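To see how the pieces of yolo.py fit together, a rough single-image sketch; the image path is a placeholder, and it assumes you have pointed _defaults at real weight/anchor/class files:

```python
# Sketch: one-off detection with the YOLO wrapper above.
from PIL import Image
from yolo import YOLO

yolo = YOLO()  # loads weights, anchors and class names from _defaults
image = Image.open('test.jpg')  # placeholder image
boxes, classes, confidences, midpoints = yolo.detect_image(image)
for (x, y, w, h), name, conf in zip(boxes, classes, confidences):
    print('%s %.2f at x=%d y=%d w=%d h=%d' % (name, conf, x, y, w, h))

# counter() maps a tracked centre point back to the class detected near it
if midpoints:
    print(yolo.counter(midpoints[0], classes, midpoints))
yolo.close_session()
```

/yolo3/__init__.py: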
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BenBenee/Traffic-Counting-with-YOLOv3-and-SORT/f38e2f05ee05d487082b6b668da9e3f8275ab8f1/yolo3/__init__.py
--------------------------------------------------------------------------------
/yolo3/model.py:
--------------------------------------------------------------------------------
1 | """YOLO_v3 Model Defined in Keras."""
2 |
3 | from functools import wraps
4 |
5 | import numpy as np
6 | import tensorflow as tf
7 | from keras import backend as K
8 | from keras.layers import Conv2D, Add, ZeroPadding2D, UpSampling2D, Concatenate, MaxPooling2D
9 | from keras.layers.advanced_activations import LeakyReLU
10 | from keras.layers.normalization import BatchNormalization
11 | from keras.models import Model
12 | from keras.regularizers import l2
13 |
14 | from yolo3.utils import compose
15 |
16 |
17 | @wraps(Conv2D)
18 | def DarknetConv2D(*args, **kwargs):
19 |     """Wrapper to set Darknet parameters for Convolution2D."""
20 |     darknet_conv_kwargs = {'kernel_regularizer': l2(5e-4)}
21 |     darknet_conv_kwargs['padding'] = 'valid' if kwargs.get('strides')==(2,2) else 'same'
22 |     darknet_conv_kwargs.update(kwargs)
23 |     return Conv2D(*args, **darknet_conv_kwargs)
24 |
25 | def DarknetConv2D_BN_Leaky(*args, **kwargs):
26 |     """Darknet Convolution2D followed by BatchNormalization and LeakyReLU."""
27 |     no_bias_kwargs = {'use_bias': False}
28 |     no_bias_kwargs.update(kwargs)
29 |     return compose(
30 |         DarknetConv2D(*args, **no_bias_kwargs),
31 |         BatchNormalization(),
32 |         LeakyReLU(alpha=0.1))
33 |
34 | def resblock_body(x, num_filters, num_blocks):
35 |     '''A series of resblocks starting with a downsampling Convolution2D'''
36 |     # Darknet uses left and top padding instead of 'same' mode
37 |     x = ZeroPadding2D(((1,0),(1,0)))(x)
38 |     x = DarknetConv2D_BN_Leaky(num_filters, (3,3), strides=(2,2))(x)
39 |     for i in range(num_blocks):
40 |         y = compose(
41 |             DarknetConv2D_BN_Leaky(num_filters//2, (1,1)),
42 |             DarknetConv2D_BN_Leaky(num_filters, (3,3)))(x)
43 |         x = Add()([x,y])
44 |     return x
45 |
46 | def darknet_body(x):
47 |     '''Darknet body having 52 Convolution2D layers'''
48 |     x = DarknetConv2D_BN_Leaky(32, (3,3))(x)
49 |     x = resblock_body(x, 64, 1)
50 |     x = resblock_body(x, 128, 2)
51 |     x = resblock_body(x, 256, 8)
52 |     x = resblock_body(x, 512, 8)
53 |     x = resblock_body(x, 1024, 4)
54 |     return x
55 |
56 | def make_last_layers(x, num_filters, out_filters):
57 |     '''6 Conv2D_BN_Leaky layers followed by a Conv2D_linear layer'''
58 |     x = compose(
59 |             DarknetConv2D_BN_Leaky(num_filters, (1,1)),
60 |             DarknetConv2D_BN_Leaky(num_filters*2, (3,3)),
61 |             DarknetConv2D_BN_Leaky(num_filters, (1,1)),
62 |             DarknetConv2D_BN_Leaky(num_filters*2, (3,3)),
63 |             DarknetConv2D_BN_Leaky(num_filters, (1,1)))(x)
64 |     y = compose(
65 |             DarknetConv2D_BN_Leaky(num_filters*2, (3,3)),
66 |             DarknetConv2D(out_filters, (1,1)))(x)
67 |     return x, y
68 |
69 |
70 | def yolo_body(inputs, num_anchors, num_classes):
71 |     """Create YOLO_V3 model CNN body in Keras."""
72 |     darknet = Model(inputs, darknet_body(inputs))
73 |     x, y1 = make_last_layers(darknet.output, 512, num_anchors*(num_classes+5))
74 |
75 |     x = compose(
76 |             DarknetConv2D_BN_Leaky(256, (1,1)),
77 |             UpSampling2D(2))(x)
78 |     x = Concatenate()([x,darknet.layers[152].output])
79 |     x, y2 = make_last_layers(x, 256, num_anchors*(num_classes+5))
80 |
81 |     x = compose(
82 |             DarknetConv2D_BN_Leaky(128, (1,1)),
83 |             UpSampling2D(2))(x)
84 |     x = Concatenate()([x,darknet.layers[92].output])
85 |     x, y3 = make_last_layers(x, 128, num_anchors*(num_classes+5))
86 |
87 |     return Model(inputs, [y1,y2,y3])
88 |
89 | def tiny_yolo_body(inputs, num_anchors, num_classes):
90 |     '''Create Tiny YOLO_v3 model CNN body in keras.'''
91 |     x1 = compose(
92 |             DarknetConv2D_BN_Leaky(16, (3,3)),
93 |             MaxPooling2D(pool_size=(2,2), strides=(2,2), padding='same'),
94 |             DarknetConv2D_BN_Leaky(32, (3,3)),
95 |             MaxPooling2D(pool_size=(2,2), strides=(2,2), padding='same'),
96 |             DarknetConv2D_BN_Leaky(64, (3,3)),
97 |             MaxPooling2D(pool_size=(2,2), strides=(2,2), padding='same'),
98 |             DarknetConv2D_BN_Leaky(128, (3,3)),
99 |             MaxPooling2D(pool_size=(2,2), strides=(2,2), padding='same'),
100 |             DarknetConv2D_BN_Leaky(256, (3,3)))(inputs)
101 |     x2 = compose(
102 |             MaxPooling2D(pool_size=(2,2), strides=(2,2), padding='same'),
103 |             DarknetConv2D_BN_Leaky(512, (3,3)),
104 |             MaxPooling2D(pool_size=(2,2), strides=(1,1), padding='same'),
105 |             DarknetConv2D_BN_Leaky(1024, (3,3)),
106 |             DarknetConv2D_BN_Leaky(256, (1,1)))(x1)
107 |     y1 = compose(
108 |             DarknetConv2D_BN_Leaky(512, (3,3)),
109 |             DarknetConv2D(num_anchors*(num_classes+5), (1,1)))(x2)
110 |
111 |     x2 = compose(
112 |             DarknetConv2D_BN_Leaky(128, (1,1)),
113 |             UpSampling2D(2))(x2)
114 |     y2 = compose(
115 |             Concatenate(),
116 |             DarknetConv2D_BN_Leaky(256, (3,3)),
117 |             DarknetConv2D(num_anchors*(num_classes+5), (1,1)))([x2,x1])
118 |
119 |     return Model(inputs, [y1,y2])
120 |
121 |
122 | def yolo_head(feats, anchors, num_classes, input_shape, calc_loss=False):
123 |     """Convert final layer features to bounding box parameters."""
124 |     num_anchors = len(anchors)
125 |     # Reshape to batch, height, width, num_anchors, box_params.
126 |     anchors_tensor = K.reshape(K.constant(anchors), [1, 1, 1, num_anchors, 2])
127 |
128 |     grid_shape = K.shape(feats)[1:3] # height, width
129 |     grid_y = K.tile(K.reshape(K.arange(0, stop=grid_shape[0]), [-1, 1, 1, 1]),
130 |         [1, grid_shape[1], 1, 1])
131 |     grid_x = K.tile(K.reshape(K.arange(0, stop=grid_shape[1]), [1, -1, 1, 1]),
132 |         [grid_shape[0], 1, 1, 1])
133 |     grid = K.concatenate([grid_x, grid_y])
134 |     grid = K.cast(grid, K.dtype(feats))
135 |
136 |     feats = K.reshape(
137 |         feats, [-1, grid_shape[0], grid_shape[1], num_anchors, num_classes + 5])
138 |
139 |     # Adjust predictions to each spatial grid point and anchor size.
140 |     box_xy = (K.sigmoid(feats[..., :2]) + grid) / K.cast(grid_shape[::-1], K.dtype(feats))
141 |     box_wh = K.exp(feats[..., 2:4]) * anchors_tensor / K.cast(input_shape[::-1], K.dtype(feats))
142 |     box_confidence = K.sigmoid(feats[..., 4:5])
143 |     box_class_probs = K.sigmoid(feats[..., 5:])
144 |
145 |     if calc_loss == True:
146 |         return grid, feats, box_xy, box_wh
147 |     return box_xy, box_wh, box_confidence, box_class_probs
148 |
149 |
150 | def yolo_correct_boxes(box_xy, box_wh, input_shape, image_shape):
151 |     '''Get corrected boxes'''
152 |     box_yx = box_xy[..., ::-1]
153 |     box_hw = box_wh[..., ::-1]
154 |     input_shape = K.cast(input_shape, K.dtype(box_yx))
155 |     image_shape = K.cast(image_shape, K.dtype(box_yx))
156 |     new_shape = K.round(image_shape * K.min(input_shape/image_shape))
157 |     offset = (input_shape-new_shape)/2./input_shape
158 |     scale = input_shape/new_shape
159 |     box_yx = (box_yx - offset) * scale
160 |     box_hw *= scale
161 |
162 |     box_mins = box_yx - (box_hw / 2.)
163 |     box_maxes = box_yx + (box_hw / 2.)
164 |     boxes =  K.concatenate([
165 |         box_mins[..., 0:1],  # y_min
166 |         box_mins[..., 1:2],  # x_min
167 |         box_maxes[..., 0:1],  # y_max
168 |         box_maxes[..., 1:2]  # x_max
169 |     ])
170 |
171 |     # Scale boxes back to original image shape.
172 |     boxes *= K.concatenate([image_shape, image_shape])
173 |     return boxes
174 |
175 |
176 | def yolo_boxes_and_scores(feats, anchors, num_classes, input_shape, image_shape):
177 |     '''Process Conv layer output'''
178 |     box_xy, box_wh, box_confidence, box_class_probs = yolo_head(feats,
179 |         anchors, num_classes, input_shape)
180 |     boxes = yolo_correct_boxes(box_xy, box_wh, input_shape, image_shape)
181 |     boxes = K.reshape(boxes, [-1, 4])
182 |     box_scores = box_confidence * box_class_probs
183 |     box_scores = K.reshape(box_scores, [-1, num_classes])
184 |     return boxes, box_scores
185 |
186 |
187 | def yolo_eval(yolo_outputs,
188 |               anchors,
189 |               num_classes,
190 |               image_shape,
191 |               max_boxes=40,
192 |               score_threshold=.5,
193 |               iou_threshold=.5):
194 |     """Evaluate YOLO model on given input and return filtered boxes."""
195 |     num_layers = len(yolo_outputs)
196 |     anchor_mask = [[6,7,8], [3,4,5], [0,1,2]] if num_layers==3 else [[3,4,5], [1,2,3]] # default setting
197 |     input_shape = K.shape(yolo_outputs[0])[1:3] * 32
198 |     boxes = []
199 |     box_scores = []
200 |     for l in range(num_layers):
201 |         _boxes, _box_scores = yolo_boxes_and_scores(yolo_outputs[l],
202 |             anchors[anchor_mask[l]], num_classes, input_shape, image_shape)
203 |         boxes.append(_boxes)
204 |         box_scores.append(_box_scores)
205 |     boxes = K.concatenate(boxes, axis=0)
206 |     box_scores = K.concatenate(box_scores, axis=0)
207 |
208 |     mask = box_scores >= score_threshold
209 |     max_boxes_tensor = K.constant(max_boxes, dtype='int32')
210 |     boxes_ = []
211 |     scores_ = []
212 |     classes_ = []
213 |     for c in range(num_classes):
214 |         # TODO: use keras backend instead of tf.
215 |         class_boxes = tf.boolean_mask(boxes, mask[:, c])
216 |         class_box_scores = tf.boolean_mask(box_scores[:, c], mask[:, c])
217 |         nms_index = tf.image.non_max_suppression(
218 |             class_boxes, class_box_scores, max_boxes_tensor, iou_threshold=iou_threshold)
219 |         class_boxes = K.gather(class_boxes, nms_index)
220 |         class_box_scores = K.gather(class_box_scores, nms_index)
221 |         classes = K.ones_like(class_box_scores, 'int32') * c
222 |         boxes_.append(class_boxes)
223 |         scores_.append(class_box_scores)
224 |         classes_.append(classes)
225 |     boxes_ = K.concatenate(boxes_, axis=0)
226 |     scores_ = K.concatenate(scores_, axis=0)
227 |     classes_ = K.concatenate(classes_, axis=0)
228 |
229 |     return boxes_, scores_, classes_
230 |
231 |
232 | def preprocess_true_boxes(true_boxes, input_shape, anchors, num_classes):
233 |     '''Preprocess true boxes to training input format
234 |
235 |     Parameters
236 |     ----------
237 |     true_boxes: array, shape=(m, T, 5)
238 |         Absolute x_min, y_min, x_max, y_max, class_id relative to input_shape.
239 |     input_shape: array-like, hw, multiples of 32
240 |     anchors: array, shape=(N, 2), wh
241 |     num_classes: integer
242 |
243 |     Returns
244 |     -------
245 |     y_true: list of array, shape like yolo_outputs, xywh are relative value
246 |
247 |     '''
248 |     assert (true_boxes[..., 4]<num_classes).all(), 'class id must be less than num_classes'
249 |     num_layers = len(anchors)//3 # default setting
250 |     anchor_mask = [[6,7,8], [3,4,5], [0,1,2]] if num_layers==3 else [[3,4,5], [1,2,3]]
251 |
252 |     true_boxes = np.array(true_boxes, dtype='float32')
253 |     input_shape = np.array(input_shape, dtype='int32')
254 |     boxes_xy = (true_boxes[..., 0:2] + true_boxes[..., 2:4]) // 2
255 |     boxes_wh = true_boxes[..., 2:4] - true_boxes[..., 0:2]
256 |     true_boxes[..., 0:2] = boxes_xy/input_shape[::-1]
257 |     true_boxes[..., 2:4] = boxes_wh/input_shape[::-1]
258 |
259 |     m = true_boxes.shape[0]
260 |     grid_shapes = [input_shape//{0:32, 1:16, 2:8}[l] for l in range(num_layers)]
261 |     y_true = [np.zeros((m,grid_shapes[l][0],grid_shapes[l][1],len(anchor_mask[l]),5+num_classes),
262 |         dtype='float32') for l in range(num_layers)]
263 |
264 |     # Expand dim to apply broadcasting.
265 |     anchors = np.expand_dims(anchors, 0)
266 |     anchor_maxes = anchors / 2.
267 |     anchor_mins = -anchor_maxes
268 |     valid_mask = boxes_wh[..., 0]>0
269 |
270 |     for b in range(m):
271 |         # Discard zero rows.
272 |         wh = boxes_wh[b, valid_mask[b]]
273 |         if len(wh)==0: continue
274 |         # Expand dim to apply broadcasting.
275 |         wh = np.expand_dims(wh, -2)
276 |         box_maxes = wh / 2.
277 | box_mins = -box_maxes 278 | 279 | intersect_mins = np.maximum(box_mins, anchor_mins) 280 | intersect_maxes = np.minimum(box_maxes, anchor_maxes) 281 | intersect_wh = np.maximum(intersect_maxes - intersect_mins, 0.) 282 | intersect_area = intersect_wh[..., 0] * intersect_wh[..., 1] 283 | box_area = wh[..., 0] * wh[..., 1] 284 | anchor_area = anchors[..., 0] * anchors[..., 1] 285 | iou = intersect_area / (box_area + anchor_area - intersect_area) 286 | 287 | # Find best anchor for each true box 288 | best_anchor = np.argmax(iou, axis=-1) 289 | 290 | for t, n in enumerate(best_anchor): 291 | for l in range(num_layers): 292 | if n in anchor_mask[l]: 293 | i = np.floor(true_boxes[b,t,0]*grid_shapes[l][1]).astype('int32') 294 | j = np.floor(true_boxes[b,t,1]*grid_shapes[l][0]).astype('int32') 295 | k = anchor_mask[l].index(n) 296 | c = true_boxes[b,t, 4].astype('int32') 297 | y_true[l][b, j, i, k, 0:4] = true_boxes[b,t, 0:4] 298 | y_true[l][b, j, i, k, 4] = 1 299 | y_true[l][b, j, i, k, 5+c] = 1 300 | 301 | return y_true 302 | 303 | 304 | def box_iou(b1, b2): 305 | '''Return iou tensor 306 | 307 | Parameters 308 | ---------- 309 | b1: tensor, shape=(i1,...,iN, 4), xywh 310 | b2: tensor, shape=(j, 4), xywh 311 | 312 | Returns 313 | ------- 314 | iou: tensor, shape=(i1,...,iN, j) 315 | 316 | ''' 317 | 318 | # Expand dim to apply broadcasting. 319 | b1 = K.expand_dims(b1, -2) 320 | b1_xy = b1[..., :2] 321 | b1_wh = b1[..., 2:4] 322 | b1_wh_half = b1_wh/2. 323 | b1_mins = b1_xy - b1_wh_half 324 | b1_maxes = b1_xy + b1_wh_half 325 | 326 | # Expand dim to apply broadcasting. 327 | b2 = K.expand_dims(b2, 0) 328 | b2_xy = b2[..., :2] 329 | b2_wh = b2[..., 2:4] 330 | b2_wh_half = b2_wh/2. 331 | b2_mins = b2_xy - b2_wh_half 332 | b2_maxes = b2_xy + b2_wh_half 333 | 334 | intersect_mins = K.maximum(b1_mins, b2_mins) 335 | intersect_maxes = K.minimum(b1_maxes, b2_maxes) 336 | intersect_wh = K.maximum(intersect_maxes - intersect_mins, 0.) 
337 |     intersect_area = intersect_wh[..., 0] * intersect_wh[..., 1]
338 |     b1_area = b1_wh[..., 0] * b1_wh[..., 1]
339 |     b2_area = b2_wh[..., 0] * b2_wh[..., 1]
340 |     iou = intersect_area / (b1_area + b2_area - intersect_area)
341 |
342 |     return iou
343 |
344 |
345 | def yolo_loss(args, anchors, num_classes, ignore_thresh=.5, print_loss=False):
346 |     '''Return yolo_loss tensor
347 |
348 |     Parameters
349 |     ----------
350 |     yolo_outputs: list of tensor, the output of yolo_body or tiny_yolo_body
351 |     y_true: list of array, the output of preprocess_true_boxes
352 |     anchors: array, shape=(N, 2), wh
353 |     num_classes: integer
354 |     ignore_thresh: float, the iou threshold whether to ignore object confidence loss
355 |
356 |     Returns
357 |     -------
358 |     loss: tensor, shape=(1,)
359 |
360 |     '''
361 |     num_layers = len(anchors)//3 # default setting
362 |     yolo_outputs = args[:num_layers]
363 |     y_true = args[num_layers:]
364 |     anchor_mask = [[6,7,8], [3,4,5], [0,1,2]] if num_layers==3 else [[3,4,5], [1,2,3]]
365 |     input_shape = K.cast(K.shape(yolo_outputs[0])[1:3] * 32, K.dtype(y_true[0]))
366 |     grid_shapes = [K.cast(K.shape(yolo_outputs[l])[1:3], K.dtype(y_true[0])) for l in range(num_layers)]
367 |     loss = 0
368 |     m = K.shape(yolo_outputs[0])[0] # batch size, tensor
369 |     mf = K.cast(m, K.dtype(yolo_outputs[0]))
370 |
371 |     for l in range(num_layers):
372 |         object_mask = y_true[l][..., 4:5]
373 |         true_class_probs = y_true[l][..., 5:]
374 |
375 |         grid, raw_pred, pred_xy, pred_wh = yolo_head(yolo_outputs[l],
376 |              anchors[anchor_mask[l]], num_classes, input_shape, calc_loss=True)
377 |         pred_box = K.concatenate([pred_xy, pred_wh])
378 |
379 |         # Darknet raw box to calculate loss.
380 |         raw_true_xy = y_true[l][..., :2]*grid_shapes[l][::-1] - grid
381 |         raw_true_wh = K.log(y_true[l][..., 2:4] / anchors[anchor_mask[l]] * input_shape[::-1])
382 |         raw_true_wh = K.switch(object_mask, raw_true_wh, K.zeros_like(raw_true_wh)) # avoid log(0)=-inf
383 |         box_loss_scale = 2 - y_true[l][...,2:3]*y_true[l][...,3:4]
384 |
385 |         # Find ignore mask, iterate over each of batch.
386 |         ignore_mask = tf.TensorArray(K.dtype(y_true[0]), size=1, dynamic_size=True)
387 |         object_mask_bool = K.cast(object_mask, 'bool')
388 |         def loop_body(b, ignore_mask):
389 |             true_box = tf.boolean_mask(y_true[l][b,...,0:4], object_mask_bool[b,...,0])
390 |             iou = box_iou(pred_box[b], true_box)
391 |             best_iou = K.max(iou, axis=-1)
392 |             ignore_mask = ignore_mask.write(b, K.cast(best_iou<ignore_thresh, K.dtype(true_box)))
393 |             return b+1, ignore_mask
394 |         _, ignore_mask = K.control_flow_ops.while_loop(lambda b,*args: b<m, loop_body, [0, ignore_mask])
395 |         ignore_mask = ignore_mask.stack()
396 |         ignore_mask = K.expand_dims(ignore_mask, -1)
397 |
398 |         # K.binary_crossentropy is helpful to avoid exp overflow.
399 |         xy_loss = object_mask * box_loss_scale * K.binary_crossentropy(raw_true_xy, raw_pred[...,0:2], from_logits=True)
400 |         wh_loss = object_mask * box_loss_scale * 0.5 * K.square(raw_true_wh-raw_pred[...,2:4])
401 |         confidence_loss = object_mask * K.binary_crossentropy(object_mask, raw_pred[...,4:5], from_logits=True)+ \
402 |             (1-object_mask) * K.binary_crossentropy(object_mask, raw_pred[...,4:5], from_logits=True) * ignore_mask
403 |         class_loss = object_mask * K.binary_crossentropy(true_class_probs, raw_pred[...,5:], from_logits=True)
404 |
405 |         xy_loss = K.sum(xy_loss) / mf
406 |         wh_loss = K.sum(wh_loss) / mf
407 |         confidence_loss = K.sum(confidence_loss) / mf
408 |         class_loss = K.sum(class_loss) / mf
409 |         loss += xy_loss + wh_loss + confidence_loss + class_loss
410 |         if print_loss:
411 |             loss = tf.Print(loss, [loss, xy_loss, wh_loss, confidence_loss, class_loss, K.sum(ignore_mask)], message='loss: ')
412 |     return loss
--------------------------------------------------------------------------------
/yolo3/utils.py:
--------------------------------------------------------------------------------
1 | """Miscellaneous utility functions."""
2 |
3 | from functools import reduce
4 |
5 | from PIL import Image
6 | import numpy as np
7 | from matplotlib.colors import rgb_to_hsv, hsv_to_rgb
8 |
9 | def compose(*funcs):
10 |     """Compose arbitrarily many functions, evaluated left to right.
11 |
12 |     Reference: https://mathieularose.com/function-composition-in-python/
13 |     """
14 |     # return lambda x: reduce(lambda v, f: f(v), funcs, x)
15 |     if funcs:
16 |         return reduce(lambda f, g: lambda *a, **kw: g(f(*a, **kw)), funcs)
17 |     else:
18 |         raise ValueError('Composition of empty sequence not supported.')
19 |
20 | def letterbox_image(image, size):
21 |     '''resize image with unchanged aspect ratio using padding'''
22 |     iw, ih = image.size
23 |     w, h = size
24 |     scale = min(w/iw, h/ih)
25 |     nw = int(iw*scale)
26 |     nh = int(ih*scale)
27 |
28 |     image = image.resize((nw,nh), Image.BICUBIC)
29 |     new_image = Image.new('RGB', size, (128,128,128))
30 |     new_image.paste(image, ((w-nw)//2, (h-nh)//2))
31 |     return new_image
32 |
33 | def rand(a=0, b=1):
34 |     return np.random.rand()*(b-a) + a
35 |
36 | def get_random_data(annotation_line, input_shape, random=True, max_boxes=20, jitter=.3, hue=.1, sat=1.5, val=1.5, proc_img=True):
37 |     '''random preprocessing for real-time data augmentation'''
38 |     line = annotation_line.split()
39 |     image = Image.open(line[0])
40 |     iw, ih = image.size
41 |     h, w = input_shape
42 |     box = np.array([np.array(list(map(int,box.split(',')))) for box in line[1:]])
43 |
44 |     if not random:
45 |         # resize image
46 |         scale = min(w/iw, h/ih)
47 |         nw = int(iw*scale)
48 |         nh = int(ih*scale)
49 |         dx = (w-nw)//2
50 |         dy = (h-nh)//2
51 |         image_data=0
52 |         if proc_img:
53 |             image = image.resize((nw,nh), Image.BICUBIC)
54 |             new_image = Image.new('RGB', (w,h), (128,128,128))
55 |             new_image.paste(image, (dx, dy))
56 |             image_data = np.array(new_image)/255.
57 |         # correct boxes
58 |         box_data = np.zeros((max_boxes,5))
59 |         if len(box)>0:
60 |             np.random.shuffle(box)
61 |             if len(box)>max_boxes: box = box[:max_boxes]
62 |             box[:, [0,2]] = box[:, [0,2]]*scale + dx
63 |             box[:, [1,3]] = box[:, [1,3]]*scale + dy
64 |             box_data[:len(box)] = box
65 |
66 |         return image_data, box_data
67 |
68 |     # resize image
69 |     new_ar = w/h * rand(1-jitter,1+jitter)/rand(1-jitter,1+jitter)
70 |     scale = rand(.25, 2)
71 |     if new_ar < 1:
72 |         nh = int(scale*h)
73 |         nw = int(nh*new_ar)
74 |     else:
75 |         nw = int(scale*w)
76 |         nh = int(nw/new_ar)
77 |     image = image.resize((nw,nh), Image.BICUBIC)
78 |
79 |     # place image
80 |     dx = int(rand(0, w-nw))
81 |     dy = int(rand(0, h-nh))
82 |     new_image = Image.new('RGB', (w,h), (128,128,128))
83 |     new_image.paste(image, (dx, dy))
84 |     image = new_image
85 |
86 |     # flip image or not
87 |     flip = rand()<.5
88 |     if flip: image = image.transpose(Image.FLIP_LEFT_RIGHT)
89 |
90 |     # distort image
91 |     hue = rand(-hue, hue)
92 |     sat = rand(1, sat) if rand()<.5 else 1/rand(1, sat)
93 |     val = rand(1, val) if rand()<.5 else 1/rand(1, val)
94 |     x = rgb_to_hsv(np.array(image)/255.)
95 | x[..., 0] += hue 96 | x[..., 0][x[..., 0]>1] -= 1 97 | x[..., 0][x[..., 0]<0] += 1 98 | x[..., 1] *= sat 99 | x[..., 2] *= val 100 | x[x>1] = 1 101 | x[x<0] = 0 102 | image_data = hsv_to_rgb(x) # numpy array, 0 to 1 103 | 104 | # correct boxes 105 | box_data = np.zeros((max_boxes,5)) 106 | if len(box)>0: 107 | np.random.shuffle(box) 108 | box[:, [0,2]] = box[:, [0,2]]*nw/iw + dx 109 | box[:, [1,3]] = box[:, [1,3]]*nh/ih + dy 110 | if flip: box[:, [0,2]] = w - box[:, [2,0]] 111 | box[:, 0:2][box[:, 0:2]<0] = 0 112 | box[:, 2][box[:, 2]>w] = w 113 | box[:, 3][box[:, 3]>h] = h 114 | box_w = box[:, 2] - box[:, 0] 115 | box_h = box[:, 3] - box[:, 1] 116 | box = box[np.logical_and(box_w>1, box_h>1)] # discard invalid box 117 | if len(box)>max_boxes: box = box[:max_boxes] 118 | box_data[:len(box)] = box 119 | 120 | return image_data, box_data 121 | --------------------------------------------------------------------------------
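Finally, a small sketch of how train.py's data_generator consumes get_random_data above; the annotation line is fabricated and the image path is a placeholder:

```python
# Sketch: one augmented training sample in the keras-yolo3 annotation format
# 'image_path x_min,y_min,x_max,y_max,class_id ...'.
import numpy as np
from yolo3.utils import get_random_data

annotation_line = 'test.jpg 120,80,340,260,0 400,150,520,300,2'  # placeholder
image_data, box_data = get_random_data(annotation_line, input_shape=(608, 608), random=True)

print(image_data.shape)  # (608, 608, 3), RGB floats in [0, 1]
print(box_data.shape)    # (20, 5): up to max_boxes rows of x_min,y_min,x_max,y_max,class_id
```

train.py batches these pairs and runs preprocess_true_boxes on box_data to build the per-scale y_true targets that yolo_loss consumes.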