├── .gitignore
├── LICENSE
├── README.md
├── checkpoints
    └── .gitkeep
├── colab_gpu.ipynb
├── conda-cpu.yml
├── conda-gpu.yml
├── convert.py
├── data
    ├── checkpoint
    ├── coco.names
    ├── girl.png
    ├── meme.jpg
    ├── meme2.jpeg
    ├── meme_out.jpg
    ├── street.jpg
    ├── street_out.jpg
    └── voc2012.names
├── detect.py
├── detect_video.py
├── docs
    └── training_voc.md
├── requirements-gpu.txt
├── requirements.txt
├── setup.py
├── tools
    ├── export_tflite.py
    ├── export_tfserving.py
    ├── visualize_dataset.py
    └── voc2012.py
├── train.py
└── yolov3_tf2
    ├── __init__.py
    ├── dataset.py
    ├── models.py
    └── utils.py


/.gitignore:
--------------------------------------------------------------------------------
  1 | *.h5
  2 | *.weights
  3 | *.tar
  4 | *.tfrecord
  5 | /checkpoints/*
  6 | /serving/*
  7 | /logs/
  8 | /Untitled.ipynb
  9 | /output.jpg
 10 | /data/voc2012_raw/
 11 | 
 12 | # Created by https://www.gitignore.io/api/python
 13 | # Edit at https://www.gitignore.io/?templates=python
 14 | 
 15 | ### Python ###
 16 | # Byte-compiled / optimized / DLL files
 17 | __pycache__/
 18 | *.py[cod]
 19 | *$py.class
 20 | 
 21 | # C extensions
 22 | *.so
 23 | 
 24 | # Distribution / packaging
 25 | .Python
 26 | build/
 27 | develop-eggs/
 28 | dist/
 29 | downloads/
 30 | eggs/
 31 | .eggs/
 32 | lib/
 33 | lib64/
 34 | parts/
 35 | sdist/
 36 | var/
 37 | wheels/
 38 | pip-wheel-metadata/
 39 | share/python-wheels/
 40 | *.egg-info/
 41 | .installed.cfg
 42 | *.egg
 43 | MANIFEST
 44 | 
 45 | # PyInstaller
 46 | #  Usually these files are written by a python script from a template
 47 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 48 | *.manifest
 49 | *.spec
 50 | 
 51 | # Installer logs
 52 | pip-log.txt
 53 | pip-delete-this-directory.txt
 54 | 
 55 | # Unit test / coverage reports
 56 | htmlcov/
 57 | .tox/
 58 | .nox/
 59 | .coverage
 60 | .coverage.*
 61 | .cache
 62 | nosetests.xml
 63 | coverage.xml
 64 | *.cover
 65 | .hypothesis/
 66 | .pytest_cache/
 67 | 
 68 | # Translations
 69 | *.mo
 70 | *.pot
 71 | 
 72 | # Django stuff:
 73 | *.log
 74 | local_settings.py
 75 | db.sqlite3
 76 | 
 77 | # Flask stuff:
 78 | instance/
 79 | .webassets-cache
 80 | 
 81 | # Scrapy stuff:
 82 | .scrapy
 83 | 
 84 | # Sphinx documentation
 85 | docs/_build/
 86 | 
 87 | # PyBuilder
 88 | target/
 89 | 
 90 | # Jupyter Notebook
 91 | .ipynb_checkpoints
 92 | 
 93 | # IPython
 94 | profile_default/
 95 | ipython_config.py
 96 | 
 97 | # pyenv
 98 | .python-version
 99 | 
100 | # pipenv
101 | #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
102 | #   However, in case of collaboration, if having platform-specific dependencies or dependencies
103 | #   having no cross-platform support, pipenv may install dependencies that don’t work, or not
104 | #   install all needed dependencies.
105 | #Pipfile.lock
106 | 
107 | # celery beat schedule file
108 | celerybeat-schedule
109 | 
110 | # SageMath parsed files
111 | *.sage.py
112 | 
113 | # Environments
114 | .env
115 | .venv
116 | env/
117 | venv/
118 | ENV/
119 | env.bak/
120 | venv.bak/
121 | 
122 | # Spyder project settings
123 | .spyderproject
124 | .spyproject
125 | 
126 | # Rope project settings
127 | .ropeproject
128 | 
129 | # mkdocs documentation
130 | /site
131 | 
132 | # mypy
133 | .mypy_cache/
134 | .dmypy.json
135 | dmypy.json
136 | 
137 | # Pyre type checker
138 | .pyre/
139 | 
140 | # End of https://www.gitignore.io/api/python
141 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2022 Zihao Zhang
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # YoloV3 Implemented in TensorFlow 2.0
  2 | 
  3 | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/zzh8829/yolov3-tf2/blob/master/colab_gpu.ipynb)
  4 | 
  5 | This repo provides a clean implementation of YoloV3 in TensorFlow 2.0 using all the best practices.
  6 | 
  7 | ## Key Features
  8 | 
  9 | - [x] TensorFlow 2.0
 10 | - [x] `yolov3` with pre-trained Weights
 11 | - [x] `yolov3-tiny` with pre-trained Weights
 12 | - [x] Inference example
 13 | - [x] Transfer learning example
 14 | - [x] Eager mode training with `tf.GradientTape`
 15 | - [x] Graph mode training with `model.fit`
 16 | - [x] Functional model with `tf.keras.layers`
 17 | - [x] Input pipeline using `tf.data`
 18 | - [x] Tensorflow Serving
 19 | - [x] Vectorized transformations
 20 | - [x] GPU accelerated
 21 | - [x] Fully integrated with `absl-py` from [abseil.io](https://abseil.io)
 22 | - [x] Clean implementation
 23 | - [x] Following the best practices
 24 | - [x] MIT License
 25 | 
 26 | ![demo](https://raw.githubusercontent.com/zzh8829/yolov3-tf2/master/data/meme_out.jpg)
 27 | ![demo](https://raw.githubusercontent.com/zzh8829/yolov3-tf2/master/data/street_out.jpg)
 28 | 
 29 | ## Usage
 30 | 
 31 | ### Installation
 32 | 
 33 | #### Conda (Recommended)
 34 | 
 35 | ```bash
 36 | # Tensorflow CPU
 37 | conda env create -f conda-cpu.yml
 38 | conda activate yolov3-tf2-cpu
 39 | 
 40 | # Tensorflow GPU
 41 | conda env create -f conda-gpu.yml
 42 | conda activate yolov3-tf2-gpu
 43 | ```
 44 | 
 45 | #### Pip
 46 | 
 47 | ```bash
 48 | pip install -r requirements.txt
 49 | ```
 50 | 
 51 | ### Nvidia Driver (For GPU)
 52 | 
 53 | ```bash
 54 | # Ubuntu 18.04
 55 | sudo apt-add-repository -r ppa:graphics-drivers/ppa
 56 | sudo apt install nvidia-driver-430
 57 | # Windows/Other
 58 | https://www.nvidia.com/Download/index.aspx
 59 | ```
 60 | 
 61 | ### Convert pre-trained Darknet weights
 62 | 
 63 | ```bash
 64 | # yolov3
 65 | wget https://pjreddie.com/media/files/yolov3.weights -O data/yolov3.weights
 66 | python convert.py --weights ./data/yolov3.weights --output ./checkpoints/yolov3.tf
 67 | 
 68 | # yolov3-tiny
 69 | wget https://pjreddie.com/media/files/yolov3-tiny.weights -O data/yolov3-tiny.weights
 70 | python convert.py --weights ./data/yolov3-tiny.weights --output ./checkpoints/yolov3-tiny.tf --tiny
 71 | ```
 72 | 
 73 | ### Detection
 74 | 
 75 | ```bash
 76 | # yolov3
 77 | python detect.py --image ./data/meme.jpg
 78 | 
 79 | # yolov3-tiny
 80 | python detect.py --weights ./checkpoints/yolov3-tiny.tf --tiny --image ./data/street.jpg
 81 | 
 82 | # webcam
 83 | python detect_video.py --video 0
 84 | 
 85 | # video file
 86 | python detect_video.py --video path_to_file.mp4 --weights ./checkpoints/yolov3-tiny.tf --tiny
 87 | 
 88 | # video file with output
 89 | python detect_video.py --video path_to_file.mp4 --output ./output.avi
 90 | ```
 91 | 
 92 | ### Training
 93 | 
 94 | I have created a complete tutorial on how to train from scratch using the VOC2012 Dataset.
 95 | See the documentation here https://github.com/zzh8829/yolov3-tf2/blob/master/docs/training_voc.md
 96 | 
 97 | For customzied training, you need to generate tfrecord following the TensorFlow Object Detection API.
 98 | For example you can use [Microsoft VOTT](https://github.com/Microsoft/VoTT) to generate such dataset.
 99 | You can also use this [script](https://github.com/tensorflow/models/blob/master/research/object_detection/dataset_tools/create_pascal_tf_record.py) to create the pascal voc dataset.
100 | 
101 | Example commend line arguments for training
102 | ``` bash
103 | python train.py --batch_size 8 --dataset ~/Data/voc2012.tfrecord --val_dataset ~/Data/voc2012_val.tfrecord --epochs 100 --mode eager_tf --transfer fine_tune
104 | 
105 | python train.py --batch_size 8 --dataset ~/Data/voc2012.tfrecord --val_dataset ~/Data/voc2012_val.tfrecord --epochs 100 --mode fit --transfer none
106 | 
107 | python train.py --batch_size 8 --dataset ~/Data/voc2012.tfrecord --val_dataset ~/Data/voc2012_val.tfrecord --epochs 100 --mode fit --transfer no_output
108 | 
109 | python train.py --batch_size 8 --dataset ~/Data/voc2012.tfrecord --val_dataset ~/Data/voc2012_val.tfrecord --epochs 10 --mode eager_fit --transfer fine_tune --weights ./checkpoints/yolov3-tiny.tf --tiny
110 | ```
111 | 
112 | ### Tensorflow Serving
113 | You can export the model to tf serving
114 | ```
115 | python export_tfserving.py --output serving/yolov3/1/
116 | # verify tfserving graph
117 | saved_model_cli show --dir serving/yolov3/1/ --tag_set serve --signature_def serving_default
118 | ```
119 | 
120 | The inputs are preprocessed images (see `dataset.transform_iamges`)
121 | 
122 | outputs are
123 | ```
124 | yolo_nms_0: bounding boxes
125 | yolo_nms_1: scores
126 | yolo_nms_2: classes
127 | yolo_nms_3: numbers of valid detections
128 | ```
129 | 
130 | ## Benchmark (No Training Yet)
131 | 
132 | Numbers are obtained with rough calculations from `detect_video.py`
133 | 
134 | ### Macbook Pro 13 (2.7GHz i5)
135 | 
136 | | Detection   | 416x416 | 320x320 | 608x608 |
137 | |-------------|---------|---------|---------|
138 | | YoloV3      | 1000ms  | 500ms   | 1546ms  |
139 | | YoloV3-Tiny | 100ms   | 58ms    | 208ms   |
140 | 
141 | ### Desktop PC (GTX 970)
142 | 
143 | | Detection   | 416x416 | 320x320 | 608x608 |
144 | |-------------|---------|---------|---------|
145 | | YoloV3      | 74ms    | 57ms    | 129ms   |
146 | | YoloV3-Tiny | 18ms    | 15ms    | 28ms    |
147 | 
148 | ### AWS g3.4xlarge (Tesla M60)
149 | 
150 | | Detection   | 416x416 | 320x320 | 608x608 |
151 | |-------------|---------|---------|---------|
152 | | YoloV3      | 66ms    | 50ms    | 123ms   |
153 | | YoloV3-Tiny | 15ms    | 10ms    | 24ms    |
154 | 
155 | ### RTX 2070 (credit to @AnaRhisT94)
156 | 
157 | | Detection   | 416x416 |
158 | |-------------|---------|
159 | | YoloV3 predict_on_batch     | 29-32ms    | 
160 | | YoloV3 predict_on_batch + TensorRT     | 22-28ms    | 
161 | 
162 | 
163 | Darknet version of YoloV3 at 416x416 takes 29ms on Titan X.
164 | Considering Titan X has about double the benchmark of Tesla M60,
165 | Performance-wise this implementation is pretty comparable.
166 | 
167 | ## Implementation Details
168 | 
169 | ### Eager execution
170 | 
171 | Great addition for existing TensorFlow experts.
172 | Not very easy to use without some intermediate understanding of TensorFlow graphs.
173 | It is annoying when you accidentally use incompatible features like tensor.shape[0]
174 | or some sort of python control flow that works fine in eager mode, but
175 | totally breaks down when you try to compile the model to graph.
176 | 
177 | ### model(x) vs. model.predict(x)
178 | 
179 | When calling model(x) directly, we are executing the graph in eager mode. For
180 | `model.predict`, tf actually compiles the graph on the first run and then
181 | execute in graph mode. So if you are only running the model once, `model(x)` is
182 | faster since there is no compilation needed. Otherwise, `model.predict` or
183 | using exported SavedModel graph is much faster (by 2x). For non real-time usage,
184 | `model.predict_on_batch` is even faster as tested by @AnaRhisT94)
185 | 
186 | ### GradientTape
187 | 
188 | Extremely useful for debugging purpose, you can set breakpoints anywhere.
189 | You can compile all the keras fitting functionalities with gradient tape using the
190 | `run_eagerly` argument in model.compile. From my limited testing, all training methods
191 | including GradientTape, keras.fit, eager or not yeilds similar performance. But graph
192 | mode is still preferred since it's a tiny bit more efficient.
193 | 
194 | ### @tf.function
195 | 
196 | @tf.function is very cool. It's like an in-between version of eager and graph.
197 | You can step through the function by disabling tf.function and then gain
198 | performance when you enable it in production. Important note, you should not
199 | pass any non-tensor parameter to @tf.function, it will cause re-compilation
200 | on every call. I am not sure whats the best way other than using globals.
201 | 
202 | ### absl.py (abseil)
203 | 
204 | Absolutely amazing. If you don't know already, absl.py is officially used by
205 | internal projects at Google. It standardizes application interface for Python
206 | and many other languages. After using it within Google, I was so excited
207 | to hear abseil going open source. It includes many decades of best practices
208 | learned from creating large size scalable applications. I literally have
209 | nothing bad to say about it, strongly recommend absl.py to everybody.
210 | 
211 | ### Loading pre-trained Darknet weights
212 | 
213 | very hard with pure functional API because the layer ordering is different in
214 | tf.keras and darknet. The clean solution here is creating sub-models in keras.
215 | Keras is not able to save nested model in h5 format properly, TF Checkpoint is
216 | recommended since its offically supported by TensorFlow.
217 | 
218 | ### tf.keras.layers.BatchNormalization
219 | 
220 | It doesn't work very well for transfer learning. There are many articles and
221 | github issues all over the internet. I used a simple hack to make it work nicer
222 | on transfer learning with small batches.
223 | 
224 | ### What is the output of transform_targets ???
225 | 
226 | I know it's very confusion but the output is tuple of shape
227 | ```
228 | (
229 |   [N, 13, 13, 3, 6],
230 |   [N, 26, 26, 3, 6],
231 |   [N, 52, 52, 3, 6]
232 | )
233 | ```
234 | where N is the number of labels in batch and the last dimension "6" represents
235 | `[x, y, w, h, obj, class]` of the bounding boxes.
236 | 
237 | ### IOU and Score Threshold
238 | 
239 | the default threshold is 0.5 for both IOU and score, you can adjust them
240 | according to your need by setting `--yolo_iou_threshold` and
241 | `--yolo_score_threshold` flags
242 | 
243 | ### Maximum number of boxes
244 | 
245 | By default there can be maximum 100 bounding boxes per image, 
246 | if for some reason you would like to have more boxes you can use the `--yolo_max_boxes` flag.
247 | 
248 | ### NAN Loss / Training Failed / Doesn't Converge 
249 | 
250 | Many people including me have succeeded in training, so the code definitely works
251 | @LongxingTan in https://github.com/zzh8829/yolov3-tf2/issues/128 provided some of his insights summarized here:
252 |   
253 |   1. For nan loss, try to make learning rate smaller
254 |   2. Double check the format of your input data. Data input labelled by vott and labelImg is different. so make sure the input box is the right, and check carefully the format is `x1/width,y1/height,x2/width,y2/height` and **NOT** x1,y1,x2,y2, or x,y,w,h
255 | 
256 | Make sure to visualize your custom dataset using this tool
257 | ```
258 | python tools/visualize_dataset.py --classes=./data/voc2012.names
259 | ```
260 | 
261 | It will output one random image from your dataset with label to `output.jpg`
262 | Training definitely won't work if the rendered label doesn't look correct
263 | 
264 | ## Command Line Args Reference
265 | 
266 | ```bash
267 | convert.py:
268 |   --output: path to output
269 |     (default: './checkpoints/yolov3.tf')
270 |   --[no]tiny: yolov3 or yolov3-tiny
271 |     (default: 'false')
272 |   --weights: path to weights file
273 |     (default: './data/yolov3.weights')
274 |   --num_classes: number of classes in the model
275 |     (default: '80')
276 |     (an integer)
277 | 
278 | detect.py:
279 |   --classes: path to classes file
280 |     (default: './data/coco.names')
281 |   --image: path to input image
282 |     (default: './data/girl.png')
283 |   --output: path to output image
284 |     (default: './output.jpg')
285 |   --[no]tiny: yolov3 or yolov3-tiny
286 |     (default: 'false')
287 |   --weights: path to weights file
288 |     (default: './checkpoints/yolov3.tf')
289 |   --num_classes: number of classes in the model
290 |     (default: '80')
291 |     (an integer)
292 | 
293 | detect_video.py:
294 |   --classes: path to classes file
295 |     (default: './data/coco.names')
296 |   --video: path to input video (use 0 for cam)
297 |     (default: './data/video.mp4')
298 |   --output: path to output video (remember to set right codec for given format. e.g. XVID for .avi)
299 |     (default: None)
300 |   --output_format: codec used in VideoWriter when saving video to file
301 |     (default: 'XVID)
302 |   --[no]tiny: yolov3 or yolov3-tiny
303 |     (default: 'false')
304 |   --weights: path to weights file
305 |     (default: './checkpoints/yolov3.tf')
306 |   --num_classes: number of classes in the model
307 |     (default: '80')
308 |     (an integer)
309 | 
310 | train.py:
311 |   --batch_size: batch size
312 |     (default: '8')
313 |     (an integer)
314 |   --classes: path to classes file
315 |     (default: './data/coco.names')
316 |   --dataset: path to dataset
317 |     (default: '')
318 |   --epochs: number of epochs
319 |     (default: '2')
320 |     (an integer)
321 |   --learning_rate: learning rate
322 |     (default: '0.001')
323 |     (a number)
324 |   --mode: <fit|eager_fit|eager_tf>: fit: model.fit, eager_fit: model.fit(run_eagerly=True), eager_tf: custom GradientTape
325 |     (default: 'fit')
326 |   --num_classes: number of classes in the model
327 |     (default: '80')
328 |     (an integer)
329 |   --size: image size
330 |     (default: '416')
331 |     (an integer)
332 |   --[no]tiny: yolov3 or yolov3-tiny
333 |     (default: 'false')
334 |   --transfer: <none|darknet|no_output|frozen|fine_tune>: none: Training from scratch, darknet: Transfer darknet, no_output: Transfer all but output, frozen: Transfer and freeze all,
335 |     fine_tune: Transfer all and freeze darknet only
336 |     (default: 'none')
337 |   --val_dataset: path to validation dataset
338 |     (default: '')
339 |   --weights: path to weights file
340 |     (default: './checkpoints/yolov3.tf')
341 | ```
342 | 
343 | ## Change Log
344 | 
345 | #### October 1, 2019
346 | 
347 | - Updated to Tensorflow to v2.0.0 Release
348 | 
349 | 
350 | ## References
351 | 
352 | It is pretty much impossible to implement this from the yolov3 paper alone. I had to reference the official (very hard to understand) and many un-official (many minor errors) repos to piece together the complete picture.
353 | 
354 | - https://github.com/pjreddie/darknet
355 |     - official yolov3 implementation
356 | - https://github.com/AlexeyAB
357 |     - explinations of parameters
358 | - https://github.com/qqwweee/keras-yolo3
359 |     - models
360 |     - loss functions
361 | - https://github.com/YunYang1994/tensorflow-yolov3
362 |     - data transformations
363 |     - loss functions
364 | - https://github.com/ayooshkathuria/pytorch-yolo-v3
365 |     - models
366 | - https://github.com/broadinstitute/keras-resnet
367 |     - batch normalization fix
368 | 


--------------------------------------------------------------------------------
/checkpoints/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zzh8829/yolov3-tf2/2784050f2fc1ff060f4c8b3ac2db231370569aa8/checkpoints/.gitkeep


--------------------------------------------------------------------------------
/conda-cpu.yml:
--------------------------------------------------------------------------------
 1 | name: yolov3-tf2-cpu
 2 | 
 3 | dependencies:
 4 |   - python==3.7
 5 |   - pip
 6 |   - matplotlib
 7 |   - opencv
 8 |   - pip:
 9 |     - tensorflow==2.1.0rc1
10 |     - lxml
11 |     - tqdm
12 |     - -e .
13 | 


--------------------------------------------------------------------------------
/conda-gpu.yml:
--------------------------------------------------------------------------------
 1 | name: yolov3-tf2-gpu
 2 | 
 3 | dependencies:
 4 |   - python==3.7
 5 |   - pip
 6 |   - matplotlib
 7 |   - opencv
 8 |   - cudnn
 9 |   - cudatoolkit==10.1.243
10 |   - pip:
11 |     - tensorflow-gpu==2.1.0rc1
12 |     - lxml
13 |     - tqdm
14 |     - -e .
15 | 


--------------------------------------------------------------------------------
/convert.py:
--------------------------------------------------------------------------------
 1 | from absl import app, flags, logging
 2 | from absl.flags import FLAGS
 3 | import numpy as np
 4 | from yolov3_tf2.models import YoloV3, YoloV3Tiny
 5 | from yolov3_tf2.utils import load_darknet_weights
 6 | import tensorflow as tf
 7 | 
 8 | flags.DEFINE_string('weights', './data/yolov3.weights', 'path to weights file')
 9 | flags.DEFINE_string('output', './checkpoints/yolov3.tf', 'path to output')
10 | flags.DEFINE_boolean('tiny', False, 'yolov3 or yolov3-tiny')
11 | flags.DEFINE_integer('num_classes', 80, 'number of classes in the model')
12 | 
13 | 
14 | def main(_argv):
15 |     physical_devices = tf.config.experimental.list_physical_devices('GPU')
16 |     if len(physical_devices) > 0:
17 |         tf.config.experimental.set_memory_growth(physical_devices[0], True)
18 | 
19 |     if FLAGS.tiny:
20 |         yolo = YoloV3Tiny(classes=FLAGS.num_classes)
21 |     else:
22 |         yolo = YoloV3(classes=FLAGS.num_classes)
23 |     yolo.summary()
24 |     logging.info('model created')
25 | 
26 |     load_darknet_weights(yolo, FLAGS.weights, FLAGS.tiny)
27 |     logging.info('weights loaded')
28 | 
29 |     img = np.random.random((1, 320, 320, 3)).astype(np.float32)
30 |     output = yolo(img)
31 |     logging.info('sanity check passed')
32 | 
33 |     yolo.save_weights(FLAGS.output)
34 |     logging.info('weights saved')
35 | 
36 | 
37 | if __name__ == '__main__':
38 |     try:
39 |         app.run(main)
40 |     except SystemExit:
41 |         pass
42 | 


--------------------------------------------------------------------------------
/data/checkpoint:
--------------------------------------------------------------------------------
1 | model_checkpoint_path: "yolov3.tf"
2 | all_model_checkpoint_paths: "yolov3.tf"
3 | 


--------------------------------------------------------------------------------
/data/coco.names:
--------------------------------------------------------------------------------
 1 | person
 2 | bicycle
 3 | car
 4 | motorbike
 5 | aeroplane
 6 | bus
 7 | train
 8 | truck
 9 | boat
10 | traffic light
11 | fire hydrant
12 | stop sign
13 | parking meter
14 | bench
15 | bird
16 | cat
17 | dog
18 | horse
19 | sheep
20 | cow
21 | elephant
22 | bear
23 | zebra
24 | giraffe
25 | backpack
26 | umbrella
27 | handbag
28 | tie
29 | suitcase
30 | frisbee
31 | skis
32 | snowboard
33 | sports ball
34 | kite
35 | baseball bat
36 | baseball glove
37 | skateboard
38 | surfboard
39 | tennis racket
40 | bottle
41 | wine glass
42 | cup
43 | fork
44 | knife
45 | spoon
46 | bowl
47 | banana
48 | apple
49 | sandwich
50 | orange
51 | broccoli
52 | carrot
53 | hot dog
54 | pizza
55 | donut
56 | cake
57 | chair
58 | sofa
59 | pottedplant
60 | bed
61 | diningtable
62 | toilet
63 | tvmonitor
64 | laptop
65 | mouse
66 | remote
67 | keyboard
68 | cell phone
69 | microwave
70 | oven
71 | toaster
72 | sink
73 | refrigerator
74 | book
75 | clock
76 | vase
77 | scissors
78 | teddy bear
79 | hair drier
80 | toothbrush
81 | 


--------------------------------------------------------------------------------
/data/girl.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zzh8829/yolov3-tf2/2784050f2fc1ff060f4c8b3ac2db231370569aa8/data/girl.png


--------------------------------------------------------------------------------
/data/meme.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zzh8829/yolov3-tf2/2784050f2fc1ff060f4c8b3ac2db231370569aa8/data/meme.jpg


--------------------------------------------------------------------------------
/data/meme2.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zzh8829/yolov3-tf2/2784050f2fc1ff060f4c8b3ac2db231370569aa8/data/meme2.jpeg


--------------------------------------------------------------------------------
/data/meme_out.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zzh8829/yolov3-tf2/2784050f2fc1ff060f4c8b3ac2db231370569aa8/data/meme_out.jpg


--------------------------------------------------------------------------------
/data/street.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zzh8829/yolov3-tf2/2784050f2fc1ff060f4c8b3ac2db231370569aa8/data/street.jpg


--------------------------------------------------------------------------------
/data/street_out.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zzh8829/yolov3-tf2/2784050f2fc1ff060f4c8b3ac2db231370569aa8/data/street_out.jpg


--------------------------------------------------------------------------------
/data/voc2012.names:
--------------------------------------------------------------------------------
 1 | aeroplane
 2 | bicycle
 3 | bird
 4 | boat
 5 | bottle
 6 | bus
 7 | car
 8 | cat
 9 | chair
10 | cow
11 | diningtable
12 | dog
13 | horse
14 | motorbike
15 | person
16 | pottedplant
17 | sheep
18 | sofa
19 | train
20 | tvmonitor
21 | 


--------------------------------------------------------------------------------
/detect.py:
--------------------------------------------------------------------------------
 1 | import time
 2 | from absl import app, flags, logging
 3 | from absl.flags import FLAGS
 4 | import cv2
 5 | import numpy as np
 6 | import tensorflow as tf
 7 | from yolov3_tf2.models import (
 8 |     YoloV3, YoloV3Tiny
 9 | )
10 | from yolov3_tf2.dataset import transform_images, load_tfrecord_dataset
11 | from yolov3_tf2.utils import draw_outputs
12 | 
13 | flags.DEFINE_string('classes', './data/coco.names', 'path to classes file')
14 | flags.DEFINE_string('weights', './checkpoints/yolov3.tf',
15 |                     'path to weights file')
16 | flags.DEFINE_boolean('tiny', False, 'yolov3 or yolov3-tiny')
17 | flags.DEFINE_integer('size', 416, 'resize images to')
18 | flags.DEFINE_string('image', './data/girl.png', 'path to input image')
19 | flags.DEFINE_string('tfrecord', None, 'tfrecord instead of image')
20 | flags.DEFINE_string('output', './output.jpg', 'path to output image')
21 | flags.DEFINE_integer('num_classes', 80, 'number of classes in the model')
22 | 
23 | 
24 | def main(_argv):
25 |     physical_devices = tf.config.experimental.list_physical_devices('GPU')
26 |     for physical_device in physical_devices:
27 |         tf.config.experimental.set_memory_growth(physical_device, True)
28 | 
29 |     if FLAGS.tiny:
30 |         yolo = YoloV3Tiny(classes=FLAGS.num_classes)
31 |     else:
32 |         yolo = YoloV3(classes=FLAGS.num_classes)
33 | 
34 |     yolo.load_weights(FLAGS.weights).expect_partial()
35 |     logging.info('weights loaded')
36 | 
37 |     class_names = [c.strip() for c in open(FLAGS.classes).readlines()]
38 |     logging.info('classes loaded')
39 | 
40 |     if FLAGS.tfrecord:
41 |         dataset = load_tfrecord_dataset(
42 |             FLAGS.tfrecord, FLAGS.classes, FLAGS.size)
43 |         dataset = dataset.shuffle(512)
44 |         img_raw, _label = next(iter(dataset.take(1)))
45 |     else:
46 |         img_raw = tf.image.decode_image(
47 |             open(FLAGS.image, 'rb').read(), channels=3)
48 | 
49 |     img = tf.expand_dims(img_raw, 0)
50 |     img = transform_images(img, FLAGS.size)
51 | 
52 |     t1 = time.time()
53 |     boxes, scores, classes, nums = yolo(img)
54 |     t2 = time.time()
55 |     logging.info('time: {}'.format(t2 - t1))
56 | 
57 |     logging.info('detections:')
58 |     for i in range(nums[0]):
59 |         logging.info('\t{}, {}, {}'.format(class_names[int(classes[0][i])],
60 |                                            np.array(scores[0][i]),
61 |                                            np.array(boxes[0][i])))
62 | 
63 |     img = cv2.cvtColor(img_raw.numpy(), cv2.COLOR_RGB2BGR)
64 |     img = draw_outputs(img, (boxes, scores, classes, nums), class_names)
65 |     cv2.imwrite(FLAGS.output, img)
66 |     logging.info('output saved to: {}'.format(FLAGS.output))
67 | 
68 | 
69 | if __name__ == '__main__':
70 |     try:
71 |         app.run(main)
72 |     except SystemExit:
73 |         pass
74 | 


--------------------------------------------------------------------------------
/detect_video.py:
--------------------------------------------------------------------------------
 1 | import time
 2 | from absl import app, flags, logging
 3 | from absl.flags import FLAGS
 4 | import cv2
 5 | import tensorflow as tf
 6 | from yolov3_tf2.models import (
 7 |     YoloV3, YoloV3Tiny
 8 | )
 9 | from yolov3_tf2.dataset import transform_images
10 | from yolov3_tf2.utils import draw_outputs
11 | 
12 | 
13 | flags.DEFINE_string('classes', './data/coco.names', 'path to classes file')
14 | flags.DEFINE_string('weights', './checkpoints/yolov3.tf',
15 |                     'path to weights file')
16 | flags.DEFINE_boolean('tiny', False, 'yolov3 or yolov3-tiny')
17 | flags.DEFINE_integer('size', 416, 'resize images to')
18 | flags.DEFINE_string('video', './data/video.mp4',
19 |                     'path to video file or number for webcam)')
20 | flags.DEFINE_string('output', None, 'path to output video')
21 | flags.DEFINE_string('output_format', 'XVID', 'codec used in VideoWriter when saving video to file')
22 | flags.DEFINE_integer('num_classes', 80, 'number of classes in the model')
23 | 
24 | 
25 | def main(_argv):
26 |     physical_devices = tf.config.experimental.list_physical_devices('GPU')
27 |     for physical_device in physical_devices:
28 |         tf.config.experimental.set_memory_growth(physical_device, True)
29 | 
30 |     if FLAGS.tiny:
31 |         yolo = YoloV3Tiny(classes=FLAGS.num_classes)
32 |     else:
33 |         yolo = YoloV3(classes=FLAGS.num_classes)
34 | 
35 |     yolo.load_weights(FLAGS.weights)
36 |     logging.info('weights loaded')
37 | 
38 |     class_names = [c.strip() for c in open(FLAGS.classes).readlines()]
39 |     logging.info('classes loaded')
40 | 
41 |     times = []
42 | 
43 |     try:
44 |         vid = cv2.VideoCapture(int(FLAGS.video))
45 |     except:
46 |         vid = cv2.VideoCapture(FLAGS.video)
47 | 
48 |     out = None
49 | 
50 |     if FLAGS.output:
51 |         # by default VideoCapture returns float instead of int
52 |         width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
53 |         height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
54 |         fps = int(vid.get(cv2.CAP_PROP_FPS))
55 |         codec = cv2.VideoWriter_fourcc(*FLAGS.output_format)
56 |         out = cv2.VideoWriter(FLAGS.output, codec, fps, (width, height))
57 | 
58 |     while True:
59 |         _, img = vid.read()
60 | 
61 |         if img is None:
62 |             logging.warning("Empty Frame")
63 |             time.sleep(0.1)
64 |             continue
65 | 
66 |         img_in = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
67 |         img_in = tf.expand_dims(img_in, 0)
68 |         img_in = transform_images(img_in, FLAGS.size)
69 | 
70 |         t1 = time.time()
71 |         boxes, scores, classes, nums = yolo.predict(img_in)
72 |         t2 = time.time()
73 |         times.append(t2-t1)
74 |         times = times[-20:]
75 | 
76 |         img = draw_outputs(img, (boxes, scores, classes, nums), class_names)
77 |         img = cv2.putText(img, "Time: {:.2f}ms".format(sum(times)/len(times)*1000), (0, 30),
78 |                           cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), 2)
79 |         if FLAGS.output:
80 |             out.write(img)
81 |         cv2.imshow('output', img)
82 |         if cv2.waitKey(1) == ord('q'):
83 |             break
84 | 
85 |     cv2.destroyAllWindows()
86 | 
87 | 
88 | if __name__ == '__main__':
89 |     try:
90 |         app.run(main)
91 |     except SystemExit:
92 |         pass
93 | 


--------------------------------------------------------------------------------
/docs/training_voc.md:
--------------------------------------------------------------------------------
  1 | # Training Instruction
  2 | 
  3 | ## VOC 2012 Dataset from Scratch
  4 | 
  5 | Full instruction on how to train using VOC 2012 from scratch
  6 | 
  7 | Requirement:
  8 |   1. Able to detect image using pretrained darknet model
  9 |   2. Many Gigabytes of Disk Space
 10 |   3. High Speed Internet Connection Preferred
 11 |   4. GPU Preferred
 12 | 
 13 | 
 14 | ### 1. Download Dataset
 15 | 
 16 | You can read the full description of dataset [here](http://host.robots.ox.ac.uk/pascal/VOC/)
 17 | ```bash
 18 | wget http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar -O ./data/voc2012_raw.tar
 19 | mkdir -p ./data/voc2012_raw
 20 | tar -xf ./data/voc2012_raw.tar -C ./data/voc2012_raw
 21 | ls ./data/voc2012_raw/VOCdevkit/VOC2012 # Explore the dataset
 22 | ```
 23 | 
 24 | ### 2. Transform Dataset
 25 | 
 26 | See tools/voc2012.py for implementation, this format is based on [tensorflow object detection API](https://github.com/tensorflow/models/tree/master/research/object_detection). Many fields 
 27 | are not required, I left them there for compatibility with official API.
 28 | 
 29 | ```bash
 30 | python tools/voc2012.py \
 31 |   --data_dir './data/voc2012_raw/VOCdevkit/VOC2012' \
 32 |   --split train \
 33 |   --output_file ./data/voc2012_train.tfrecord
 34 | 
 35 | python tools/voc2012.py \
 36 |   --data_dir './data/voc2012_raw/VOCdevkit/VOC2012' \
 37 |   --split val \
 38 |   --output_file ./data/voc2012_val.tfrecord
 39 | ```
 40 | 
 41 | You can visualize the dataset using this tool
 42 | ```
 43 | python tools/visualize_dataset.py --classes=./data/voc2012.names
 44 | ```
 45 | 
 46 | It will output one random image with label to `output.jpg`
 47 | 
 48 | ### 3. Training
 49 | 
 50 | You can adjust the parameters based on your setup
 51 | 
 52 | #### With Transfer Learning
 53 | 
 54 | This step requires loading the pretrained darknet (feature extractor) weights.
 55 | ```
 56 | wget https://pjreddie.com/media/files/yolov3.weights -O data/yolov3.weights
 57 | python convert.py
 58 | python detect.py --image ./data/meme.jpg # Sanity check
 59 | 
 60 | python train.py \
 61 | 	--dataset ./data/voc2012_train.tfrecord \
 62 | 	--val_dataset ./data/voc2012_val.tfrecord \
 63 | 	--classes ./data/voc2012.names \
 64 | 	--num_classes 20 \
 65 | 	--mode fit --transfer darknet \
 66 | 	--batch_size 16 \
 67 | 	--epochs 10 \
 68 | 	--weights ./checkpoints/yolov3.tf \
 69 | 	--weights_num_classes 80 
 70 | ```
 71 | 
 72 | Original pretrained yolov3 has 80 classes, here we demonstrated how to
 73 | do transfer learning on 20 classes.
 74 | 
 75 | #### Training from random weights (NOT RECOMMENDED)
 76 | Training from scratch is very difficult to converge
 77 | The original paper trained darknet 
 78 | on imagenet before training the whole network as well.
 79 | 
 80 | ```bash
 81 | python train.py \
 82 | 	--dataset ./data/voc2012_train.tfrecord \
 83 | 	--val_dataset ./data/voc2012_val.tfrecord \
 84 | 	--classes ./data/voc2012.names \
 85 | 	--num_classes 20 \
 86 | 	--mode fit --transfer none \
 87 | 	--batch_size 16 \
 88 | 	--epochs 10 \
 89 | ```
 90 | 
 91 | I have tested this works 100% with correct loss and converging over time.
 92 | Each epoch takes around 10 minutes on single AWS p2.xlarge (Nvidia K80 GPU) Instance.
 93 | 
 94 | You might see warnings or error messages during training, they are not critical dont' worry too much about them.
 95 | There might be a long wait time between each epoch becaues we are calculating validation loss.
 96 | 
 97 | ### 4. Inference
 98 | 
 99 | ```bash
100 | # detect from images
101 | python detect.py \
102 | 	--classes ./data/voc2012.names \
103 | 	--num_classes 20 \
104 | 	--weights ./checkpoints/yolov3_train_5.tf \
105 | 	--image ./data/street.jpg
106 | 
107 | # detect from validation set
108 | python detect.py \
109 | 	--classes ./data/voc2012.names \
110 | 	--num_classes 20 \
111 | 	--weights ./checkpoints/yolov3_train_5.tf \
112 | 	--tfrecord ./data/voc2012_val.tfrecord
113 | ```
114 | 
115 | You should see some detect objects in the standard output and the visualization at `output.jpg`.
116 | this is just a proof of concept, so it won't be as good as pretrained models.
117 | In my experience, you might need lower score score thershold if you didn't train it enough.
118 | 
119 | 


--------------------------------------------------------------------------------
/requirements-gpu.txt:
--------------------------------------------------------------------------------
1 | tensorflow-gpu==2.12.0
2 | opencv-python==4.2.0.32
3 | lxml
4 | tqdm
5 | 
6 | -e .
7 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | tensorflow==2.11.1
2 | opencv-python==4.2.0.32
3 | lxml
4 | tqdm
5 | 
6 | -e .
7 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | from setuptools import setup
2 | 
3 | setup(name='yolov3_tf2',
4 |       version='0.1',
5 |       url='https://github.com/zzh8829/yolov3-tf2',
6 |       author='Zihao Zhang',
7 |       author_email='zzh8829@gmail.com',
8 |       packages=['yolov3_tf2'])


--------------------------------------------------------------------------------
/tools/export_tflite.py:
--------------------------------------------------------------------------------
 1 | import time
 2 | from absl import app, flags, logging
 3 | from absl.flags import FLAGS
 4 | import cv2
 5 | import numpy as np
 6 | import tensorflow as tf
 7 | from yolov3_tf2.models import (
 8 |     YoloV3, YoloV3Tiny
 9 | )
10 | from yolov3_tf2.dataset import transform_images
11 | 
12 | from tensorflow.python.eager import def_function
13 | from tensorflow.python.framework import tensor_spec
14 | from tensorflow.python.util import nest
15 | 
16 | flags.DEFINE_string('weights', './checkpoints/yolov3.tf',
17 |                     'path to weights file')
18 | flags.DEFINE_boolean('tiny', False, 'yolov3 or yolov3-tiny')
19 | flags.DEFINE_string('output', './checkpoints/yolov3.tflite',
20 |                     'path to saved_model')
21 | flags.DEFINE_string('classes', './data/coco.names', 'path to classes file')
22 | flags.DEFINE_string('image', './data/girl.png', 'path to input image')
23 | flags.DEFINE_integer('num_classes', 80, 'number of classes in the model')
24 | flags.DEFINE_integer('size', 416, 'image size')
25 | 
26 | 
27 | def main(_argv):
28 |     if FLAGS.tiny:
29 |         yolo = YoloV3Tiny(size=FLAGS.size, classes=FLAGS.num_classes)
30 |     else:
31 |         yolo = YoloV3(size=FLAGS.size, classes=FLAGS.num_classes)
32 | 
33 |     yolo.load_weights(FLAGS.weights)
34 |     logging.info('weights loaded')
35 | 
36 |     converter = tf.lite.TFLiteConverter.from_keras_model(yolo)
37 | 
38 |     # Fix from https://stackoverflow.com/questions/64490203/tf-lite-non-max-suppression
39 |     converter.experimental_new_converter = True
40 |     converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS, tf.lite.OpsSet.SELECT_TF_OPS]
41 | 
42 |     tflite_model = converter.convert()
43 |     open(FLAGS.output, 'wb').write(tflite_model)
44 |     logging.info("model saved to: {}".format(FLAGS.output))
45 | 
46 |     interpreter = tf.lite.Interpreter(model_path=FLAGS.output)
47 |     interpreter.allocate_tensors()
48 |     logging.info('tflite model loaded')
49 | 
50 |     input_details = interpreter.get_input_details()
51 |     output_details = interpreter.get_output_details()
52 | 
53 |     class_names = [c.strip() for c in open(FLAGS.classes).readlines()]
54 |     logging.info('classes loaded')
55 | 
56 |     img = tf.image.decode_image(open(FLAGS.image, 'rb').read(), channels=3)
57 |     img = tf.expand_dims(img, 0)
58 |     img = transform_images(img, 416)
59 | 
60 |     t1 = time.time()
61 |     outputs = interpreter.set_tensor(input_details[0]['index'], img)
62 | 
63 |     interpreter.invoke()
64 | 
65 |     output_data = interpreter.get_tensor(output_details[0]['index'])
66 | 
67 |     print(output_data)
68 | 
69 | if __name__ == '__main__':
70 |     app.run(main)
71 | 


--------------------------------------------------------------------------------
/tools/export_tfserving.py:
--------------------------------------------------------------------------------
 1 | import time
 2 | from absl import app, flags, logging
 3 | from absl.flags import FLAGS
 4 | import cv2
 5 | import numpy as np
 6 | import tensorflow as tf
 7 | from yolov3_tf2.models import (
 8 |     YoloV3, YoloV3Tiny
 9 | )
10 | from yolov3_tf2.dataset import transform_images
11 | 
12 | from tensorflow.python.eager import def_function
13 | from tensorflow.python.framework import tensor_spec
14 | from tensorflow.python.util import nest
15 | 
16 | flags.DEFINE_string('weights', './checkpoints/yolov3.tf',
17 |                     'path to weights file')
18 | flags.DEFINE_boolean('tiny', False, 'yolov3 or yolov3-tiny')
19 | flags.DEFINE_string('output', './serving/yolov3/1', 'path to saved_model')
20 | flags.DEFINE_string('classes', './data/coco.names', 'path to classes file')
21 | flags.DEFINE_string('image', './data/girl.png', 'path to input image')
22 | flags.DEFINE_integer('num_classes', 80, 'number of classes in the model')
23 | 
24 | 
25 | def main(_argv):
26 |     if FLAGS.tiny:
27 |         yolo = YoloV3Tiny(classes=FLAGS.num_classes)
28 |     else:
29 |         yolo = YoloV3(classes=FLAGS.num_classes)
30 | 
31 |     yolo.load_weights(FLAGS.weights)
32 |     logging.info('weights loaded')
33 | 
34 |     tf.saved_model.save(yolo, FLAGS.output)
35 |     logging.info("model saved to: {}".format(FLAGS.output))
36 | 
37 |     model = tf.saved_model.load(FLAGS.output)
38 |     infer = model.signatures[tf.saved_model.DEFAULT_SERVING_SIGNATURE_DEF_KEY]
39 |     logging.info(infer.structured_outputs)
40 | 
41 |     class_names = [c.strip() for c in open(FLAGS.classes).readlines()]
42 |     logging.info('classes loaded')
43 | 
44 |     img = tf.image.decode_image(open(FLAGS.image, 'rb').read(), channels=3)
45 |     img = tf.expand_dims(img, 0)
46 |     img = transform_images(img, 416)
47 | 
48 |     t1 = time.time()
49 |     outputs = infer(img)
50 |     boxes, scores, classes, nums = outputs["yolo_nms"], outputs[
51 |         "yolo_nms_1"], outputs["yolo_nms_2"], outputs["yolo_nms_3"]
52 |     t2 = time.time()
53 |     logging.info('time: {}'.format(t2 - t1))
54 | 
55 |     logging.info('detections:')
56 |     for i in range(nums[0]):
57 |         logging.info('\t{}, {}, {}'.format(class_names[int(classes[0][i])],
58 |                                            scores[0][i].numpy(),
59 |                                            boxes[0][i].numpy()))
60 | 
61 | 
62 | if __name__ == '__main__':
63 |     try:
64 |         app.run(main)
65 |     except SystemExit:
66 |         pass
67 | 


--------------------------------------------------------------------------------
/tools/visualize_dataset.py:
--------------------------------------------------------------------------------
 1 | import time
 2 | from absl import app, flags, logging
 3 | from absl.flags import FLAGS
 4 | import cv2
 5 | import numpy as np
 6 | import tensorflow as tf
 7 | from yolov3_tf2.models import (
 8 |     YoloV3, YoloV3Tiny
 9 | )
10 | from yolov3_tf2.dataset import load_tfrecord_dataset, transform_images
11 | from yolov3_tf2.utils import draw_outputs
12 | 
13 | flags.DEFINE_string('classes', './data/coco.names', 'path to classes file')
14 | flags.DEFINE_integer('size', 416, 'resize images to')
15 | flags.DEFINE_string(
16 |     'dataset', './data/voc2012_train.tfrecord', 'path to dataset')
17 | flags.DEFINE_string('output', './output.jpg', 'path to output image')
18 | 
19 | 
20 | def main(_argv):
21 |     class_names = [c.strip() for c in open(FLAGS.classes).readlines()]
22 |     logging.info('classes loaded')
23 | 
24 |     dataset = load_tfrecord_dataset(FLAGS.dataset, FLAGS.classes, FLAGS.size)
25 |     dataset = dataset.shuffle(512)
26 | 
27 |     for image, labels in dataset.take(1):
28 |         boxes = []
29 |         scores = []
30 |         classes = []
31 |         for x1, y1, x2, y2, label in labels:
32 |             if x1 == 0 and x2 == 0:
33 |                 continue
34 | 
35 |             boxes.append((x1, y1, x2, y2))
36 |             scores.append(1)
37 |             classes.append(label)
38 |         nums = [len(boxes)]
39 |         boxes = [boxes]
40 |         scores = [scores]
41 |         classes = [classes]
42 | 
43 |         logging.info('labels:')
44 |         for i in range(nums[0]):
45 |             logging.info('\t{}, {}, {}'.format(class_names[int(classes[0][i])],
46 |                                                np.array(scores[0][i]),
47 |                                                np.array(boxes[0][i])))
48 | 
49 |         img = cv2.cvtColor(image.numpy(), cv2.COLOR_RGB2BGR)
50 |         img = draw_outputs(img, (boxes, scores, classes, nums), class_names)
51 |         cv2.imwrite(FLAGS.output, img)
52 |         logging.info('output saved to: {}'.format(FLAGS.output))
53 | 
54 | 
55 | if __name__ == '__main__':
56 |     app.run(main)
57 | 


--------------------------------------------------------------------------------
/tools/voc2012.py:
--------------------------------------------------------------------------------
  1 | import time
  2 | import os
  3 | import hashlib
  4 | 
  5 | from absl import app, flags, logging
  6 | from absl.flags import FLAGS
  7 | import tensorflow as tf
  8 | import lxml.etree
  9 | import tqdm
 10 | 
 11 | flags.DEFINE_string('data_dir', './data/voc2012_raw/VOCdevkit/VOC2012/',
 12 |                     'path to raw PASCAL VOC dataset')
 13 | flags.DEFINE_enum('split', 'train', [
 14 |                   'train', 'val'], 'specify train or val spit')
 15 | flags.DEFINE_string('output_file', './data/voc2012_train.tfrecord', 'outpot dataset')
 16 | flags.DEFINE_string('classes', './data/voc2012.names', 'classes file')
 17 | 
 18 | 
 19 | def build_example(annotation, class_map):
 20 |     img_path = os.path.join(
 21 |         FLAGS.data_dir, 'JPEGImages', annotation['filename'])
 22 |     img_raw = open(img_path, 'rb').read()
 23 |     key = hashlib.sha256(img_raw).hexdigest()
 24 | 
 25 |     width = int(annotation['size']['width'])
 26 |     height = int(annotation['size']['height'])
 27 | 
 28 |     xmin = []
 29 |     ymin = []
 30 |     xmax = []
 31 |     ymax = []
 32 |     classes = []
 33 |     classes_text = []
 34 |     truncated = []
 35 |     views = []
 36 |     difficult_obj = []
 37 |     if 'object' in annotation:
 38 |         for obj in annotation['object']:
 39 |             difficult = bool(int(obj['difficult']))
 40 |             difficult_obj.append(int(difficult))
 41 | 
 42 |             xmin.append(float(obj['bndbox']['xmin']) / width)
 43 |             ymin.append(float(obj['bndbox']['ymin']) / height)
 44 |             xmax.append(float(obj['bndbox']['xmax']) / width)
 45 |             ymax.append(float(obj['bndbox']['ymax']) / height)
 46 |             classes_text.append(obj['name'].encode('utf8'))
 47 |             classes.append(class_map[obj['name']])
 48 |             truncated.append(int(obj['truncated']))
 49 |             views.append(obj['pose'].encode('utf8'))
 50 | 
 51 |     example = tf.train.Example(features=tf.train.Features(feature={
 52 |         'image/height': tf.train.Feature(int64_list=tf.train.Int64List(value=[height])),
 53 |         'image/width': tf.train.Feature(int64_list=tf.train.Int64List(value=[width])),
 54 |         'image/filename': tf.train.Feature(bytes_list=tf.train.BytesList(value=[
 55 |             annotation['filename'].encode('utf8')])),
 56 |         'image/source_id': tf.train.Feature(bytes_list=tf.train.BytesList(value=[
 57 |             annotation['filename'].encode('utf8')])),
 58 |         'image/key/sha256': tf.train.Feature(bytes_list=tf.train.BytesList(value=[key.encode('utf8')])),
 59 |         'image/encoded': tf.train.Feature(bytes_list=tf.train.BytesList(value=[img_raw])),
 60 |         'image/format': tf.train.Feature(bytes_list=tf.train.BytesList(value=['jpeg'.encode('utf8')])),
 61 |         'image/object/bbox/xmin': tf.train.Feature(float_list=tf.train.FloatList(value=xmin)),
 62 |         'image/object/bbox/xmax': tf.train.Feature(float_list=tf.train.FloatList(value=xmax)),
 63 |         'image/object/bbox/ymin': tf.train.Feature(float_list=tf.train.FloatList(value=ymin)),
 64 |         'image/object/bbox/ymax': tf.train.Feature(float_list=tf.train.FloatList(value=ymax)),
 65 |         'image/object/class/text': tf.train.Feature(bytes_list=tf.train.BytesList(value=classes_text)),
 66 |         'image/object/class/label': tf.train.Feature(int64_list=tf.train.Int64List(value=classes)),
 67 |         'image/object/difficult': tf.train.Feature(int64_list=tf.train.Int64List(value=difficult_obj)),
 68 |         'image/object/truncated': tf.train.Feature(int64_list=tf.train.Int64List(value=truncated)),
 69 |         'image/object/view': tf.train.Feature(bytes_list=tf.train.BytesList(value=views)),
 70 |     }))
 71 |     return example
 72 | 
 73 | 
 74 | def parse_xml(xml):
 75 |     if not len(xml):
 76 |         return {xml.tag: xml.text}
 77 |     result = {}
 78 |     for child in xml:
 79 |         child_result = parse_xml(child)
 80 |         if child.tag != 'object':
 81 |             result[child.tag] = child_result[child.tag]
 82 |         else:
 83 |             if child.tag not in result:
 84 |                 result[child.tag] = []
 85 |             result[child.tag].append(child_result[child.tag])
 86 |     return {xml.tag: result}
 87 | 
 88 | 
 89 | def main(_argv):
 90 |     class_map = {name: idx for idx, name in enumerate(
 91 |         open(FLAGS.classes).read().splitlines())}
 92 |     logging.info("Class mapping loaded: %s", class_map)
 93 | 
 94 |     writer = tf.io.TFRecordWriter(FLAGS.output_file)
 95 |     image_list = open(os.path.join(
 96 |         FLAGS.data_dir, 'ImageSets', 'Main', '%s.txt' % FLAGS.split)).read().splitlines()
 97 |     logging.info("Image list loaded: %d", len(image_list))
 98 |     for name in tqdm.tqdm(image_list):
 99 |         annotation_xml = os.path.join(
100 |             FLAGS.data_dir, 'Annotations', name + '.xml')
101 |         annotation_xml = lxml.etree.fromstring(open(annotation_xml).read())
102 |         annotation = parse_xml(annotation_xml)['annotation']
103 |         tf_example = build_example(annotation, class_map)
104 |         writer.write(tf_example.SerializeToString())
105 |     writer.close()
106 |     logging.info("Done")
107 | 
108 | 
109 | if __name__ == '__main__':
110 |     app.run(main)
111 | 


--------------------------------------------------------------------------------
/train.py:
--------------------------------------------------------------------------------
  1 | from absl import app, flags, logging
  2 | from absl.flags import FLAGS
  3 | 
  4 | import tensorflow as tf
  5 | import numpy as np
  6 | import cv2
  7 | import time
  8 | from tensorflow.keras.callbacks import (
  9 |     ReduceLROnPlateau,
 10 |     EarlyStopping,
 11 |     ModelCheckpoint,
 12 |     TensorBoard
 13 | )
 14 | from yolov3_tf2.models import (
 15 |     YoloV3, YoloV3Tiny, YoloLoss,
 16 |     yolo_anchors, yolo_anchor_masks,
 17 |     yolo_tiny_anchors, yolo_tiny_anchor_masks
 18 | )
 19 | from yolov3_tf2.utils import freeze_all
 20 | import yolov3_tf2.dataset as dataset
 21 | 
 22 | flags.DEFINE_string('dataset', '', 'path to dataset')
 23 | flags.DEFINE_string('val_dataset', '', 'path to validation dataset')
 24 | flags.DEFINE_boolean('tiny', False, 'yolov3 or yolov3-tiny')
 25 | flags.DEFINE_string('weights', './checkpoints/yolov3.tf',
 26 |                     'path to weights file')
 27 | flags.DEFINE_string('classes', './data/coco.names', 'path to classes file')
 28 | flags.DEFINE_enum('mode', 'fit', ['fit', 'eager_fit', 'eager_tf'],
 29 |                   'fit: model.fit, '
 30 |                   'eager_fit: model.fit(run_eagerly=True), '
 31 |                   'eager_tf: custom GradientTape')
 32 | flags.DEFINE_enum('transfer', 'none',
 33 |                   ['none', 'darknet', 'no_output', 'frozen', 'fine_tune'],
 34 |                   'none: Training from scratch, '
 35 |                   'darknet: Transfer darknet, '
 36 |                   'no_output: Transfer all but output, '
 37 |                   'frozen: Transfer and freeze all, '
 38 |                   'fine_tune: Transfer all and freeze darknet only')
 39 | flags.DEFINE_integer('size', 416, 'image size')
 40 | flags.DEFINE_integer('epochs', 2, 'number of epochs')
 41 | flags.DEFINE_integer('batch_size', 8, 'batch size')
 42 | flags.DEFINE_float('learning_rate', 1e-3, 'learning rate')
 43 | flags.DEFINE_integer('num_classes', 80, 'number of classes in the model')
 44 | flags.DEFINE_integer('weights_num_classes', None, 'specify num class for `weights` file if different, '
 45 |                      'useful in transfer learning with different number of classes')
 46 | flags.DEFINE_boolean('multi_gpu', False, 'Use if wishing to train with more than 1 GPU.')
 47 | 
 48 | 
 49 | def setup_model():
 50 |     if FLAGS.tiny:
 51 |         model = YoloV3Tiny(FLAGS.size, training=True,
 52 |                            classes=FLAGS.num_classes)
 53 |         anchors = yolo_tiny_anchors
 54 |         anchor_masks = yolo_tiny_anchor_masks
 55 |     else:
 56 |         model = YoloV3(FLAGS.size, training=True, classes=FLAGS.num_classes)
 57 |         anchors = yolo_anchors
 58 |         anchor_masks = yolo_anchor_masks
 59 | 
 60 |     # Configure the model for transfer learning
 61 |     if FLAGS.transfer == 'none':
 62 |         pass  # Nothing to do
 63 |     elif FLAGS.transfer in ['darknet', 'no_output']:
 64 |         # Darknet transfer is a special case that works
 65 |         # with incompatible number of classes
 66 |         # reset top layers
 67 |         if FLAGS.tiny:
 68 |             model_pretrained = YoloV3Tiny(
 69 |                 FLAGS.size, training=True, classes=FLAGS.weights_num_classes or FLAGS.num_classes)
 70 |         else:
 71 |             model_pretrained = YoloV3(
 72 |                 FLAGS.size, training=True, classes=FLAGS.weights_num_classes or FLAGS.num_classes)
 73 |         model_pretrained.load_weights(FLAGS.weights)
 74 | 
 75 |         if FLAGS.transfer == 'darknet':
 76 |             model.get_layer('yolo_darknet').set_weights(
 77 |                 model_pretrained.get_layer('yolo_darknet').get_weights())
 78 |             freeze_all(model.get_layer('yolo_darknet'))
 79 |         elif FLAGS.transfer == 'no_output':
 80 |             for l in model.layers:
 81 |                 if not l.name.startswith('yolo_output'):
 82 |                     l.set_weights(model_pretrained.get_layer(
 83 |                         l.name).get_weights())
 84 |                     freeze_all(l)
 85 |     else:
 86 |         # All other transfer require matching classes
 87 |         model.load_weights(FLAGS.weights)
 88 |         if FLAGS.transfer == 'fine_tune':
 89 |             # freeze darknet and fine tune other layers
 90 |             darknet = model.get_layer('yolo_darknet')
 91 |             freeze_all(darknet)
 92 |         elif FLAGS.transfer == 'frozen':
 93 |             # freeze everything
 94 |             freeze_all(model)
 95 | 
 96 |     optimizer = tf.keras.optimizers.Adam(lr=FLAGS.learning_rate)
 97 |     loss = [YoloLoss(anchors[mask], classes=FLAGS.num_classes)
 98 |             for mask in anchor_masks]
 99 | 
100 |     model.compile(optimizer=optimizer, loss=loss,
101 |                   run_eagerly=(FLAGS.mode == 'eager_fit'))
102 | 
103 |     return model, optimizer, loss, anchors, anchor_masks
104 | 
105 | 
106 | def main(_argv):
107 |     physical_devices = tf.config.experimental.list_physical_devices('GPU')
108 | 
109 |     # Setup
110 |     if FLAGS.multi_gpu:
111 |         for physical_device in physical_devices:
112 |             tf.config.experimental.set_memory_growth(physical_device, True)
113 | 
114 |         strategy = tf.distribute.MirroredStrategy()
115 |         print('Number of devices: {}'.format(strategy.num_replicas_in_sync))
116 |         BATCH_SIZE = FLAGS.batch_size * strategy.num_replicas_in_sync
117 |         FLAGS.batch_size = BATCH_SIZE
118 | 
119 |         with strategy.scope():
120 |             model, optimizer, loss, anchors, anchor_masks = setup_model()
121 |     else:
122 |         model, optimizer, loss, anchors, anchor_masks = setup_model()
123 | 
124 |     if FLAGS.dataset:
125 |         train_dataset = dataset.load_tfrecord_dataset(
126 |             FLAGS.dataset, FLAGS.classes, FLAGS.size)
127 |     else:
128 |         train_dataset = dataset.load_fake_dataset()
129 |     train_dataset = train_dataset.shuffle(buffer_size=512)
130 |     train_dataset = train_dataset.batch(FLAGS.batch_size)
131 |     train_dataset = train_dataset.map(lambda x, y: (
132 |         dataset.transform_images(x, FLAGS.size),
133 |         dataset.transform_targets(y, anchors, anchor_masks, FLAGS.size)))
134 |     train_dataset = train_dataset.prefetch(
135 |         buffer_size=tf.data.experimental.AUTOTUNE)
136 | 
137 |     if FLAGS.val_dataset:
138 |         val_dataset = dataset.load_tfrecord_dataset(
139 |             FLAGS.val_dataset, FLAGS.classes, FLAGS.size)
140 |     else:
141 |         val_dataset = dataset.load_fake_dataset()
142 |     val_dataset = val_dataset.batch(FLAGS.batch_size)
143 |     val_dataset = val_dataset.map(lambda x, y: (
144 |         dataset.transform_images(x, FLAGS.size),
145 |         dataset.transform_targets(y, anchors, anchor_masks, FLAGS.size)))
146 | 
147 |     if FLAGS.mode == 'eager_tf':
148 |         # Eager mode is great for debugging
149 |         # Non eager graph mode is recommended for real training
150 |         avg_loss = tf.keras.metrics.Mean('loss', dtype=tf.float32)
151 |         avg_val_loss = tf.keras.metrics.Mean('val_loss', dtype=tf.float32)
152 | 
153 |         for epoch in range(1, FLAGS.epochs + 1):
154 |             for batch, (images, labels) in enumerate(train_dataset):
155 |                 with tf.GradientTape() as tape:
156 |                     outputs = model(images, training=True)
157 |                     regularization_loss = tf.reduce_sum(model.losses)
158 |                     pred_loss = []
159 |                     for output, label, loss_fn in zip(outputs, labels, loss):
160 |                         pred_loss.append(loss_fn(label, output))
161 |                     total_loss = tf.reduce_sum(pred_loss) + regularization_loss
162 | 
163 |                 grads = tape.gradient(total_loss, model.trainable_variables)
164 |                 optimizer.apply_gradients(
165 |                     zip(grads, model.trainable_variables))
166 | 
167 |                 logging.info("{}_train_{}, {}, {}".format(
168 |                     epoch, batch, total_loss.numpy(),
169 |                     list(map(lambda x: np.sum(x.numpy()), pred_loss))))
170 |                 avg_loss.update_state(total_loss)
171 | 
172 |             for batch, (images, labels) in enumerate(val_dataset):
173 |                 outputs = model(images)
174 |                 regularization_loss = tf.reduce_sum(model.losses)
175 |                 pred_loss = []
176 |                 for output, label, loss_fn in zip(outputs, labels, loss):
177 |                     pred_loss.append(loss_fn(label, output))
178 |                 total_loss = tf.reduce_sum(pred_loss) + regularization_loss
179 | 
180 |                 logging.info("{}_val_{}, {}, {}".format(
181 |                     epoch, batch, total_loss.numpy(),
182 |                     list(map(lambda x: np.sum(x.numpy()), pred_loss))))
183 |                 avg_val_loss.update_state(total_loss)
184 | 
185 |             logging.info("{}, train: {}, val: {}".format(
186 |                 epoch,
187 |                 avg_loss.result().numpy(),
188 |                 avg_val_loss.result().numpy()))
189 | 
190 |             avg_loss.reset_states()
191 |             avg_val_loss.reset_states()
192 |             model.save_weights(
193 |                 'checkpoints/yolov3_train_{}.tf'.format(epoch))
194 |     else:
195 | 
196 |         callbacks = [
197 |             ReduceLROnPlateau(verbose=1),
198 |             EarlyStopping(patience=3, verbose=1),
199 |             ModelCheckpoint('checkpoints/yolov3_train_{epoch}.tf',
200 |                             verbose=1, save_weights_only=True),
201 |             TensorBoard(log_dir='logs')
202 |         ]
203 | 
204 |         start_time = time.time()
205 |         history = model.fit(train_dataset,
206 |                             epochs=FLAGS.epochs,
207 |                             callbacks=callbacks,
208 |                             validation_data=val_dataset)
209 |         end_time = time.time() - start_time
210 |         print(f'Total Training Time: {end_time}')
211 | 
212 | 
213 | if __name__ == '__main__':
214 |     try:
215 |         app.run(main)
216 |     except SystemExit:
217 |         pass
218 | 


--------------------------------------------------------------------------------
/yolov3_tf2/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zzh8829/yolov3-tf2/2784050f2fc1ff060f4c8b3ac2db231370569aa8/yolov3_tf2/__init__.py


--------------------------------------------------------------------------------
/yolov3_tf2/dataset.py:
--------------------------------------------------------------------------------
  1 | import tensorflow as tf
  2 | from absl.flags import FLAGS
  3 | 
  4 | @tf.function
  5 | def transform_targets_for_output(y_true, grid_size, anchor_idxs):
  6 |     # y_true: (N, boxes, (x1, y1, x2, y2, class, best_anchor))
  7 |     N = tf.shape(y_true)[0]
  8 | 
  9 |     # y_true_out: (N, grid, grid, anchors, [x1, y1, x2, y2, obj, class])
 10 |     y_true_out = tf.zeros(
 11 |         (N, grid_size, grid_size, tf.shape(anchor_idxs)[0], 6))
 12 | 
 13 |     anchor_idxs = tf.cast(anchor_idxs, tf.int32)
 14 | 
 15 |     indexes = tf.TensorArray(tf.int32, 1, dynamic_size=True)
 16 |     updates = tf.TensorArray(tf.float32, 1, dynamic_size=True)
 17 |     idx = 0
 18 |     for i in tf.range(N):
 19 |         for j in tf.range(tf.shape(y_true)[1]):
 20 |             if tf.equal(y_true[i][j][2], 0):
 21 |                 continue
 22 |             anchor_eq = tf.equal(
 23 |                 anchor_idxs, tf.cast(y_true[i][j][5], tf.int32))
 24 | 
 25 |             if tf.reduce_any(anchor_eq):
 26 |                 box = y_true[i][j][0:4]
 27 |                 box_xy = (y_true[i][j][0:2] + y_true[i][j][2:4]) / 2
 28 | 
 29 |                 anchor_idx = tf.cast(tf.where(anchor_eq), tf.int32)
 30 |                 grid_xy = tf.cast(box_xy // (1/grid_size), tf.int32)
 31 | 
 32 |                 # grid[y][x][anchor] = (tx, ty, bw, bh, obj, class)
 33 |                 indexes = indexes.write(
 34 |                     idx, [i, grid_xy[1], grid_xy[0], anchor_idx[0][0]])
 35 |                 updates = updates.write(
 36 |                     idx, [box[0], box[1], box[2], box[3], 1, y_true[i][j][4]])
 37 |                 idx += 1
 38 | 
 39 |     # tf.print(indexes.stack())
 40 |     # tf.print(updates.stack())
 41 | 
 42 |     return tf.tensor_scatter_nd_update(
 43 |         y_true_out, indexes.stack(), updates.stack())
 44 | 
 45 | 
 46 | def transform_targets(y_train, anchors, anchor_masks, size):
 47 |     y_outs = []
 48 |     grid_size = size // 32
 49 | 
 50 |     # calculate anchor index for true boxes
 51 |     anchors = tf.cast(anchors, tf.float32)
 52 |     anchor_area = anchors[..., 0] * anchors[..., 1]
 53 |     box_wh = y_train[..., 2:4] - y_train[..., 0:2]
 54 |     box_wh = tf.tile(tf.expand_dims(box_wh, -2),
 55 |                      (1, 1, tf.shape(anchors)[0], 1))
 56 |     box_area = box_wh[..., 0] * box_wh[..., 1]
 57 |     intersection = tf.minimum(box_wh[..., 0], anchors[..., 0]) * \
 58 |         tf.minimum(box_wh[..., 1], anchors[..., 1])
 59 |     iou = intersection / (box_area + anchor_area - intersection)
 60 |     anchor_idx = tf.cast(tf.argmax(iou, axis=-1), tf.float32)
 61 |     anchor_idx = tf.expand_dims(anchor_idx, axis=-1)
 62 | 
 63 |     y_train = tf.concat([y_train, anchor_idx], axis=-1)
 64 | 
 65 |     for anchor_idxs in anchor_masks:
 66 |         y_outs.append(transform_targets_for_output(
 67 |             y_train, grid_size, anchor_idxs))
 68 |         grid_size *= 2
 69 | 
 70 |     return tuple(y_outs)
 71 | 
 72 | 
 73 | def transform_images(x_train, size):
 74 |     x_train = tf.image.resize(x_train, (size, size))
 75 |     x_train = x_train / 255
 76 |     return x_train
 77 | 
 78 | 
 79 | # https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/using_your_own_dataset.md#conversion-script-outline-conversion-script-outline
 80 | # Commented out fields are not required in our project
 81 | IMAGE_FEATURE_MAP = {
 82 |     # 'image/width': tf.io.FixedLenFeature([], tf.int64),
 83 |     # 'image/height': tf.io.FixedLenFeature([], tf.int64),
 84 |     # 'image/filename': tf.io.FixedLenFeature([], tf.string),
 85 |     # 'image/source_id': tf.io.FixedLenFeature([], tf.string),
 86 |     # 'image/key/sha256': tf.io.FixedLenFeature([], tf.string),
 87 |     'image/encoded': tf.io.FixedLenFeature([], tf.string),
 88 |     # 'image/format': tf.io.FixedLenFeature([], tf.string),
 89 |     'image/object/bbox/xmin': tf.io.VarLenFeature(tf.float32),
 90 |     'image/object/bbox/ymin': tf.io.VarLenFeature(tf.float32),
 91 |     'image/object/bbox/xmax': tf.io.VarLenFeature(tf.float32),
 92 |     'image/object/bbox/ymax': tf.io.VarLenFeature(tf.float32),
 93 |     'image/object/class/text': tf.io.VarLenFeature(tf.string),
 94 |     # 'image/object/class/label': tf.io.VarLenFeature(tf.int64),
 95 |     # 'image/object/difficult': tf.io.VarLenFeature(tf.int64),
 96 |     # 'image/object/truncated': tf.io.VarLenFeature(tf.int64),
 97 |     # 'image/object/view': tf.io.VarLenFeature(tf.string),
 98 | }
 99 | 
100 | 
101 | def parse_tfrecord(tfrecord, class_table, size):
102 |     x = tf.io.parse_single_example(tfrecord, IMAGE_FEATURE_MAP)
103 |     x_train = tf.image.decode_jpeg(x['image/encoded'], channels=3)
104 |     x_train = tf.image.resize(x_train, (size, size))
105 | 
106 |     class_text = tf.sparse.to_dense(
107 |         x['image/object/class/text'], default_value='')
108 |     labels = tf.cast(class_table.lookup(class_text), tf.float32)
109 |     y_train = tf.stack([tf.sparse.to_dense(x['image/object/bbox/xmin']),
110 |                         tf.sparse.to_dense(x['image/object/bbox/ymin']),
111 |                         tf.sparse.to_dense(x['image/object/bbox/xmax']),
112 |                         tf.sparse.to_dense(x['image/object/bbox/ymax']),
113 |                         labels], axis=1)
114 | 
115 |     paddings = [[0, FLAGS.yolo_max_boxes - tf.shape(y_train)[0]], [0, 0]]
116 |     y_train = tf.pad(y_train, paddings)
117 | 
118 |     return x_train, y_train
119 | 
120 | 
121 | def load_tfrecord_dataset(file_pattern, class_file, size=416):
122 |     LINE_NUMBER = -1  # TODO: use tf.lookup.TextFileIndex.LINE_NUMBER
123 |     class_table = tf.lookup.StaticHashTable(tf.lookup.TextFileInitializer(
124 |         class_file, tf.string, 0, tf.int64, LINE_NUMBER, delimiter="\n"), -1)
125 | 
126 |     files = tf.data.Dataset.list_files(file_pattern)
127 |     dataset = files.flat_map(tf.data.TFRecordDataset)
128 |     return dataset.map(lambda x: parse_tfrecord(x, class_table, size))
129 | 
130 | 
131 | def load_fake_dataset():
132 |     x_train = tf.image.decode_jpeg(
133 |         open('./data/girl.png', 'rb').read(), channels=3)
134 |     x_train = tf.expand_dims(x_train, axis=0)
135 | 
136 |     labels = [
137 |         [0.18494931, 0.03049111, 0.9435849,  0.96302897, 0],
138 |         [0.01586703, 0.35938117, 0.17582396, 0.6069674, 56],
139 |         [0.09158827, 0.48252046, 0.26967454, 0.6403017, 67]
140 |     ] + [[0, 0, 0, 0, 0]] * 5
141 |     y_train = tf.convert_to_tensor(labels, tf.float32)
142 |     y_train = tf.expand_dims(y_train, axis=0)
143 | 
144 |     return tf.data.Dataset.from_tensor_slices((x_train, y_train))
145 | 


--------------------------------------------------------------------------------
/yolov3_tf2/models.py:
--------------------------------------------------------------------------------
  1 | from absl import flags
  2 | from absl.flags import FLAGS
  3 | import numpy as np
  4 | import tensorflow as tf
  5 | from tensorflow.keras import Model
  6 | from tensorflow.keras.layers import (
  7 |     Add,
  8 |     Concatenate,
  9 |     Conv2D,
 10 |     Input,
 11 |     Lambda,
 12 |     LeakyReLU,
 13 |     MaxPool2D,
 14 |     UpSampling2D,
 15 |     ZeroPadding2D,
 16 |     BatchNormalization,
 17 | )
 18 | from tensorflow.keras.regularizers import l2
 19 | from tensorflow.keras.losses import (
 20 |     binary_crossentropy,
 21 |     sparse_categorical_crossentropy
 22 | )
 23 | from .utils import broadcast_iou
 24 | 
 25 | flags.DEFINE_integer('yolo_max_boxes', 100,
 26 |                      'maximum number of boxes per image')
 27 | flags.DEFINE_float('yolo_iou_threshold', 0.5, 'iou threshold')
 28 | flags.DEFINE_float('yolo_score_threshold', 0.5, 'score threshold')
 29 | 
 30 | yolo_anchors = np.array([(10, 13), (16, 30), (33, 23), (30, 61), (62, 45),
 31 |                          (59, 119), (116, 90), (156, 198), (373, 326)],
 32 |                         np.float32) / 416
 33 | yolo_anchor_masks = np.array([[6, 7, 8], [3, 4, 5], [0, 1, 2]])
 34 | 
 35 | yolo_tiny_anchors = np.array([(10, 14), (23, 27), (37, 58),
 36 |                               (81, 82), (135, 169),  (344, 319)],
 37 |                              np.float32) / 416
 38 | yolo_tiny_anchor_masks = np.array([[3, 4, 5], [0, 1, 2]])
 39 | 
 40 | 
 41 | def DarknetConv(x, filters, size, strides=1, batch_norm=True):
 42 |     if strides == 1:
 43 |         padding = 'same'
 44 |     else:
 45 |         x = ZeroPadding2D(((1, 0), (1, 0)))(x)  # top left half-padding
 46 |         padding = 'valid'
 47 |     x = Conv2D(filters=filters, kernel_size=size,
 48 |                strides=strides, padding=padding,
 49 |                use_bias=not batch_norm, kernel_regularizer=l2(0.0005))(x)
 50 |     if batch_norm:
 51 |         x = BatchNormalization()(x)
 52 |         x = LeakyReLU(alpha=0.1)(x)
 53 |     return x
 54 | 
 55 | 
 56 | def DarknetResidual(x, filters):
 57 |     prev = x
 58 |     x = DarknetConv(x, filters // 2, 1)
 59 |     x = DarknetConv(x, filters, 3)
 60 |     x = Add()([prev, x])
 61 |     return x
 62 | 
 63 | 
 64 | def DarknetBlock(x, filters, blocks):
 65 |     x = DarknetConv(x, filters, 3, strides=2)
 66 |     for _ in range(blocks):
 67 |         x = DarknetResidual(x, filters)
 68 |     return x
 69 | 
 70 | 
 71 | def Darknet(name=None):
 72 |     x = inputs = Input([None, None, 3])
 73 |     x = DarknetConv(x, 32, 3)
 74 |     x = DarknetBlock(x, 64, 1)
 75 |     x = DarknetBlock(x, 128, 2)  # skip connection
 76 |     x = x_36 = DarknetBlock(x, 256, 8)  # skip connection
 77 |     x = x_61 = DarknetBlock(x, 512, 8)
 78 |     x = DarknetBlock(x, 1024, 4)
 79 |     return tf.keras.Model(inputs, (x_36, x_61, x), name=name)
 80 | 
 81 | 
 82 | def DarknetTiny(name=None):
 83 |     x = inputs = Input([None, None, 3])
 84 |     x = DarknetConv(x, 16, 3)
 85 |     x = MaxPool2D(2, 2, 'same')(x)
 86 |     x = DarknetConv(x, 32, 3)
 87 |     x = MaxPool2D(2, 2, 'same')(x)
 88 |     x = DarknetConv(x, 64, 3)
 89 |     x = MaxPool2D(2, 2, 'same')(x)
 90 |     x = DarknetConv(x, 128, 3)
 91 |     x = MaxPool2D(2, 2, 'same')(x)
 92 |     x = x_8 = DarknetConv(x, 256, 3)  # skip connection
 93 |     x = MaxPool2D(2, 2, 'same')(x)
 94 |     x = DarknetConv(x, 512, 3)
 95 |     x = MaxPool2D(2, 1, 'same')(x)
 96 |     x = DarknetConv(x, 1024, 3)
 97 |     return tf.keras.Model(inputs, (x_8, x), name=name)
 98 | 
 99 | 
100 | def YoloConv(filters, name=None):
101 |     def yolo_conv(x_in):
102 |         if isinstance(x_in, tuple):
103 |             inputs = Input(x_in[0].shape[1:]), Input(x_in[1].shape[1:])
104 |             x, x_skip = inputs
105 | 
106 |             # concat with skip connection
107 |             x = DarknetConv(x, filters, 1)
108 |             x = UpSampling2D(2)(x)
109 |             x = Concatenate()([x, x_skip])
110 |         else:
111 |             x = inputs = Input(x_in.shape[1:])
112 | 
113 |         x = DarknetConv(x, filters, 1)
114 |         x = DarknetConv(x, filters * 2, 3)
115 |         x = DarknetConv(x, filters, 1)
116 |         x = DarknetConv(x, filters * 2, 3)
117 |         x = DarknetConv(x, filters, 1)
118 |         return Model(inputs, x, name=name)(x_in)
119 |     return yolo_conv
120 | 
121 | 
122 | def YoloConvTiny(filters, name=None):
123 |     def yolo_conv(x_in):
124 |         if isinstance(x_in, tuple):
125 |             inputs = Input(x_in[0].shape[1:]), Input(x_in[1].shape[1:])
126 |             x, x_skip = inputs
127 | 
128 |             # concat with skip connection
129 |             x = DarknetConv(x, filters, 1)
130 |             x = UpSampling2D(2)(x)
131 |             x = Concatenate()([x, x_skip])
132 |         else:
133 |             x = inputs = Input(x_in.shape[1:])
134 |             x = DarknetConv(x, filters, 1)
135 | 
136 |         return Model(inputs, x, name=name)(x_in)
137 |     return yolo_conv
138 | 
139 | 
140 | def YoloOutput(filters, anchors, classes, name=None):
141 |     def yolo_output(x_in):
142 |         x = inputs = Input(x_in.shape[1:])
143 |         x = DarknetConv(x, filters * 2, 3)
144 |         x = DarknetConv(x, anchors * (classes + 5), 1, batch_norm=False)
145 |         x = Lambda(lambda x: tf.reshape(x, (-1, tf.shape(x)[1], tf.shape(x)[2],
146 |                                             anchors, classes + 5)))(x)
147 |         return tf.keras.Model(inputs, x, name=name)(x_in)
148 |     return yolo_output
149 | 
150 | 
151 | # As tensorflow lite doesn't support tf.size used in tf.meshgrid, 
152 | # we reimplemented a simple meshgrid function that use basic tf function.
153 | def _meshgrid(n_a, n_b):
154 | 
155 |     return [
156 |         tf.reshape(tf.tile(tf.range(n_a), [n_b]), (n_b, n_a)),
157 |         tf.reshape(tf.repeat(tf.range(n_b), n_a), (n_b, n_a))
158 |     ]
159 | 
160 | 
161 | def yolo_boxes(pred, anchors, classes):
162 |     # pred: (batch_size, grid, grid, anchors, (x, y, w, h, obj, ...classes))
163 |     grid_size = tf.shape(pred)[1:3]
164 |     box_xy, box_wh, objectness, class_probs = tf.split(
165 |         pred, (2, 2, 1, classes), axis=-1)
166 | 
167 |     box_xy = tf.sigmoid(box_xy)
168 |     objectness = tf.sigmoid(objectness)
169 |     class_probs = tf.sigmoid(class_probs)
170 |     pred_box = tf.concat((box_xy, box_wh), axis=-1)  # original xywh for loss
171 | 
172 |     # !!! grid[x][y] == (y, x)
173 |     grid = _meshgrid(grid_size[1],grid_size[0])
174 |     grid = tf.expand_dims(tf.stack(grid, axis=-1), axis=2)  # [gx, gy, 1, 2]
175 | 
176 |     box_xy = (box_xy + tf.cast(grid, tf.float32)) / \
177 |         tf.cast(grid_size, tf.float32)
178 |     box_wh = tf.exp(box_wh) * anchors
179 | 
180 |     box_x1y1 = box_xy - box_wh / 2
181 |     box_x2y2 = box_xy + box_wh / 2
182 |     bbox = tf.concat([box_x1y1, box_x2y2], axis=-1)
183 | 
184 |     return bbox, objectness, class_probs, pred_box
185 | 
186 | 
187 | def yolo_nms(outputs, anchors, masks, classes):
188 |     # boxes, conf, type
189 |     b, c, t = [], [], []
190 | 
191 |     for o in outputs:
192 |         b.append(tf.reshape(o[0], (tf.shape(o[0])[0], -1, tf.shape(o[0])[-1])))
193 |         c.append(tf.reshape(o[1], (tf.shape(o[1])[0], -1, tf.shape(o[1])[-1])))
194 |         t.append(tf.reshape(o[2], (tf.shape(o[2])[0], -1, tf.shape(o[2])[-1])))
195 | 
196 |     bbox = tf.concat(b, axis=1)
197 |     confidence = tf.concat(c, axis=1)
198 |     class_probs = tf.concat(t, axis=1)
199 | 
200 |     # If we only have one class, do not multiply by class_prob (always 0.5)
201 |     if classes == 1:
202 |         scores = confidence
203 |     else:
204 |         scores = confidence * class_probs
205 | 
206 |     dscores = tf.squeeze(scores, axis=0)
207 |     scores = tf.reduce_max(dscores,[1])
208 |     bbox = tf.reshape(bbox,(-1,4))
209 |     classes = tf.argmax(dscores,1)
210 |     selected_indices, selected_scores = tf.image.non_max_suppression_with_scores(
211 |         boxes=bbox,
212 |         scores=scores,
213 |         max_output_size=FLAGS.yolo_max_boxes,
214 |         iou_threshold=FLAGS.yolo_iou_threshold,
215 |         score_threshold=FLAGS.yolo_score_threshold,
216 |         soft_nms_sigma=0.5
217 |     )
218 |     
219 |     num_valid_nms_boxes = tf.shape(selected_indices)[0]
220 | 
221 |     selected_indices = tf.concat([selected_indices,tf.zeros(FLAGS.yolo_max_boxes-num_valid_nms_boxes, tf.int32)], 0)
222 |     selected_scores = tf.concat([selected_scores,tf.zeros(FLAGS.yolo_max_boxes-num_valid_nms_boxes,tf.float32)], -1)
223 | 
224 |     boxes=tf.gather(bbox, selected_indices)
225 |     boxes = tf.expand_dims(boxes, axis=0)
226 |     scores=selected_scores
227 |     scores = tf.expand_dims(scores, axis=0)
228 |     classes = tf.gather(classes,selected_indices)
229 |     classes = tf.expand_dims(classes, axis=0)
230 |     valid_detections=num_valid_nms_boxes
231 |     valid_detections = tf.expand_dims(valid_detections, axis=0)
232 | 
233 |     return boxes, scores, classes, valid_detections
234 | 
235 | 
236 | def YoloV3(size=None, channels=3, anchors=yolo_anchors,
237 |            masks=yolo_anchor_masks, classes=80, training=False):
238 |     x = inputs = Input([size, size, channels], name='input')
239 | 
240 |     x_36, x_61, x = Darknet(name='yolo_darknet')(x)
241 | 
242 |     x = YoloConv(512, name='yolo_conv_0')(x)
243 |     output_0 = YoloOutput(512, len(masks[0]), classes, name='yolo_output_0')(x)
244 | 
245 |     x = YoloConv(256, name='yolo_conv_1')((x, x_61))
246 |     output_1 = YoloOutput(256, len(masks[1]), classes, name='yolo_output_1')(x)
247 | 
248 |     x = YoloConv(128, name='yolo_conv_2')((x, x_36))
249 |     output_2 = YoloOutput(128, len(masks[2]), classes, name='yolo_output_2')(x)
250 | 
251 |     if training:
252 |         return Model(inputs, (output_0, output_1, output_2), name='yolov3')
253 | 
254 |     boxes_0 = Lambda(lambda x: yolo_boxes(x, anchors[masks[0]], classes),
255 |                      name='yolo_boxes_0')(output_0)
256 |     boxes_1 = Lambda(lambda x: yolo_boxes(x, anchors[masks[1]], classes),
257 |                      name='yolo_boxes_1')(output_1)
258 |     boxes_2 = Lambda(lambda x: yolo_boxes(x, anchors[masks[2]], classes),
259 |                      name='yolo_boxes_2')(output_2)
260 | 
261 |     outputs = Lambda(lambda x: yolo_nms(x, anchors, masks, classes),
262 |                      name='yolo_nms')((boxes_0[:3], boxes_1[:3], boxes_2[:3]))
263 | 
264 |     return Model(inputs, outputs, name='yolov3')
265 | 
266 | 
267 | def YoloV3Tiny(size=None, channels=3, anchors=yolo_tiny_anchors,
268 |                masks=yolo_tiny_anchor_masks, classes=80, training=False):
269 |     x = inputs = Input([size, size, channels], name='input')
270 | 
271 |     x_8, x = DarknetTiny(name='yolo_darknet')(x)
272 | 
273 |     x = YoloConvTiny(256, name='yolo_conv_0')(x)
274 |     output_0 = YoloOutput(256, len(masks[0]), classes, name='yolo_output_0')(x)
275 | 
276 |     x = YoloConvTiny(128, name='yolo_conv_1')((x, x_8))
277 |     output_1 = YoloOutput(128, len(masks[1]), classes, name='yolo_output_1')(x)
278 | 
279 |     if training:
280 |         return Model(inputs, (output_0, output_1), name='yolov3')
281 | 
282 |     boxes_0 = Lambda(lambda x: yolo_boxes(x, anchors[masks[0]], classes),
283 |                      name='yolo_boxes_0')(output_0)
284 |     boxes_1 = Lambda(lambda x: yolo_boxes(x, anchors[masks[1]], classes),
285 |                      name='yolo_boxes_1')(output_1)
286 |     outputs = Lambda(lambda x: yolo_nms(x, anchors, masks, classes),
287 |                      name='yolo_nms')((boxes_0[:3], boxes_1[:3]))
288 |     return Model(inputs, outputs, name='yolov3_tiny')
289 | 
290 | 
291 | def YoloLoss(anchors, classes=80, ignore_thresh=0.5):
292 |     def yolo_loss(y_true, y_pred):
293 |         # 1. transform all pred outputs
294 |         # y_pred: (batch_size, grid, grid, anchors, (x, y, w, h, obj, ...cls))
295 |         pred_box, pred_obj, pred_class, pred_xywh = yolo_boxes(
296 |             y_pred, anchors, classes)
297 |         pred_xy = pred_xywh[..., 0:2]
298 |         pred_wh = pred_xywh[..., 2:4]
299 | 
300 |         # 2. transform all true outputs
301 |         # y_true: (batch_size, grid, grid, anchors, (x1, y1, x2, y2, obj, cls))
302 |         true_box, true_obj, true_class_idx = tf.split(
303 |             y_true, (4, 1, 1), axis=-1)
304 |         true_xy = (true_box[..., 0:2] + true_box[..., 2:4]) / 2
305 |         true_wh = true_box[..., 2:4] - true_box[..., 0:2]
306 | 
307 |         # give higher weights to small boxes
308 |         box_loss_scale = 2 - true_wh[..., 0] * true_wh[..., 1]
309 | 
310 |         # 3. inverting the pred box equations
311 |         grid_size = tf.shape(y_true)[1]
312 |         grid = tf.meshgrid(tf.range(grid_size), tf.range(grid_size))
313 |         grid = tf.expand_dims(tf.stack(grid, axis=-1), axis=2)
314 |         true_xy = true_xy * tf.cast(grid_size, tf.float32) - \
315 |             tf.cast(grid, tf.float32)
316 |         true_wh = tf.math.log(true_wh / anchors)
317 |         true_wh = tf.where(tf.math.is_inf(true_wh),
318 |                            tf.zeros_like(true_wh), true_wh)
319 | 
320 |         # 4. calculate all masks
321 |         obj_mask = tf.squeeze(true_obj, -1)
322 |         # ignore false positive when iou is over threshold
323 |         best_iou = tf.map_fn(
324 |             lambda x: tf.reduce_max(broadcast_iou(x[0], tf.boolean_mask(
325 |                 x[1], tf.cast(x[2], tf.bool))), axis=-1),
326 |             (pred_box, true_box, obj_mask),
327 |             tf.float32)
328 |         ignore_mask = tf.cast(best_iou < ignore_thresh, tf.float32)
329 | 
330 |         # 5. calculate all losses
331 |         xy_loss = obj_mask * box_loss_scale * \
332 |             tf.reduce_sum(tf.square(true_xy - pred_xy), axis=-1)
333 |         wh_loss = obj_mask * box_loss_scale * \
334 |             tf.reduce_sum(tf.square(true_wh - pred_wh), axis=-1)
335 |         obj_loss = binary_crossentropy(true_obj, pred_obj)
336 |         obj_loss = obj_mask * obj_loss + \
337 |             (1 - obj_mask) * ignore_mask * obj_loss
338 |         # TODO: use binary_crossentropy instead
339 |         class_loss = obj_mask * sparse_categorical_crossentropy(
340 |             true_class_idx, pred_class)
341 | 
342 |         # 6. sum over (batch, gridx, gridy, anchors) => (batch, 1)
343 |         xy_loss = tf.reduce_sum(xy_loss, axis=(1, 2, 3))
344 |         wh_loss = tf.reduce_sum(wh_loss, axis=(1, 2, 3))
345 |         obj_loss = tf.reduce_sum(obj_loss, axis=(1, 2, 3))
346 |         class_loss = tf.reduce_sum(class_loss, axis=(1, 2, 3))
347 | 
348 |         return xy_loss + wh_loss + obj_loss + class_loss
349 |     return yolo_loss
350 | 


--------------------------------------------------------------------------------
/yolov3_tf2/utils.py:
--------------------------------------------------------------------------------
  1 | from absl import logging
  2 | import numpy as np
  3 | import tensorflow as tf
  4 | import cv2
  5 | 
  6 | YOLOV3_LAYER_LIST = [
  7 |     'yolo_darknet',
  8 |     'yolo_conv_0',
  9 |     'yolo_output_0',
 10 |     'yolo_conv_1',
 11 |     'yolo_output_1',
 12 |     'yolo_conv_2',
 13 |     'yolo_output_2',
 14 | ]
 15 | 
 16 | YOLOV3_TINY_LAYER_LIST = [
 17 |     'yolo_darknet',
 18 |     'yolo_conv_0',
 19 |     'yolo_output_0',
 20 |     'yolo_conv_1',
 21 |     'yolo_output_1',
 22 | ]
 23 | 
 24 | 
 25 | def load_darknet_weights(model, weights_file, tiny=False):
 26 |     wf = open(weights_file, 'rb')
 27 |     major, minor, revision, seen, _ = np.fromfile(wf, dtype=np.int32, count=5)
 28 | 
 29 |     if tiny:
 30 |         layers = YOLOV3_TINY_LAYER_LIST
 31 |     else:
 32 |         layers = YOLOV3_LAYER_LIST
 33 | 
 34 |     for layer_name in layers:
 35 |         sub_model = model.get_layer(layer_name)
 36 |         for i, layer in enumerate(sub_model.layers):
 37 |             if not layer.name.startswith('conv2d'):
 38 |                 continue
 39 |             batch_norm = None
 40 |             if i + 1 < len(sub_model.layers) and \
 41 |                     sub_model.layers[i + 1].name.startswith('batch_norm'):
 42 |                 batch_norm = sub_model.layers[i + 1]
 43 | 
 44 |             logging.info("{}/{} {}".format(
 45 |                 sub_model.name, layer.name, 'bn' if batch_norm else 'bias'))
 46 | 
 47 |             filters = layer.filters
 48 |             size = layer.kernel_size[0]
 49 |             in_dim = layer.get_input_shape_at(0)[-1]
 50 | 
 51 |             if batch_norm is None:
 52 |                 conv_bias = np.fromfile(wf, dtype=np.float32, count=filters)
 53 |             else:
 54 |                 # darknet [beta, gamma, mean, variance]
 55 |                 bn_weights = np.fromfile(
 56 |                     wf, dtype=np.float32, count=4 * filters)
 57 |                 # tf [gamma, beta, mean, variance]
 58 |                 bn_weights = bn_weights.reshape((4, filters))[[1, 0, 2, 3]]
 59 | 
 60 |             # darknet shape (out_dim, in_dim, height, width)
 61 |             conv_shape = (filters, in_dim, size, size)
 62 |             conv_weights = np.fromfile(
 63 |                 wf, dtype=np.float32, count=np.product(conv_shape))
 64 |             # tf shape (height, width, in_dim, out_dim)
 65 |             conv_weights = conv_weights.reshape(
 66 |                 conv_shape).transpose([2, 3, 1, 0])
 67 | 
 68 |             if batch_norm is None:
 69 |                 layer.set_weights([conv_weights, conv_bias])
 70 |             else:
 71 |                 layer.set_weights([conv_weights])
 72 |                 batch_norm.set_weights(bn_weights)
 73 | 
 74 |     assert len(wf.read()) == 0, 'failed to read all data'
 75 |     wf.close()
 76 | 
 77 | 
 78 | def broadcast_iou(box_1, box_2):
 79 |     # box_1: (..., (x1, y1, x2, y2))
 80 |     # box_2: (N, (x1, y1, x2, y2))
 81 | 
 82 |     # broadcast boxes
 83 |     box_1 = tf.expand_dims(box_1, -2)
 84 |     box_2 = tf.expand_dims(box_2, 0)
 85 |     # new_shape: (..., N, (x1, y1, x2, y2))
 86 |     new_shape = tf.broadcast_dynamic_shape(tf.shape(box_1), tf.shape(box_2))
 87 |     box_1 = tf.broadcast_to(box_1, new_shape)
 88 |     box_2 = tf.broadcast_to(box_2, new_shape)
 89 | 
 90 |     int_w = tf.maximum(tf.minimum(box_1[..., 2], box_2[..., 2]) -
 91 |                        tf.maximum(box_1[..., 0], box_2[..., 0]), 0)
 92 |     int_h = tf.maximum(tf.minimum(box_1[..., 3], box_2[..., 3]) -
 93 |                        tf.maximum(box_1[..., 1], box_2[..., 1]), 0)
 94 |     int_area = int_w * int_h
 95 |     box_1_area = (box_1[..., 2] - box_1[..., 0]) * \
 96 |         (box_1[..., 3] - box_1[..., 1])
 97 |     box_2_area = (box_2[..., 2] - box_2[..., 0]) * \
 98 |         (box_2[..., 3] - box_2[..., 1])
 99 |     return int_area / (box_1_area + box_2_area - int_area)
100 | 
101 | 
102 | def draw_outputs(img, outputs, class_names):
103 |     boxes, objectness, classes, nums = outputs
104 |     boxes, objectness, classes, nums = boxes[0], objectness[0], classes[0], nums[0]
105 |     wh = np.flip(img.shape[0:2])
106 |     for i in range(nums):
107 |         x1y1 = tuple((np.array(boxes[i][0:2]) * wh).astype(np.int32))
108 |         x2y2 = tuple((np.array(boxes[i][2:4]) * wh).astype(np.int32))
109 |         img = cv2.rectangle(img, x1y1, x2y2, (255, 0, 0), 2)
110 |         img = cv2.putText(img, '{} {:.4f}'.format(
111 |             class_names[int(classes[i])], objectness[i]),
112 |             x1y1, cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), 2)
113 |     return img
114 | 
115 | 
116 | def draw_labels(x, y, class_names):
117 |     img = x.numpy()
118 |     boxes, classes = tf.split(y, (4, 1), axis=-1)
119 |     classes = classes[..., 0]
120 |     wh = np.flip(img.shape[0:2])
121 |     for i in range(len(boxes)):
122 |         x1y1 = tuple((np.array(boxes[i][0:2]) * wh).astype(np.int32))
123 |         x2y2 = tuple((np.array(boxes[i][2:4]) * wh).astype(np.int32))
124 |         img = cv2.rectangle(img, x1y1, x2y2, (255, 0, 0), 2)
125 |         img = cv2.putText(img, class_names[classes[i]],
126 |                           x1y1, cv2.FONT_HERSHEY_COMPLEX_SMALL,
127 |                           1, (0, 0, 255), 2)
128 |     return img
129 | 
130 | 
131 | def freeze_all(model, frozen=True):
132 |     model.trainable = not frozen
133 |     if isinstance(model, tf.keras.Model):
134 |         for l in model.layers:
135 |             freeze_all(l, frozen)
136 | 


--------------------------------------------------------------------------------