├── .DS_Store
├── .gitignore
├── __init__.py
├── callbacks
    ├── __init__.py
    └── model_checkpoint.py
├── configs
    ├── ssd300_mobilenetv1.json
    ├── ssd300_mobilenetv1_coco2017-train.json
    ├── ssd300_mobilenetv2.json
    ├── ssd300_mobilenetv2_coco2017-train.json
    ├── ssd300_vgg16.json
    ├── ssd300_vgg16_pascal-voc-07-12.json
    ├── ssd300_vgg16_pascal-voc-2007.json
    ├── ssd320_mobilenetv2_coco2017-train.json
    ├── tbpp384_vgg16.json
    └── tbpp768_vgg16.json
├── convert.py
├── custom_layers
    ├── __init__.py
    ├── decode_ssd_predictions.py
    ├── decode_tbpp_predictions.py
    ├── default_boxes.py
    └── l2_normalization.py
├── data_generators
    ├── __init__.py
    ├── ssd_data_generator.py
    └── tbpp_data_generator.py
├── display_default_boxes.py
├── evaluate.py
├── evaluate.sh
├── inference.py
├── inference.sh
├── losses
    ├── __init__.py
    ├── smooth_l1_loss.py
    ├── softmax_loss.py
    ├── ssd_loss.py
    └── tbpp_loss.py
├── networks
    ├── __init__.py
    ├── base_networks
    │   ├── __init__.py
    │   └── truncated_vgg16.py
    ├── ssd_mobilenet.py
    ├── ssd_mobilenetv2.py
    ├── ssd_vgg16.py
    └── tbpp_vgg16.py
├── playground.py
├── requirements.txt
├── test.py
├── test.sh
├── train.py
├── train.sh
├── utils
    ├── __init__.py
    ├── augmentation_utils
    │   ├── __init__.py
    │   ├── bboxes_filter.py
    │   ├── random_brightness.py
    │   ├── random_contrast.py
    │   ├── random_crop.py
    │   ├── random_crop_quad.py
    │   ├── random_expand.py
    │   ├── random_expand_quad.py
    │   ├── random_horizontal_flip.py
    │   ├── random_horizontal_flip_quad.py
    │   ├── random_hue.py
    │   ├── random_lighting_noise.py
    │   ├── random_saturation.py
    │   ├── random_vertical_flip.py
    │   ├── random_vertical_flip_quad.py
    │   └── resize_to_fixed_size.py
    ├── bbox_utils
    │   ├── __init__.py
    │   ├── center_to_corner.py
    │   ├── center_to_vertices.py
    │   ├── corner_to_center.py
    │   ├── iou.py
    │   └── object_coverage.py
    ├── command_line_utils
    │   ├── __init__.py
    │   └── str2bool.py
    ├── data_utils
    │   ├── __init__.py
    │   ├── coco_text.py
    │   └── get_samples_from_split.py
    ├── display_tbpp_data_sample.py
    ├── inference_utils
    │   ├── __init__.py
    │   ├── ssd_mobilenetv1.py
    │   ├── ssd_mobilenetv2.py
    │   ├── ssd_vgg16.py
    │   └── tbpp_vgg16.py
    ├── one_hot_class_label.py
    ├── pascal_voc_utils
    │   ├── __init__.py
    │   └── read_label.py
    ├── prepare_coco_dataset.py
    ├── prepare_cocotextv2_dataset.py
    ├── prepare_icdar-2013_dataset.py
    ├── prepare_icdar-2015_dataset.py
    ├── prepare_midv500_dataset.py
    ├── prepare_pascal-voc-2007-2012_dataset.py
    ├── prepare_pascal_voc_2007_dataset.py
    ├── prepare_pascal_voc_2012_dataset.py
    ├── prepare_svt_dataset.py
    ├── prepare_synthtext_dataset.py
    ├── ssd_utils
    │   ├── __init__.py
    │   ├── decode_predictions.py
    │   ├── encode_bboxes.py
    │   ├── generate_default_boxes_for_feature_map.py
    │   ├── get_number_default_boxes.py
    │   ├── match_gt_boxes_to_default_boxes.py
    │   └── read_sample.py
    ├── textboxes_utils
    │   ├── __init__.py
    │   ├── decode_predictions.py
    │   ├── encode_textboxes.py
    │   ├── get_bboxes_from_quads.py
    │   ├── get_num_quads.py
    │   ├── get_samples.py
    │   ├── read_sample.py
    │   └── sort_quads_vertices.py
    ├── training_utils
    │   ├── __init__.py
    │   ├── ssd_mobilenetv1.py
    │   ├── ssd_mobilenetv2.py
    │   ├── ssd_vgg16.py
    │   └── tbpp_vgg16.py
    └── visualize_training_metrics.py
└── webcam.py

/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Socret360/object-detection-in-keras/0b582a46f41623a1e1166c1e2b050f55788c3003/.DS_Store
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Dataset 10 | data/ 11 | 12 | # VSCode 13 | .vscode/ 14 | 15 | # Distribution / packaging 16 | .Python 17 | env/ 18 | build/ 19 | develop-eggs/ 20 | dist/ 21 | downloads/ 22 | eggs/ 23 | .eggs/ 24 | lib/ 25 | lib64/ 26 | parts/ 27 | sdist/ 28 | var/ 29 | wheels/ 30 | *.egg-info/ 31 | .installed.cfg 32 | *.egg 33 | 34 | # PyInstaller 35 | # Usually these files are written by a python script from a template 36 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 37 | *.manifest 38 | *.spec 39 | 40 | # Installer logs 41 | pip-log.txt 42 | pip-delete-this-directory.txt 43 | 44 | # Unit test / coverage reports 45 | htmlcov/ 46 | .tox/ 47 | .coverage 48 | .coverage.* 49 | .cache 50 | nosetests.xml 51 | coverage.xml 52 | *.cover 53 | .hypothesis/ 54 | 55 | # Translations 56 | *.mo 57 | *.pot 58 | 59 | # Django stuff: 60 | *.log 61 | local_settings.py 62 | 63 | # Flask stuff: 64 | instance/ 65 | .webassets-cache 66 | 67 | # Scrapy stuff: 68 | .scrapy 69 | 70 | # Sphinx documentation 71 | docs/_build/ 72 | 73 | # PyBuilder 74 | target/ 75 | 76 | # Jupyter Notebook 77 | .ipynb_checkpoints 78 | 79 | # pyenv 80 | .python-version 81 | 82 | # celery beat schedule file 83 | celerybeat-schedule 84 | 85 | # SageMath parsed files 86 | *.sage.py 87 | 88 | # dotenv 89 | .env 90 | 91 | # virtualenv 92 | .venv 93 | venv/ 94 | ENV/ 95 | 96 | # Spyder project settings 97 | .spyderproject 98 | .spyproject 99 | 100 | # Rope project settings 101 | .ropeproject 102 | 103 | # mkdocs documentation 104 | /site 105 | 106 | # mypy 107 | .mypy_cache/ 108 | 109 | # experiments data 110 | experiments/ 111 | 112 | #datasets 113 | datasets/ 114 | 115 | .idea/ 116 | 117 | temp.py 118 | 119 | output/ 120 | data/ 121 | base_networks/pretrained_weights 122 | sample_data 123 | experiment.py 124 | -------------------------------------------------------------------------------- /__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Socret360/object-detection-in-keras/0b582a46f41623a1e1166c1e2b050f55788c3003/__init__.py -------------------------------------------------------------------------------- /callbacks/__init__.py: -------------------------------------------------------------------------------- 1 | from .model_checkpoint import ModelCheckpoint 2 | -------------------------------------------------------------------------------- /callbacks/model_checkpoint.py: -------------------------------------------------------------------------------- 1 | import os 2 | from matplotlib import pyplot as plt 3 | from tensorflow.keras.callbacks import Callback 4 | 5 | 6 | class ModelCheckpoint(Callback): 7 | """ A callback to save a model checkpoint every n batches (iterations) or n epochs. 8 | 9 | Args: 10 | - output_dir: Path to output directory in which to save the checkpoint. 
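- epoch_frequency: Save a checkpoint every n epochs (used when not None).
- iteration_frequency: Save a checkpoint every n batches (iterations).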
11 | """ 12 | 13 | def __init__(self, output_dir, epoch_frequency, iteration_frequency): 14 | self.output_dir = output_dir 15 | self.iteration_frequency = iteration_frequency 16 | self.epoch_frequency = epoch_frequency 17 | self.iterations = 1 18 | self.epochs = 1 19 | self.losses_by_iteration = [] 20 | self.losses_by_epoch = [] 21 | 22 | def on_epoch_end(self, epoch, logs={}): 23 | print(logs) 24 | if self.epoch_frequency is not None: 25 | loss = logs["loss"] 26 | self.losses_by_epoch.append(loss) 27 | if self.epochs % self.epoch_frequency == 0: 28 | loss = '%.4f' % loss 29 | name = f"cp_ep_{self.epochs}_loss_{loss}.h5" 30 | self.model.save_weights(os.path.join(self.output_dir, name)) 31 | plt.plot(list(range(1, self.epochs+1)), self.losses_by_epoch) 32 | plt.title('training loss') 33 | plt.ylabel('loss') 34 | plt.xlabel('epoch') 35 | plt.savefig(os.path.join(self.output_dir, "log.png")) 36 | self.epochs += 1 37 | -------------------------------------------------------------------------------- /configs/ssd300_mobilenetv1.json: -------------------------------------------------------------------------------- 1 | { 2 | "model": { 3 | "name": "ssd_mobilenetv1", 4 | "input_size": 300, 5 | "l2_regularization": 0.0005, 6 | "kernel_initializer": "he_normal", 7 | "width_multiplier": 1, 8 | "depth_multiplier": 1, 9 | "default_boxes": { 10 | "extra_box_for_ar_1": true, 11 | "clip_boxes": true, 12 | "variances": [ 13 | 0.1, 14 | 0.1, 15 | 0.2, 16 | 0.2 17 | ], 18 | "min_scale": 0.2, 19 | "max_scale": 0.9, 20 | "layers": [ 21 | { 22 | "name": "conv_pw_11_relu", 23 | "size": 18, 24 | "offset": [ 25 | 0.5, 26 | 0.5 27 | ], 28 | "aspect_ratios": [ 29 | 2.0, 30 | 3.0 31 | ] 32 | }, 33 | { 34 | "name": "conv_pw_13_relu", 35 | "size": 9, 36 | "offset": [ 37 | 0.5, 38 | 0.5 39 | ], 40 | "aspect_ratios": [ 41 | 2.0, 42 | 3.0 43 | ] 44 | }, 45 | { 46 | "name": "conv14_2/relu", 47 | "size": 5, 48 | "offset": [ 49 | 0.5, 50 | 0.5 51 | ], 52 | "aspect_ratios": [ 53 | 2.0, 54 | 3.0 55 | ] 56 | }, 57 | { 58 | "name": "conv15_2/relu", 59 | "size": 3, 60 | "offset": [ 61 | 0.5, 62 | 0.5 63 | ], 64 | "aspect_ratios": [ 65 | 2.0, 66 | 3.0 67 | ] 68 | }, 69 | { 70 | "name": "conv16_2/relu", 71 | "size": 2, 72 | "offset": [ 73 | 0.5, 74 | 0.5 75 | ], 76 | "aspect_ratios": [ 77 | 2.0, 78 | 3.0 79 | ] 80 | }, 81 | { 82 | "name": "conv17_2/relu", 83 | "size": 1, 84 | "offset": [ 85 | 0.5, 86 | 0.5 87 | ], 88 | "aspect_ratios": [ 89 | 2.0, 90 | 3.0 91 | ] 92 | } 93 | ] 94 | } 95 | }, 96 | "training": { 97 | "match_threshold": 0.5, 98 | "neutral_threshold": 0.3, 99 | "min_negative_boxes": 0, 100 | "negative_boxes_ratio": 3, 101 | "alpha": 1 102 | } 103 | } 104 | -------------------------------------------------------------------------------- /configs/ssd300_mobilenetv1_coco2017-train.json: -------------------------------------------------------------------------------- 1 | { 2 | "model": { 3 | "name": "ssd300_mobilenetv1", 4 | "input_size": 300, 5 | "l2_regularization": 0.0005, 6 | "kernel_initializer": "he_normal", 7 | "width_multiplier": 1, 8 | "depth_multiplier": 1, 9 | "default_boxes": { 10 | "extra_box_for_ar_1": true, 11 | "clip_boxes": true, 12 | "variances": [ 13 | 0.1, 14 | 0.1, 15 | 0.2, 16 | 0.2 17 | ], 18 | "min_scale": 0.15, 19 | "max_scale": 0.9, 20 | "layers": [ 21 | { 22 | "name": "conv_pw_11_relu", 23 | "size": 18, 24 | "offset": [ 25 | 0.5, 26 | 0.5 27 | ], 28 | "aspect_ratios": [ 29 | 1.0, 30 | 2.0, 31 | 0.5, 32 | 3.0, 33 | 0.33 34 | ] 35 | }, 36 | { 37 | "name": "conv_pw_13_relu", 38 | "size": 9, 39 | 
"offset": [ 40 | 0.5, 41 | 0.5 42 | ], 43 | "aspect_ratios": [ 44 | 1.0, 45 | 2.0, 46 | 0.5, 47 | 3.0, 48 | 0.33 49 | ] 50 | }, 51 | { 52 | "name": "conv14_2/relu", 53 | "size": 5, 54 | "offset": [ 55 | 0.5, 56 | 0.5 57 | ], 58 | "aspect_ratios": [ 59 | 1.0, 60 | 2.0, 61 | 0.5, 62 | 3.0, 63 | 0.33 64 | ] 65 | }, 66 | { 67 | "name": "conv15_2/relu", 68 | "size": 3, 69 | "offset": [ 70 | 0.5, 71 | 0.5 72 | ], 73 | "aspect_ratios": [ 74 | 1.0, 75 | 2.0, 76 | 0.5 77 | ] 78 | }, 79 | { 80 | "name": "conv16_2/relu", 81 | "size": 2, 82 | "offset": [ 83 | 0.5, 84 | 0.5 85 | ], 86 | "aspect_ratios": [ 87 | 1.0, 88 | 2.0, 89 | 0.5 90 | ] 91 | }, 92 | { 93 | "name": "conv17_2/relu", 94 | "size": 1, 95 | "offset": [ 96 | 0.5, 97 | 0.5 98 | ], 99 | "aspect_ratios": [ 100 | 1.0, 101 | 2.0, 102 | 0.5 103 | ] 104 | } 105 | ] 106 | } 107 | }, 108 | "training": { 109 | "match_threshold": 0.5, 110 | "neutral_threshold": 0.3, 111 | "min_negative_boxes": 0, 112 | "negative_boxes_ratio": 3, 113 | "alpha": 1 114 | } 115 | } -------------------------------------------------------------------------------- /configs/ssd300_mobilenetv2.json: -------------------------------------------------------------------------------- 1 | { 2 | "model": { 3 | "name": "ssd_mobilenetv2", 4 | "input_size": 300, 5 | "l2_regularization": 0.0005, 6 | "kernel_initializer": "he_normal", 7 | "width_multiplier": 0.5, 8 | "default_boxes": { 9 | "extra_box_for_ar_1": true, 10 | "clip_boxes": true, 11 | "variances": [ 12 | 0.1, 13 | 0.1, 14 | 0.2, 15 | 0.2 16 | ], 17 | "min_scale": 0.2, 18 | "max_scale": 0.9, 19 | "layers": [ 20 | { 21 | "name": "block_13_expand_relu", 22 | "size": 19, 23 | "offset": [ 24 | 0.5, 25 | 0.5 26 | ], 27 | "aspect_ratios": [ 28 | 1.0, 29 | 2.0, 30 | 0.5, 31 | 3.0, 32 | 0.33 33 | ] 34 | }, 35 | { 36 | "name": "block_16_project_BN", 37 | "size": 10, 38 | "offset": [ 39 | 0.5, 40 | 0.5 41 | ], 42 | "aspect_ratios": [ 43 | 1.0, 44 | 2.0, 45 | 0.5, 46 | 3.0, 47 | 0.33 48 | ] 49 | }, 50 | { 51 | "name": "conv17_2/relu", 52 | "size": 5, 53 | "offset": [ 54 | 0.5, 55 | 0.5 56 | ], 57 | "aspect_ratios": [ 58 | 1.0, 59 | 2.0, 60 | 0.5, 61 | 3.0, 62 | 0.33 63 | ] 64 | }, 65 | { 66 | "name": "conv18_2/relu", 67 | "size": 3, 68 | "offset": [ 69 | 0.5, 70 | 0.5 71 | ], 72 | "aspect_ratios": [ 73 | 1.0, 74 | 2.0, 75 | 0.5, 76 | 3.0, 77 | 0.33 78 | ] 79 | }, 80 | { 81 | "name": "conv19_2/relu", 82 | "size": 2, 83 | "offset": [ 84 | 0.5, 85 | 0.5 86 | ], 87 | "aspect_ratios": [ 88 | 1.0, 89 | 2.0, 90 | 0.5, 91 | 3.0, 92 | 0.33 93 | ] 94 | }, 95 | { 96 | "name": "conv20_2/relu", 97 | "size": 1, 98 | "offset": [ 99 | 0.5, 100 | 0.5 101 | ], 102 | "aspect_ratios": [ 103 | 1.0, 104 | 2.0, 105 | 0.5, 106 | 3.0, 107 | 0.33 108 | ] 109 | } 110 | ] 111 | } 112 | }, 113 | "training": { 114 | "match_threshold": 0.5, 115 | "neutral_threshold": 0.3, 116 | "min_negative_boxes": 0, 117 | "negative_boxes_ratio": 3, 118 | "alpha": 1 119 | } 120 | } -------------------------------------------------------------------------------- /configs/ssd300_mobilenetv2_coco2017-train.json: -------------------------------------------------------------------------------- 1 | { 2 | "model": { 3 | "name": "ssd_mobilenetv2", 4 | "input_size": 300, 5 | "l2_regularization": 0.0005, 6 | "kernel_initializer": "he_normal", 7 | "width_multiplier": 0.5, 8 | "default_boxes": { 9 | "extra_box_for_ar_1": true, 10 | "clip_boxes": true, 11 | "variances": [ 12 | 0.1, 13 | 0.1, 14 | 0.2, 15 | 0.2 16 | ], 17 | "min_scale": 0.15, 18 | "max_scale": 0.9, 19 | "layers": [ 20 | { 21 | 
"name": "block_13_expand_relu", 22 | "size": 19, 23 | "offset": [ 24 | 0.5, 25 | 0.5 26 | ], 27 | "aspect_ratios": [ 28 | 1.0, 29 | 2.0, 30 | 0.5, 31 | 3.0, 32 | 0.33 33 | ] 34 | }, 35 | { 36 | "name": "block_16_project_BN", 37 | "size": 10, 38 | "offset": [ 39 | 0.5, 40 | 0.5 41 | ], 42 | "aspect_ratios": [ 43 | 1.0, 44 | 2.0, 45 | 0.5, 46 | 3.0, 47 | 0.33 48 | ] 49 | }, 50 | { 51 | "name": "conv17_2/relu", 52 | "size": 5, 53 | "offset": [ 54 | 0.5, 55 | 0.5 56 | ], 57 | "aspect_ratios": [ 58 | 1.0, 59 | 2.0, 60 | 0.5, 61 | 3.0, 62 | 0.33 63 | ] 64 | }, 65 | { 66 | "name": "conv18_2/relu", 67 | "size": 3, 68 | "offset": [ 69 | 0.5, 70 | 0.5 71 | ], 72 | "aspect_ratios": [ 73 | 1.0, 74 | 2.0, 75 | 0.5, 76 | 3.0, 77 | 0.33 78 | ] 79 | }, 80 | { 81 | "name": "conv19_2/relu", 82 | "size": 2, 83 | "offset": [ 84 | 0.5, 85 | 0.5 86 | ], 87 | "aspect_ratios": [ 88 | 1.0, 89 | 2.0, 90 | 0.5, 91 | 3.0, 92 | 0.33 93 | ] 94 | }, 95 | { 96 | "name": "conv20_2/relu", 97 | "size": 1, 98 | "offset": [ 99 | 0.5, 100 | 0.5 101 | ], 102 | "aspect_ratios": [ 103 | 1.0, 104 | 2.0, 105 | 0.5, 106 | 3.0, 107 | 0.33 108 | ] 109 | } 110 | ] 111 | } 112 | }, 113 | "training": { 114 | "match_threshold": 0.5, 115 | "neutral_threshold": 0.3, 116 | "min_negative_boxes": 0, 117 | "negative_boxes_ratio": 3, 118 | "alpha": 1 119 | } 120 | } -------------------------------------------------------------------------------- /configs/ssd300_vgg16.json: -------------------------------------------------------------------------------- 1 | { 2 | "model": { 3 | "name": "ssd_vgg16", 4 | "input_size": 300, 5 | "l2_regularization": 0.0005, 6 | "kernel_initializer": "he_normal", 7 | "default_boxes": { 8 | "extra_box_for_ar_1": true, 9 | "clip_boxes": false, 10 | "variances": [ 11 | 0.1, 12 | 0.1, 13 | 0.2, 14 | 0.2 15 | ], 16 | "min_scale": 0.2, 17 | "max_scale": 0.9, 18 | "layers": [ 19 | { 20 | "name": "conv4_3", 21 | "size": 38, 22 | "offset": [ 23 | 0.5, 24 | 0.5 25 | ], 26 | "aspect_ratios": [ 27 | 1.0, 28 | 2.0, 29 | 0.5 30 | ] 31 | }, 32 | { 33 | "name": "fc7", 34 | "size": 19, 35 | "offset": [ 36 | 0.5, 37 | 0.5 38 | ], 39 | "aspect_ratios": [ 40 | 1.0, 41 | 2.0, 42 | 0.5, 43 | 3.0, 44 | 0.33 45 | ] 46 | }, 47 | { 48 | "name": "conv8_2", 49 | "size": 10, 50 | "offset": [ 51 | 0.5, 52 | 0.5 53 | ], 54 | "aspect_ratios": [ 55 | 1.0, 56 | 2.0, 57 | 0.5, 58 | 3.0, 59 | 0.33 60 | ] 61 | }, 62 | { 63 | "name": "conv9_2", 64 | "size": 5, 65 | "offset": [ 66 | 0.5, 67 | 0.5 68 | ], 69 | "aspect_ratios": [ 70 | 1.0, 71 | 2.0, 72 | 0.5, 73 | 3.0, 74 | 0.33 75 | ] 76 | }, 77 | { 78 | "name": "conv10_2", 79 | "size": 3, 80 | "offset": [ 81 | 0.5, 82 | 0.5 83 | ], 84 | "aspect_ratios": [ 85 | 1.0, 86 | 2.0, 87 | 0.5 88 | ] 89 | }, 90 | { 91 | "name": "conv11_2", 92 | "size": 1, 93 | "offset": [ 94 | 0.5, 95 | 0.5 96 | ], 97 | "aspect_ratios": [ 98 | 1.0, 99 | 2.0, 100 | 0.5 101 | ] 102 | } 103 | ] 104 | } 105 | }, 106 | "training": { 107 | "match_threshold": 0.5, 108 | "neutral_threshold": 0.3, 109 | "min_negative_boxes": 0, 110 | "negative_boxes_ratio": 3, 111 | "alpha": 1 112 | } 113 | } -------------------------------------------------------------------------------- /configs/ssd300_vgg16_pascal-voc-07-12.json: -------------------------------------------------------------------------------- 1 | { 2 | "model": { 3 | "name": "ssd_vgg16", 4 | "input_size": 300, 5 | "l2_regularization": 0.0005, 6 | "kernel_initializer": "he_normal", 7 | "base_network_trainable": true, 8 | "default_boxes": { 9 | "extra_box_for_ar_1": true, 10 | "clip_boxes": 
true, 11 | "variances": [ 12 | 0.1, 13 | 0.1, 14 | 0.2, 15 | 0.2 16 | ], 17 | "min_scale": 0.1, 18 | "max_scale": 1.05, 19 | "layers": [ 20 | { 21 | "name": "conv4_3", 22 | "size": 38, 23 | "offset": [ 24 | 0.5, 25 | 0.5 26 | ], 27 | "aspect_ratios": [ 28 | 1.0, 29 | 2.0, 30 | 0.5 31 | ] 32 | }, 33 | { 34 | "name": "fc7", 35 | "size": 19, 36 | "offset": [ 37 | 0.5, 38 | 0.5 39 | ], 40 | "aspect_ratios": [ 41 | 1.0, 42 | 2.0, 43 | 0.5, 44 | 3.0, 45 | 0.33 46 | ] 47 | }, 48 | { 49 | "name": "conv8_2", 50 | "size": 10, 51 | "offset": [ 52 | 0.5, 53 | 0.5 54 | ], 55 | "aspect_ratios": [ 56 | 1.0, 57 | 2.0, 58 | 0.5, 59 | 3.0, 60 | 0.33 61 | ] 62 | }, 63 | { 64 | "name": "conv9_2", 65 | "size": 5, 66 | "offset": [ 67 | 0.5, 68 | 0.5 69 | ], 70 | "aspect_ratios": [ 71 | 1.0, 72 | 2.0, 73 | 0.5, 74 | 3.0, 75 | 0.33 76 | ] 77 | }, 78 | { 79 | "name": "conv10_2", 80 | "size": 3, 81 | "offset": [ 82 | 0.5, 83 | 0.5 84 | ], 85 | "aspect_ratios": [ 86 | 1.0, 87 | 2.0, 88 | 0.5 89 | ] 90 | }, 91 | { 92 | "name": "conv11_2", 93 | "size": 1, 94 | "offset": [ 95 | 0.5, 96 | 0.5 97 | ], 98 | "aspect_ratios": [ 99 | 1.0, 100 | 2.0, 101 | 0.5 102 | ] 103 | } 104 | ] 105 | } 106 | }, 107 | "training": { 108 | "match_threshold": 0.5, 109 | "neutral_threshold": 0.3, 110 | "min_negative_boxes": 0, 111 | "negative_boxes_ratio": 3, 112 | "alpha": 1, 113 | "optimizer": { 114 | "name": "adam", 115 | "beta_1": 0.9, 116 | "beta_2": 0.999, 117 | "epsilon": 1e-08, 118 | "decay": 0.0, 119 | "momentum": 0.9, 120 | "nesterov": false 121 | } 122 | } 123 | } -------------------------------------------------------------------------------- /configs/ssd300_vgg16_pascal-voc-2007.json: -------------------------------------------------------------------------------- 1 | { 2 | "model": { 3 | "name": "ssd_vgg16", 4 | "input_size": 300, 5 | "l2_regularization": 0.0005, 6 | "kernel_initializer": "he_normal", 7 | "base_network_trainable": false, 8 | "default_boxes": { 9 | "extra_box_for_ar_1": true, 10 | "clip_boxes": true, 11 | "variances": [ 12 | 0.1, 13 | 0.1, 14 | 0.2, 15 | 0.2 16 | ], 17 | "min_scale": 0.1, 18 | "max_scale": 1.05, 19 | "layers": [ 20 | { 21 | "name": "conv4_3", 22 | "size": 38, 23 | "offset": [ 24 | 0.5, 25 | 0.5 26 | ], 27 | "aspect_ratios": [ 28 | 1.0, 29 | 2.0, 30 | 0.5 31 | ] 32 | }, 33 | { 34 | "name": "fc7", 35 | "size": 19, 36 | "offset": [ 37 | 0.5, 38 | 0.5 39 | ], 40 | "aspect_ratios": [ 41 | 1.0, 42 | 2.0, 43 | 0.5, 44 | 3.0, 45 | 0.33 46 | ] 47 | }, 48 | { 49 | "name": "conv8_2", 50 | "size": 10, 51 | "offset": [ 52 | 0.5, 53 | 0.5 54 | ], 55 | "aspect_ratios": [ 56 | 1.0, 57 | 2.0, 58 | 0.5, 59 | 3.0, 60 | 0.33 61 | ] 62 | }, 63 | { 64 | "name": "conv9_2", 65 | "size": 5, 66 | "offset": [ 67 | 0.5, 68 | 0.5 69 | ], 70 | "aspect_ratios": [ 71 | 1.0, 72 | 2.0, 73 | 0.5, 74 | 3.0, 75 | 0.33 76 | ] 77 | }, 78 | { 79 | "name": "conv10_2", 80 | "size": 3, 81 | "offset": [ 82 | 0.5, 83 | 0.5 84 | ], 85 | "aspect_ratios": [ 86 | 1.0, 87 | 2.0, 88 | 0.5 89 | ] 90 | }, 91 | { 92 | "name": "conv11_2", 93 | "size": 1, 94 | "offset": [ 95 | 0.5, 96 | 0.5 97 | ], 98 | "aspect_ratios": [ 99 | 1.0, 100 | 2.0, 101 | 0.5 102 | ] 103 | } 104 | ] 105 | } 106 | }, 107 | "training": { 108 | "match_threshold": 0.5, 109 | "neutral_threshold": 0.3, 110 | "min_negative_boxes": 0, 111 | "negative_boxes_ratio": 3, 112 | "alpha": 1, 113 | "optimizer": { 114 | "name": "adam", 115 | "beta_1": 0.9, 116 | "beta_2": 0.999, 117 | "epsilon": 1e-08, 118 | "decay": 0.0 119 | } 120 | } 121 | } 
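The min_scale/max_scale pair in each config above is not stored per layer; it is stretched into one scale per prediction layer at load time. A minimal sketch of that expansion, mirroring the np.linspace call in display_default_boxes.py further down this dump (the config path is just an example):

    import json
    import numpy as np

    # Expand min_scale/max_scale into one scale per prediction layer.
    with open("configs/ssd300_vgg16.json", "r") as f:
        config = json.load(f)

    default_boxes_config = config["model"]["default_boxes"]
    layers = default_boxes_config["layers"]
    scales = np.linspace(
        default_boxes_config["min_scale"],
        default_boxes_config["max_scale"],
        len(layers)
    )
    for i, layer in enumerate(layers):
        # the deepest layer has no successor, so next_scale falls back to 1
        next_scale = scales[i + 1] if i + 1 < len(layers) else 1
        print(layer["name"], float(scales[i]), float(next_scale))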
-------------------------------------------------------------------------------- /configs/ssd320_mobilenetv2_coco2017-train.json: -------------------------------------------------------------------------------- 1 | { 2 | "model": { 3 | "name": "ssd_mobilenetv2", 4 | "input_size": 300, 5 | "l2_regularization": 0.0005, 6 | "kernel_initializer": "he_normal", 7 | "width_multiplier": 0.5, 8 | "default_boxes": { 9 | "extra_box_for_ar_1": true, 10 | "clip_boxes": true, 11 | "variances": [ 12 | 0.1, 13 | 0.1, 14 | 0.2, 15 | 0.2 16 | ], 17 | "min_scale": 0.15, 18 | "max_scale": 0.9, 19 | "layers": [ 20 | { 21 | "name": "block_13_expand_relu", 22 | "size": 19, 23 | "offset": [ 24 | 0.5, 25 | 0.5 26 | ], 27 | "aspect_ratios": [ 28 | 1.0, 29 | 2.0, 30 | 0.5, 31 | 3.0, 32 | 0.33 33 | ] 34 | }, 35 | { 36 | "name": "block_16_project_BN", 37 | "size": 10, 38 | "offset": [ 39 | 0.5, 40 | 0.5 41 | ], 42 | "aspect_ratios": [ 43 | 1.0, 44 | 2.0, 45 | 0.5, 46 | 3.0, 47 | 0.33 48 | ] 49 | }, 50 | { 51 | "name": "conv17_2/relu", 52 | "size": 5, 53 | "offset": [ 54 | 0.5, 55 | 0.5 56 | ], 57 | "aspect_ratios": [ 58 | 1.0, 59 | 2.0, 60 | 0.5, 61 | 3.0, 62 | 0.33 63 | ] 64 | }, 65 | { 66 | "name": "conv18_2/relu", 67 | "size": 3, 68 | "offset": [ 69 | 0.5, 70 | 0.5 71 | ], 72 | "aspect_ratios": [ 73 | 1.0, 74 | 2.0, 75 | 0.5, 76 | 3.0, 77 | 0.33 78 | ] 79 | }, 80 | { 81 | "name": "conv19_2/relu", 82 | "size": 2, 83 | "offset": [ 84 | 0.5, 85 | 0.5 86 | ], 87 | "aspect_ratios": [ 88 | 1.0, 89 | 2.0, 90 | 0.5, 91 | 3.0, 92 | 0.33 93 | ] 94 | }, 95 | { 96 | "name": "conv20_2/relu", 97 | "size": 1, 98 | "offset": [ 99 | 0.5, 100 | 0.5 101 | ], 102 | "aspect_ratios": [ 103 | 1.0, 104 | 2.0, 105 | 0.5, 106 | 3.0, 107 | 0.33 108 | ] 109 | } 110 | ] 111 | } 112 | }, 113 | "training": { 114 | "match_threshold": 0.5, 115 | "neutral_threshold": 0.3, 116 | "min_negative_boxes": 0, 117 | "negative_boxes_ratio": 3, 118 | "alpha": 1 119 | } 120 | } -------------------------------------------------------------------------------- /configs/tbpp384_vgg16.json: -------------------------------------------------------------------------------- 1 | { 2 | "model": { 3 | "name": "tbpp_vgg16", 4 | "input_size": 384, 5 | "l2_regularization": 0.0005, 6 | "kernel_initializer": "he_normal", 7 | "default_boxes": { 8 | "extra_box_for_ar_1": true, 9 | "clip_boxes": true, 10 | "variances": [ 11 | 0.1, 12 | 0.1, 13 | 0.2, 14 | 0.2 15 | ], 16 | "min_scale": 0.2, 17 | "max_scale": 0.9, 18 | "layers": [ 19 | { 20 | "name": "conv4_3", 21 | "size": 48, 22 | "offset": [ 23 | 0.5, 24 | 0.7 25 | ], 26 | "aspect_ratios": [ 27 | 1, 28 | 2, 29 | 3, 30 | 5, 31 | 0.5, 32 | 0.33, 33 | 0.2 34 | ] 35 | }, 36 | { 37 | "name": "fc7", 38 | "size": 24, 39 | "offset": [ 40 | 0.5, 41 | 0.7 42 | ], 43 | "aspect_ratios": [ 44 | 1, 45 | 2, 46 | 3, 47 | 5, 48 | 0.5, 49 | 0.33, 50 | 0.2 51 | ] 52 | }, 53 | { 54 | "name": "conv8_2", 55 | "size": 12, 56 | "offset": [ 57 | 0.5, 58 | 0.7 59 | ], 60 | "aspect_ratios": [ 61 | 1, 62 | 2, 63 | 3, 64 | 5, 65 | 0.5, 66 | 0.33, 67 | 0.2 68 | ] 69 | }, 70 | { 71 | "name": "conv9_2", 72 | "size": 6, 73 | "offset": [ 74 | 0.5, 75 | 0.7 76 | ], 77 | "aspect_ratios": [ 78 | 1, 79 | 2, 80 | 3, 81 | 5, 82 | 0.5, 83 | 0.33, 84 | 0.2 85 | ] 86 | }, 87 | { 88 | "name": "conv10_2", 89 | "size": 4, 90 | "offset": [ 91 | 0.5, 92 | 0.7 93 | ], 94 | "aspect_ratios": [ 95 | 1, 96 | 2, 97 | 3, 98 | 5, 99 | 0.5, 100 | 0.33, 101 | 0.2 102 | ] 103 | }, 104 | { 105 | "name": "conv11_2", 106 | "size": 2, 107 | "offset": [ 108 | 0.5, 109 | 0.7 110 | ], 111 | 
"aspect_ratios": [ 112 | 1, 113 | 2, 114 | 3, 115 | 5, 116 | 0.5, 117 | 0.33, 118 | 0.2 119 | ] 120 | } 121 | ] 122 | } 123 | }, 124 | "training": { 125 | "match_threshold": 0.5, 126 | "neutral_threshold": 0.3, 127 | "min_negative_boxes": 0, 128 | "negative_boxes_ratio": 3, 129 | "alpha": 0.2 130 | } 131 | } -------------------------------------------------------------------------------- /configs/tbpp768_vgg16.json: -------------------------------------------------------------------------------- 1 | { 2 | "model": { 3 | "name": "tbpp_vgg16", 4 | "input_size": 768, 5 | "l2_regularization": 0.0005, 6 | "kernel_initializer": "he_normal", 7 | "default_boxes": { 8 | "extra_box_for_ar_1": true, 9 | "clip_boxes": true, 10 | "variances": [ 11 | 0.1, 12 | 0.1, 13 | 0.2, 14 | 0.2 15 | ], 16 | "min_scale": 0.2, 17 | "max_scale": 0.9, 18 | "layers": [ 19 | { 20 | "name": "conv4_3", 21 | "size": 48, 22 | "offset": [ 23 | 0.5, 24 | 0.7 25 | ], 26 | "aspect_ratios": [ 27 | 1, 28 | 2, 29 | 3, 30 | 5, 31 | 0.5, 32 | 0.33, 33 | 0.2 34 | ] 35 | }, 36 | { 37 | "name": "fc7", 38 | "size": 24, 39 | "offset": [ 40 | 0.5, 41 | 0.7 42 | ], 43 | "aspect_ratios": [ 44 | 1, 45 | 2, 46 | 3, 47 | 5, 48 | 0.5, 49 | 0.33, 50 | 0.2 51 | ] 52 | }, 53 | { 54 | "name": "conv8_2", 55 | "size": 12, 56 | "offset": [ 57 | 0.5, 58 | 0.7 59 | ], 60 | "aspect_ratios": [ 61 | 1, 62 | 2, 63 | 3, 64 | 5, 65 | 0.5, 66 | 0.33, 67 | 0.2 68 | ] 69 | }, 70 | { 71 | "name": "conv9_2", 72 | "size": 6, 73 | "offset": [ 74 | 0.5, 75 | 0.7 76 | ], 77 | "aspect_ratios": [ 78 | 1, 79 | 2, 80 | 3, 81 | 5, 82 | 0.5, 83 | 0.33, 84 | 0.2 85 | ] 86 | }, 87 | { 88 | "name": "conv10_2", 89 | "size": 4, 90 | "offset": [ 91 | 0.5, 92 | 0.7 93 | ], 94 | "aspect_ratios": [ 95 | 1, 96 | 2, 97 | 3, 98 | 5, 99 | 0.5, 100 | 0.33, 101 | 0.2 102 | ] 103 | }, 104 | { 105 | "name": "conv11_2", 106 | "size": 2, 107 | "offset": [ 108 | 0.5, 109 | 0.7 110 | ], 111 | "aspect_ratios": [ 112 | 1, 113 | 2, 114 | 3, 115 | 5, 116 | 0.5, 117 | 0.33, 118 | 0.2 119 | ] 120 | } 121 | ] 122 | } 123 | }, 124 | "training": { 125 | "match_threshold": 0.5, 126 | "neutral_threshold": 0.3, 127 | "min_negative_boxes": 0, 128 | "negative_boxes_ratio": 3, 129 | "alpha": 0.2 130 | } 131 | } -------------------------------------------------------------------------------- /convert.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import argparse 4 | import tensorflow as tf 5 | from networks import SSD_VGG16, SSD_MOBILENET, SSD_MOBILENETV2 6 | 7 | SUPPORTED_TYPES = [ 8 | "keras", 9 | "tflite" 10 | ] 11 | 12 | parser = argparse.ArgumentParser( 13 | description='Converts a supported model into tflite.') 14 | parser.add_argument('config', type=str, help='path to config file.') 15 | parser.add_argument('weights', type=str, help='path to the weight file.') 16 | parser.add_argument('output_dir', type=str, help='path to the output folder.') 17 | parser.add_argument('--label_maps', type=str, help='path to label maps file.') 18 | parser.add_argument('--output_type', type=str, 19 | help='the type of the output model. 
One of type: "keras", "tflite"', default="keras")
20 | parser.add_argument('--num_predictions', type=int,
21 |                     help='the number of detections to be output as final detections', default=10)
22 | args = parser.parse_args()
23 | 
24 | assert args.label_maps is not None and os.path.exists(args.label_maps), "label_maps file does not exist"  # guard against None: --label_maps is optional
25 | assert os.path.exists(args.config), "config file does not exist"
26 | assert args.num_predictions > 0, "num_predictions must be larger than zero"
27 | assert args.output_type in SUPPORTED_TYPES, f"{args.output_type} is not supported yet. Please choose one of type {SUPPORTED_TYPES}"
28 | 
29 | if not os.path.exists(args.output_dir):
30 |     os.makedirs(args.output_dir)
31 | 
32 | 
33 | with open(args.config, "r") as config_file:
34 |     config = json.load(config_file)
35 | 
36 | model_config = config["model"]
37 | 
38 | if model_config["name"] == "ssd_vgg16":
39 |     with open(args.label_maps, "r") as file:
40 |         label_maps = [line.strip("\n") for line in file.readlines()]
41 |     model = SSD_VGG16(
42 |         config,
43 |         label_maps,
44 |         is_training=False,
45 |         num_predictions=args.num_predictions)
46 | 
47 | elif model_config["name"] == "ssd_mobilenetv1":
48 |     with open(args.label_maps, "r") as file:
49 |         label_maps = [line.strip("\n") for line in file.readlines()]
50 |     model = SSD_MOBILENET(
51 |         config,
52 |         label_maps,
53 |         is_training=False,
54 |         num_predictions=args.num_predictions)
55 | elif model_config["name"] == "ssd_mobilenetv2":
56 |     with open(args.label_maps, "r") as file:
57 |         label_maps = [line.strip("\n") for line in file.readlines()]
58 |     model = SSD_MOBILENETV2(
59 |         config,
60 |         label_maps,
61 |         is_training=False,
62 |         num_predictions=args.num_predictions)
63 | else:
64 |     print(
65 |         f"model with name {model_config['name']} has not been implemented yet")
66 |     exit()
67 | 
68 | model.load_weights(args.weights)
69 | 
70 | config_file_name = os.path.basename(args.config)
71 | config_file_name = config_file_name[:config_file_name.index(".")]
72 | if args.output_type == "keras":
73 |     model.save(os.path.join(args.output_dir, f"{config_file_name}.h5"))
74 | elif args.output_type == "tflite":
75 |     tflite_converter = tf.lite.TFLiteConverter.from_keras_model(model)
76 |     tflite_converter.optimizations = [tf.lite.Optimize.DEFAULT]
77 |     tflite_converter.target_spec.supported_ops = [
78 |         tf.lite.OpsSet.TFLITE_BUILTINS,
79 |         tf.lite.OpsSet.SELECT_TF_OPS,
80 |     ]
81 |     tflite_converter.target_spec.supported_types = [tf.float16]  # tf.float16 is a dtype, not an op set, so it belongs in supported_types
82 |     tflite_model = tflite_converter.convert()
83 |     open(os.path.join(args.output_dir, f"{config_file_name}.tflite"), 'wb').write(
84 |         tflite_model)
85 | 
--------------------------------------------------------------------------------
/custom_layers/__init__.py:
--------------------------------------------------------------------------------
1 | from .decode_tbpp_predictions import DecodeTBPPPredictions
2 | from .decode_ssd_predictions import DecodeSSDPredictions
3 | from .l2_normalization import L2Normalization
4 | from .default_boxes import DefaultBoxes
5 | 
--------------------------------------------------------------------------------
/custom_layers/decode_ssd_predictions.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | from tensorflow.keras.layers import Layer
3 | from utils import ssd_utils
4 | 
5 | 
6 | class DecodeSSDPredictions(Layer):
7 |     def __init__(
8 |         self,
9 |         input_size,
10 |         nms_max_output_size=400,
11 |         confidence_threshold=0.01,
12 |         iou_threshold=0.45,
13 |         num_predictions=10,
14 |         **kwargs
15 |     ):
16 |         self.input_size = input_size
17 | 
self.nms_max_output_size = nms_max_output_size 18 | self.confidence_threshold = confidence_threshold 19 | self.iou_threshold = iou_threshold 20 | self.num_predictions = num_predictions 21 | super(DecodeSSDPredictions, self).__init__(**kwargs) 22 | 23 | def build(self, input_shape): 24 | super(DecodeSSDPredictions, self).build(input_shape) 25 | 26 | def call(self, inputs): 27 | y_pred = ssd_utils.decode_predictions( 28 | y_pred=inputs, 29 | input_size=self.input_size, 30 | nms_max_output_size=self.nms_max_output_size, 31 | confidence_threshold=self.confidence_threshold, 32 | iou_threshold=self.iou_threshold, 33 | num_predictions=self.num_predictions 34 | ) 35 | return y_pred 36 | 37 | def get_config(self): 38 | config = { 39 | 'input_size': self.input_size, 40 | 'nms_max_output_size': self.nms_max_output_size, 41 | 'confidence_threshold': self.confidence_threshold, 42 | 'iou_threshold': self.iou_threshold, 43 | 'num_predictions': self.num_predictions, 44 | } 45 | base_config = super(DecodeSSDPredictions, self).get_config() 46 | return dict(list(base_config.items()) + list(config.items())) 47 | 48 | @classmethod 49 | def from_config(cls, config): 50 | return cls(**config) 51 | -------------------------------------------------------------------------------- /custom_layers/decode_tbpp_predictions.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from tensorflow.keras.layers import Layer 3 | from utils import textboxes_utils 4 | 5 | 6 | class DecodeTBPPPredictions(Layer): 7 | def __init__( 8 | self, 9 | input_size, 10 | nms_max_output_size=400, 11 | confidence_threshold=0.01, 12 | iou_threshold=0.45, 13 | num_predictions=10, 14 | **kwargs 15 | ): 16 | self.input_size = input_size 17 | self.nms_max_output_size = nms_max_output_size 18 | self.confidence_threshold = confidence_threshold 19 | self.iou_threshold = iou_threshold 20 | self.num_predictions = num_predictions 21 | super(DecodeTBPPPredictions, self).__init__(**kwargs) 22 | 23 | def build(self, input_shape): 24 | super(DecodeTBPPPredictions, self).build(input_shape) 25 | 26 | def call(self, inputs): 27 | y_pred = textboxes_utils.decode_predictions( 28 | y_pred=inputs, 29 | input_size=self.input_size, 30 | nms_max_output_size=self.nms_max_output_size, 31 | confidence_threshold=self.confidence_threshold, 32 | iou_threshold=self.iou_threshold, 33 | num_predictions=self.num_predictions 34 | ) 35 | return y_pred 36 | 37 | def get_config(self): 38 | config = { 39 | 'input_size': self.input_size, 40 | 'nms_max_output_size': self.nms_max_output_size, 41 | 'confidence_threshold': self.confidence_threshold, 42 | 'iou_threshold': self.iou_threshold, 43 | 'num_predictions': self.num_predictions, 44 | } 45 | base_config = super(DecodeTBPPPredictions, self).get_config() 46 | return dict(list(base_config.items()) + list(config.items())) 47 | 48 | @classmethod 49 | def from_config(cls, config): 50 | return cls(**config) 51 | -------------------------------------------------------------------------------- /custom_layers/default_boxes.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | from tensorflow.keras.layers import Layer 4 | from utils.ssd_utils import get_number_default_boxes, generate_default_boxes_for_feature_map 5 | 6 | 7 | class DefaultBoxes(Layer): 8 | """ A custom keras layer that generates default boxes for a given feature map. 
9 | 
10 |     Args:
11 |     - image_shape: The shape of the input image
12 |     - scale: The current scale for the default box.
13 |     - next_scale: The next scale for the default box.
14 |     - aspect_ratios: The aspect ratios for the default boxes.
15 |     - offset: The offset for the center of the default boxes. Defaults to the center of each grid cell.
16 |     - variances: The normalization values for each bounding box property (cx, cy, width, height).
17 |     - extra_box_for_ar_1: Whether to add an extra box for the default box with aspect ratio 1.
18 |     Returns:
19 |     - A tensor of shape (batch_size, feature_map_size, feature_map_size, num_default_boxes, 8)
20 | 
21 |     Raises:
22 |     - AssertionError: if the feature map height does not equal the feature map width.
23 |     - AssertionError: if the image width does not equal the image height.
24 | 
25 |     Code References:
26 |     - https://github.com/pierluigiferrari/ssd_keras/blob/master/keras_layers/keras_layer_AnchorBoxes.py
27 | 
28 |     Paper References:
29 |     - Liu, W., Anguelov, D., Erhan, D., Szegedy, C., Reed, S., Fu, C. Y., & Berg, A. C. (2016).
30 |       SSD: Single Shot MultiBox Detector. https://arxiv.org/abs/1512.02325
31 |     """
32 | 
33 |     def __init__(
34 |         self,
35 |         image_shape,
36 |         scale,
37 |         next_scale,
38 |         aspect_ratios,
39 |         variances,
40 |         offset=(0.5, 0.5),
41 |         extra_box_for_ar_1=True,
42 |         clip_boxes=True,
43 |         **kwargs
44 |     ):
45 |         self.image_shape = image_shape
46 |         self.scale = scale
47 |         self.next_scale = next_scale
48 |         self.aspect_ratios = aspect_ratios
49 |         self.extra_box_for_ar_1 = extra_box_for_ar_1
50 |         self.clip_boxes = clip_boxes  # no trailing comma here: it would silently wrap the flag in a tuple
51 |         self.variances = variances
52 |         self.offset = offset
53 |         super(DefaultBoxes, self).__init__(**kwargs)
54 | 
55 |     def build(self, input_shape):
56 |         _, feature_map_height, feature_map_width, _ = input_shape
57 |         image_height, image_width, _ = self.image_shape
58 | 
59 |         assert feature_map_height == feature_map_width, "feature map width must be equal to feature map height"
60 |         assert image_height == image_width, "image width must be equal to image height"
61 | 
62 |         self.feature_map_size = min(feature_map_height, feature_map_width)
63 |         self.image_size = min(image_height, image_width)
64 |         super(DefaultBoxes, self).build(input_shape)
65 | 
66 |     def call(self, inputs):
67 |         default_boxes = generate_default_boxes_for_feature_map(
68 |             feature_map_size=self.feature_map_size,
69 |             image_size=self.image_size,
70 |             offset=self.offset,
71 |             scale=self.scale,
72 |             next_scale=self.next_scale,
73 |             aspect_ratios=self.aspect_ratios,
74 |             variances=self.variances,
75 |             extra_box_for_ar_1=self.extra_box_for_ar_1,
76 |             clip_boxes=self.clip_boxes,
77 |         )
78 |         default_boxes = np.expand_dims(default_boxes, axis=0)
79 |         default_boxes = tf.constant(default_boxes, dtype='float32')
80 |         default_boxes = tf.tile(default_boxes, (tf.shape(inputs)[0], 1, 1, 1, 1))  # repeat the boxes once per batch item
81 |         return default_boxes
82 | 
83 |     def get_config(self):
84 |         config = {
85 |             "image_shape": self.image_shape,
86 |             "scale": self.scale,
87 |             "next_scale": self.next_scale,
88 |             "aspect_ratios": self.aspect_ratios,
89 |             "extra_box_for_ar_1": self.extra_box_for_ar_1,
90 |             "clip_boxes": self.clip_boxes,
91 |             "variances": self.variances,
92 |             "offset": self.offset
93 |             # feature_map_size and image_size are recomputed in build(), so they are
94 |             # not serialized; passing them to __init__ via from_config would fail.
95 |         }
96 |         base_config = super(DefaultBoxes, self).get_config()
97 |         return dict(list(base_config.items()) + list(config.items()))
98 | 
99 |     @classmethod
100 |     def from_config(cls, config):
101 |         return cls(**config)
102 | 
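How many boxes DefaultBoxes emits per grid cell follows directly from aspect_ratios and extra_box_for_ar_1. The repository's own helper, utils/ssd_utils/get_number_default_boxes.py, is not shown in this excerpt; a sketch of the usual SSD convention it presumably implements:

    # Sketch of the standard SSD box count per feature-map cell; the repo's
    # get_number_default_boxes (not shown in this excerpt) may differ in details.
    def get_number_default_boxes(aspect_ratios, extra_box_for_ar_1=True):
        num_boxes = len(aspect_ratios)
        # SSD adds one extra box with scale sqrt(scale * next_scale) for aspect ratio 1
        if 1.0 in aspect_ratios and extra_box_for_ar_1:
            num_boxes += 1
        return num_boxes

    assert get_number_default_boxes([1.0, 2.0, 0.5]) == 4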
--------------------------------------------------------------------------------
/custom_layers/l2_normalization.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import tensorflow as tf
3 | from tensorflow.keras.layers import Layer
4 | 
5 | 
6 | class L2Normalization(Layer):
7 |     """ A custom layer that performs l2 normalization on its inputs with learnable parameter gamma.
8 |     Note:
9 |     1. This implementation is taken from https://github.com/pierluigiferrari/ssd_keras/blob/master/keras_layers/keras_layer_L2Normalization.py with slight modifications:
10 |     - the axis variable is passed as a parameter instead of being a fixed value
11 |     - K.variable is replaced with tf.Variable
12 |     - fixed a dtype mismatch by specifying dtype=np.float32
13 |     2. get_config & from_config are necessary to make the layer serializable
14 |     3. we need to multiply self.gamma_init with np.ones((input_shape[self.axis],), dtype=np.float32)
15 |     to turn gamma into the shape of (input_shape[self.axis],), which allows us to broadcast those values
16 |     when multiplying with the output in the call function.
17 | 
18 |     Args:
19 |     - gamma_init: The initial scaling parameter. Defaults to 20 following the SSD paper.
20 |     - axis: the axis to apply the scaling to
21 | 
22 |     Returns:
23 |     - A scaled tensor with the same shape as input_shape
24 | 
25 |     Code References:
26 |     - https://github.com/pierluigiferrari/ssd_keras/blob/master/keras_layers/keras_layer_L2Normalization.py
27 | 
28 |     Paper References:
29 |     - Liu, W., Anguelov, D., Erhan, D., Szegedy, C., Reed, S., Fu, C. Y., & Berg, A. C. (2016).
30 |       SSD: Single Shot MultiBox Detector. https://arxiv.org/abs/1512.02325
31 |     - Liu, W., Rabinovich, A., & Berg, A. C. (2016).
32 |       ParseNet: Looking Wider to See Better. International Conference on Learning Representations (ICLR) 2016.
33 | https://arxiv.org/abs/1506.04579 34 | """ 35 | 36 | def __init__(self, gamma_init=20, axis=-1, **kwargs): 37 | self.axis = axis 38 | self.gamma_init = gamma_init 39 | super(L2Normalization, self).__init__(**kwargs) 40 | 41 | def build(self, input_shape): 42 | gamma = self.gamma_init * np.ones((input_shape[self.axis],), dtype=np.float32) 43 | self.gamma = tf.Variable(gamma, trainable=True) 44 | super(L2Normalization, self).build(input_shape) 45 | 46 | def call(self, inputs): 47 | return tf.math.l2_normalize(inputs, self.axis) * self.gamma 48 | 49 | def get_config(self): 50 | config = {'gamma_init': self.gamma_init, 'axis': self.axis} 51 | base_config = super(L2Normalization, self).get_config() 52 | return dict(list(base_config.items()) + list(config.items())) 53 | 54 | @classmethod 55 | def from_config(cls, config): 56 | return cls(**config) 57 | -------------------------------------------------------------------------------- /data_generators/__init__.py: -------------------------------------------------------------------------------- 1 | from .ssd_data_generator import SSD_DATA_GENERATOR 2 | from .tbpp_data_generator import TBPP_DATA_GENERATOR 3 | -------------------------------------------------------------------------------- /display_default_boxes.py: -------------------------------------------------------------------------------- 1 | import os 2 | import cv2 3 | import json 4 | import argparse 5 | import numpy as np 6 | from glob import glob 7 | from xml.dom import minidom 8 | import xml.etree.cElementTree as ET 9 | from pycocotools.coco import COCO 10 | from utils import ssd_utils 11 | 12 | parser = argparse.ArgumentParser(description='Displays default boxes in a selected image.') 13 | parser.add_argument('config', type=str, help='path to config file.') 14 | parser.add_argument('image', type=str, help='path to image file.') 15 | args = parser.parse_args() 16 | 17 | print("loading config file") 18 | with open(args.config, "r") as config_file: 19 | config = json.load(config_file) 20 | 21 | 22 | model_config = config["model"] 23 | default_boxes_config = model_config["default_boxes"] 24 | input_size = model_config["input_size"] 25 | extra_box_for_ar_1 = default_boxes_config["extra_box_for_ar_1"] 26 | clip_boxes = default_boxes_config["clip_boxes"] 27 | 28 | print("loading image file") 29 | image = cv2.imread(args.image) 30 | image = cv2.resize(image, (input_size, input_size)) 31 | 32 | print("generating default boxes") 33 | scales = np.linspace( 34 | default_boxes_config["min_scale"], 35 | default_boxes_config["max_scale"], 36 | len(default_boxes_config["layers"]) 37 | ) 38 | mbox_conf_layers = [] 39 | mbox_loc_layers = [] 40 | mbox_default_boxes_layers = [] 41 | for i, layer in enumerate(default_boxes_config["layers"]): 42 | temp_image = image.copy() 43 | print(f"displaying default boxes for layer: {layer['name']}") 44 | layer_default_boxes = ssd_utils.generate_default_boxes_for_feature_map( 45 | feature_map_size=layer["size"], 46 | image_size=input_size, 47 | offset=layer["offset"], 48 | scale=scales[i], 49 | next_scale=scales[i+1] if i+1 <= len(default_boxes_config["layers"]) - 1 else 1, 50 | aspect_ratios=layer["aspect_ratios"], 51 | variances=default_boxes_config["variances"], 52 | extra_box_for_ar_1=extra_box_for_ar_1, 53 | clip_boxes=clip_boxes 54 | ) 55 | 56 | grid_size = input_size / layer["size"] 57 | offset = layer["offset"] 58 | offset_x, offset_y = offset 59 | 60 | cx = np.linspace(offset_x * grid_size, input_size - (offset_x * grid_size), layer["size"]) 61 | cy = 
np.linspace(offset_y * grid_size, input_size - (offset_y * grid_size), layer["size"]) 62 | 63 | for n in range(len(cx)): 64 | for m in range(len(cy)): 65 | cv2.circle( 66 | temp_image, 67 | (int(cx[n]), int(cy[m])), 68 | 1, 69 | (255, 0, 0), 70 | 1 71 | ) 72 | 73 | middle_cell = layer['size']//2 74 | target_cell = 0 if middle_cell == 0 else middle_cell 75 | 76 | for default_box in layer_default_boxes[target_cell][target_cell]: 77 | cx = default_box[0] * input_size 78 | cy = default_box[1] * input_size 79 | w = default_box[2] * input_size 80 | h = default_box[3] * input_size 81 | cv2.rectangle( 82 | temp_image, 83 | (int(cx-(w/2)), int(cy-(h/2))), 84 | (int(cx+(w/2)), int(cy+(h/2))), 85 | (0, 255, 0), 86 | 3 87 | ) 88 | cv2.imshow(f"layer: {layer['name']}", temp_image) 89 | if cv2.waitKey(0) == ord('q'): 90 | cv2.destroyAllWindows() 91 | -------------------------------------------------------------------------------- /evaluate.sh: -------------------------------------------------------------------------------- 1 | python evaluate.py \ 2 | /Users/socretlee/CodingDrive/other/object-detection-in-keras/data/pascal-voc-2007/images \ 3 | /Users/socretlee/CodingDrive/other/object-detection-in-keras/data/pascal-voc-2007/labels \ 4 | /Users/socretlee/CodingDrive/other/object-detection-in-keras/output/cp_229_loss-5.06_valloss-5.17.h5 \ 5 | /Users/socretlee/CodingDrive/other/object-detection-in-keras/data/pascal-voc-2007/test.txt \ 6 | --label_maps=/Users/socretlee/CodingDrive/other/object-detection-in-keras/data/pascal-voc-2007/label_maps.txt \ 7 | --output_dir=output/evaluations/cp_229_loss-5.06_valloss-5.17.h5 \ 8 | --iou_threshold=0.5 -------------------------------------------------------------------------------- /inference.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import os 3 | import json 4 | import argparse 5 | import numpy as np 6 | from glob import glob 7 | from utils import inference_utils 8 | 9 | parser = argparse.ArgumentParser( 10 | description='run inference on an input image.') 11 | parser.add_argument('images', type=str, 12 | help='glob string for list of images.') 13 | parser.add_argument('config', type=str, help='path to config file.') 14 | parser.add_argument('weights', type=str, help='path to the weight file.') 15 | parser.add_argument('--label_maps', type=str, help='path to label maps file.') 16 | parser.add_argument('--confidence_threshold', type=float, 17 | help='the confidence score a detection should match in order to be counted.', default=0.9) 18 | parser.add_argument('--num_predictions', type=int, 19 | help='the number of detections to be output as final detections', default=10) 20 | args = parser.parse_args() 21 | 22 | # assert os.path.exists(args.input_image), "config file does not exist" 23 | assert os.path.exists(args.config), "config file does not exist" 24 | assert args.num_predictions > 0, "num_predictions must be larger than zero" 25 | assert args.confidence_threshold > 0, "confidence_threshold must be larger than zero." 26 | assert args.confidence_threshold <= 1, "confidence_threshold must be smaller than or equal to 1." 
27 | with open(args.config, "r") as config_file:
28 |     config = json.load(config_file)
29 | 
30 | input_size = config["model"]["input_size"]
31 | model_config = config["model"]
32 | 
33 | if model_config["name"] == "ssd_vgg16":
34 |     model, process_input_fn, label_maps = inference_utils.ssd_vgg16(config, args)
35 | elif model_config["name"] == "ssd_mobilenetv1":
36 |     model, process_input_fn, label_maps = inference_utils.ssd_mobilenetv1(config, args)
37 | elif model_config["name"] == "ssd_mobilenetv2":
38 |     model, process_input_fn, label_maps = inference_utils.ssd_mobilenetv2(config, args)
39 | elif model_config["name"] == "tbpp_vgg16":
40 |     model, process_input_fn, label_maps = inference_utils.tbpp_vgg16(config, args)
41 | else:
42 |     print(
43 |         f"model with name {model_config['name']} has not been implemented yet")
44 |     exit()
45 | 
46 | model.load_weights(args.weights)
47 | 
48 | 
49 | for idx, input_image in enumerate(list(glob(args.images))):
50 |     image = cv2.imread(input_image)  # read image in bgr format
51 |     # image = cv2.resize(image, (0, 0), fx=0.3, fy=0.3)
52 |     image = np.array(image, dtype=np.float32)  # np.float is deprecated in numpy; use an explicit dtype
53 |     image = np.uint8(image)
54 | 
55 |     display_image = image.copy()
56 |     image_height, image_width, _ = image.shape
57 |     height_scale, width_scale = input_size/image_height, input_size/image_width
58 | 
59 |     image = cv2.resize(image, (input_size, input_size))
60 |     image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
61 |     image = process_input_fn(image)
62 | 
63 |     image = np.expand_dims(image, axis=0)
64 |     y_pred = model.predict(image)
65 | 
66 |     for i, pred in enumerate(y_pred[0]):
67 |         classname = label_maps[int(pred[0]) - 1].upper()
68 |         confidence_score = pred[1]
69 | 
70 |         score = f"{'%.2f' % (confidence_score * 100)}%"
71 |         print(f"-- {classname}: {score}")
72 | 
73 |         if confidence_score <= 1 and confidence_score > args.confidence_threshold:
74 |             xmin = max(int(pred[2] / width_scale), 1)
75 |             ymin = max(int(pred[3] / height_scale), 1)
76 |             xmax = min(int(pred[4] / width_scale), image_width-1)
77 |             ymax = min(int(pred[5] / height_scale), image_height-1)
78 | 
79 |             cv2.putText(
80 |                 display_image,
81 |                 classname,
82 |                 (int(xmin), int(ymin)),
83 |                 cv2.FONT_HERSHEY_PLAIN,
84 |                 1,
85 |                 (100, 100, 255),
86 |                 1,
87 |                 2
88 |             )
89 | 
90 |             cv2.rectangle(
91 |                 display_image,
92 |                 (xmin, ymin),
93 |                 (xmax, ymax),
94 |                 (255, 0, 0),
95 |                 2
96 |             )
97 | 
98 |     print("\n")
99 | 
100 |     cv2.imshow("output", display_image)
101 | 
102 |     key = cv2.waitKey(0)  # read the key once; a second waitKey call would block on another keypress
103 |     if key == ord('q'):
104 |         cv2.destroyAllWindows()
105 |     elif key == ord('s'):
106 |         print("saving sample")
107 |         cv2.destroyAllWindows()
--------------------------------------------------------------------------------
/inference.sh:
--------------------------------------------------------------------------------
1 | python inference.py \
2 |     "data/pascal-voc-2007/images/*" \
3 |     configs/ssd300_vgg16_pascal-voc-2007.json \
4 |     /Users/socretlee/Google\ Drive/1-projects/ssd300_vgg16_pascal-voc-2007_trainval/cp_166_loss-5.24_valloss-5.99.h5 \
5 |     --label_maps=/Users/socretlee/CodingDrive/other/object-detection-in-keras/data/pascal-voc-2007/label_maps.txt \
6 |     --confidence_threshold=0.8 \
7 |     --num_predictions=100
--------------------------------------------------------------------------------
/losses/__init__.py:
--------------------------------------------------------------------------------
1 | from .smooth_l1_loss import SMOOTH_L1_LOSS
2 | from .softmax_loss import SOFTMAX_LOSS
3 | from .ssd_loss import SSD_LOSS
4 | from .tbpp_loss import TBPP_LOSS
5 | 
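The smooth L1 loss defined next switches from a quadratic to a linear penalty at |x| = 1, which keeps gradients bounded for large regression errors. A small NumPy check of that piecewise definition:

    import numpy as np

    # smooth_l1(x) = 0.5 * x**2  if |x| < 1
    #              = |x| - 0.5   otherwise
    # (matches the tf.where branch in smooth_l1_loss.py below)
    def smooth_l1(x):
        x = np.abs(x)
        return np.where(x < 1.0, 0.5 * x ** 2, x - 0.5)

    print(smooth_l1(np.array([0.5, 1.0, 2.0])))  # [0.125 0.5 1.5]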
--------------------------------------------------------------------------------
/losses/smooth_l1_loss.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | 
3 | 
4 | class SMOOTH_L1_LOSS:
5 |     """ Compute smooth l1 loss between the predicted bounding boxes and the ground truth bounding boxes.
6 | 
7 |     Args:
8 |     - y_true: The ground truth bounding boxes.
9 |     - y_pred: The predicted bounding boxes.
10 | 
11 |     Code References:
12 |     - https://github.com/pierluigiferrari/ssd_keras/blob/master/keras_loss_function/keras_ssd_loss.py
13 | 
14 |     Paper References:
15 |     - Girshick, R. (2015). Fast R-CNN. https://arxiv.org/pdf/1504.08083.pdf
16 |     - Liu, W., Anguelov, D., Erhan, D., Szegedy, C., Reed, S., Fu, C. Y., & Berg, A. C. (2016).
17 |       SSD: Single Shot MultiBox Detector. https://arxiv.org/abs/1512.02325
18 |     """
19 | 
20 |     def compute(self, y_true, y_pred):
21 |         abs_loss = tf.abs(y_true - y_pred)
22 |         square_loss = 0.5 * (y_true - y_pred) ** 2
23 |         res = tf.where(tf.less(abs_loss, 1.0), square_loss, abs_loss - 0.5)  # 0.5*x^2 if |x| < 1 else |x| - 0.5
24 |         return tf.reduce_sum(res, axis=-1)
25 | 
--------------------------------------------------------------------------------
/losses/softmax_loss.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | 
3 | 
4 | class SOFTMAX_LOSS:
5 |     """ Calculates the softmax loss between the predicted classes and ground truth classes.
6 | 
7 |     Args:
8 |     - y_true: The ground truth classes.
9 |     - y_pred: The predicted classes.
10 | 
11 |     Code Reference:
12 |     - https://github.com/pierluigiferrari/ssd_keras/blob/master/keras_loss_function/keras_ssd_loss.py
13 |     """
14 | 
15 |     def compute(self, y_true, y_pred):
16 |         y_pred = tf.maximum(y_pred, 1e-15)  # guard against log(0)
17 |         return -1 * tf.reduce_sum(y_true * tf.math.log(y_pred), axis=-1)
18 | 
--------------------------------------------------------------------------------
/losses/ssd_loss.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | from .smooth_l1_loss import SMOOTH_L1_LOSS
3 | from .softmax_loss import SOFTMAX_LOSS
4 | 
5 | 
6 | class SSD_LOSS:
7 |     """ Loss function as defined in the SSD paper.
8 | 
9 |     Args:
10 |     - alpha: weight term from the SSD paper. Defaults to 1.
11 |     - min_negative_boxes: the minimum number of negative boxes allowed in the loss calculation. Defaults to 0.
12 |     - negative_boxes_ratio: the ratio of negative boxes to positive boxes. Defaults to 3 (three negatives for every positive box).
13 | 
14 |     Returns:
15 |     - A tensor of shape (batch_size,) where each item in the tensor represents the loss for each batch item.
16 | 
17 |     Paper References:
18 |     - Liu, W., Anguelov, D., Erhan, D., Szegedy, C., Reed, S., Fu, C. Y., & Berg, A. C. (2016).
19 |       SSD: Single Shot MultiBox Detector. https://arxiv.org/abs/1512.02325
20 | 
21 |     Code References:
22 |     - https://github.com/pierluigiferrari/ssd_keras/blob/master/keras_loss_function/keras_ssd_loss.py
23 |     """
24 | 
25 |     def __init__(
26 |         self,
27 |         alpha=1,
28 |         min_negative_boxes=0,
29 |         negative_boxes_ratio=3,
30 |     ):
31 |         self.alpha = alpha
32 |         self.min_negative_boxes = min_negative_boxes
33 |         self.negative_boxes_ratio = negative_boxes_ratio
34 |         self.smooth_l1_loss = SMOOTH_L1_LOSS()
35 |         self.softmax_loss = SOFTMAX_LOSS()
36 | 
37 |     def compute(self, y_true, y_pred):
38 |         # calculate smooth l1 loss and softmax loss for all boxes
39 |         batch_size = tf.shape(y_true)[0]
40 |         num_boxes = tf.shape(y_true)[1]
41 |         # layout per default box: [one-hot classes..., 4 encoded offsets, 8 default-box entries]
42 |         bbox_true = y_true[:, :, -12:-8]
43 |         bbox_pred = y_pred[:, :, -12:-8]
44 |         class_true = y_true[:, :, :-12]
45 |         class_pred = y_pred[:, :, :-12]
46 |         #
47 |         regression_loss = self.smooth_l1_loss.compute(bbox_true, bbox_pred)
48 |         classification_loss = self.softmax_loss.compute(class_true, class_pred)
49 |         # the background column (index 0) marks negatives; any other class marks positives
50 |         negatives = class_true[:, :, 0]  # (batch_size, num_boxes)
51 |         positives = tf.reduce_max(class_true[:, :, 1:], axis=-1)  # (batch_size, num_boxes)
52 |         num_positives = tf.cast(tf.reduce_sum(positives), tf.int32)
53 |         # the regression loss only counts positive (matched) boxes
54 |         pos_regression_loss = tf.reduce_sum(regression_loss * positives, axis=-1)
55 |         pos_classification_loss = tf.reduce_sum(classification_loss * positives, axis=-1)
56 |         # hard negative mining: keep only the highest-loss negatives, at most negative_boxes_ratio times the positives
57 |         neg_classification_loss = classification_loss * negatives
58 |         num_neg_classification_loss = tf.math.count_nonzero(neg_classification_loss, dtype=tf.int32)
59 |         num_neg_classification_loss_keep = tf.minimum(
60 |             tf.maximum(self.negative_boxes_ratio * num_positives, self.min_negative_boxes),
61 |             num_neg_classification_loss
62 |         )
63 | 
64 |         def f1():
65 |             return tf.zeros([batch_size])
66 | 
67 |         def f2():
68 |             neg_classification_loss_1d = tf.reshape(neg_classification_loss, [-1])
69 |             _, indices = tf.nn.top_k(
70 |                 neg_classification_loss_1d,
71 |                 k=num_neg_classification_loss_keep,
72 |                 sorted=False
73 |             )
74 |             negatives_keep = tf.scatter_nd(
75 |                 indices=tf.expand_dims(indices, axis=1),
76 |                 updates=tf.ones_like(indices, dtype=tf.int32),
77 |                 shape=tf.shape(neg_classification_loss_1d)
78 |             )
79 |             negatives_keep = tf.cast(tf.reshape(negatives_keep, [batch_size, num_boxes]), tf.float32)
80 |             neg_class_loss = tf.reduce_sum(classification_loss * negatives_keep, axis=-1)
81 |             return neg_class_loss
82 | 
83 |         neg_classification_loss = tf.cond(tf.equal(num_neg_classification_loss, tf.constant(0)), f1, f2)
84 |         classification_loss = pos_classification_loss + neg_classification_loss
85 | 
86 |         total = (classification_loss + self.alpha * pos_regression_loss) / tf.maximum(1.0, tf.cast(num_positives, tf.float32))
87 |         total = total * tf.cast(batch_size, tf.float32)
88 |         return total
89 | 
--------------------------------------------------------------------------------
/losses/tbpp_loss.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | from .smooth_l1_loss import SMOOTH_L1_LOSS
3 | from .softmax_loss import SOFTMAX_LOSS
4 | 
5 | 
6 | class TBPP_LOSS:
7 |     """ Loss function as defined in the TextBoxes++ paper, which follows the SSD loss formulation.
8 | 
9 |     Args:
10 |     - alpha: weight term from the SSD paper. Defaults to 1.
11 |     - min_negative_boxes: the minimum number of negative boxes allowed in the loss calculation. Defaults to 0.
12 |     - negative_boxes_ratio: the ratio of negative boxes to positive boxes. Defaults to 3 (three negatives for every positive box).
13 | 
14 |     Returns:
15 |     - A tensor of shape (batch_size,) where each item in the tensor represents the loss for each batch item.
16 | 
17 |     Paper References:
18 |     - Liao, M., Shi, B., & Bai, X. (2018). TextBoxes++: A Single-Shot Oriented Scene Text Detector. https://arxiv.org/abs/1801.02765
19 |     """
20 | 
21 |     def __init__(
22 |         self,
23 |         alpha=1,
24 |         min_negative_boxes=0,
25 |         negative_boxes_ratio=3,
26 |     ):
27 |         self.alpha = alpha
28 |         self.min_negative_boxes = min_negative_boxes
29 |         self.negative_boxes_ratio = negative_boxes_ratio
30 |         self.smooth_l1_loss = SMOOTH_L1_LOSS()
31 |         self.softmax_loss = SOFTMAX_LOSS()
32 | 
33 |     def compute(self, y_true, y_pred):
34 |         # calculate smooth l1 loss and softmax loss for all boxes
35 |         batch_size = tf.shape(y_true)[0]
36 |         num_boxes = tf.shape(y_true)[1]
37 |         # y_true carries 8 trailing default-box entries that y_pred does not, hence the different slices
38 |         textboxes_true = y_true[:, :, -20:-8]
39 |         textboxes_pred = y_pred[:, :, -12:]
40 |         class_true = y_true[:, :, :-20]
41 |         class_pred = y_pred[:, :, :-12]
42 |         #
43 |         regression_loss = self.smooth_l1_loss.compute(textboxes_true, textboxes_pred)
44 |         # tf.print(regression_loss[0, 0])
45 |         classification_loss = self.softmax_loss.compute(class_true, class_pred)
46 |         # tf.print(classification_loss[0, 0])
47 |         # the background column (index 0) marks negatives; any other class marks positives
48 |         negatives = class_true[:, :, 0]  # (batch_size, num_boxes)
49 |         positives = tf.reduce_max(class_true[:, :, 1:], axis=-1)  # (batch_size, num_boxes)
50 |         num_positives = tf.cast(tf.reduce_sum(positives), tf.int32)
51 |         #
52 |         pos_regression_loss = tf.reduce_sum(regression_loss * positives, axis=-1)
53 |         pos_classification_loss = tf.reduce_sum(classification_loss * positives, axis=-1)
54 |         # hard negative mining, as in ssd_loss.py
55 |         neg_classification_loss = classification_loss * negatives
56 |         num_neg_classification_loss = tf.math.count_nonzero(neg_classification_loss, dtype=tf.int32)
57 |         num_neg_classification_loss_keep = tf.minimum(
58 |             tf.maximum(self.negative_boxes_ratio * num_positives, self.min_negative_boxes),
59 |             num_neg_classification_loss
60 |         )
61 | 
62 |         def f1():
63 |             return tf.zeros([batch_size])
64 | 
65 |         def f2():
66 |             neg_classification_loss_1d = tf.reshape(neg_classification_loss, [-1])
67 |             _, indices = tf.nn.top_k(
68 |                 neg_classification_loss_1d,
69 |                 k=num_neg_classification_loss_keep,
70 |                 sorted=False
71 |             )
72 |             negatives_keep = tf.scatter_nd(
73 |                 indices=tf.expand_dims(indices, axis=1),
74 |                 updates=tf.ones_like(indices, dtype=tf.int32),
75 |                 shape=tf.shape(neg_classification_loss_1d)
76 |             )
77 |             negatives_keep = tf.cast(tf.reshape(negatives_keep, [batch_size, num_boxes]), tf.float32)
78 |             neg_class_loss = tf.reduce_sum(classification_loss * negatives_keep, axis=-1)
79 |             return neg_class_loss
80 | 
81 |         neg_classification_loss = tf.cond(tf.equal(num_neg_classification_loss, tf.constant(0)), f1, f2)
82 |         classification_loss = pos_classification_loss + neg_classification_loss
83 | 
84 |         total = (classification_loss + self.alpha * pos_regression_loss) / tf.maximum(1.0, tf.cast(num_positives, tf.float32))
85 |         total = total * tf.cast(batch_size, tf.float32)
86 |         return total
87 | 
--------------------------------------------------------------------------------
/networks/__init__.py:
--------------------------------------------------------------------------------
1 | from .ssd_vgg16 import SSD_VGG16
2 | from .ssd_mobilenet import SSD_MOBILENET
3 | from .ssd_mobilenetv2 import SSD_MOBILENETV2
4 | from .tbpp_vgg16 import TBPP_VGG16
5 | 
--------------------------------------------------------------------------------
/networks/base_networks/__init__.py:
/networks/__init__.py: -------------------------------------------------------------------------------- 1 | from .ssd_vgg16 import SSD_VGG16 2 | from .ssd_mobilenet import SSD_MOBILENET 3 | from .ssd_mobilenetv2 import SSD_MOBILENETV2 4 | from .tbpp_vgg16 import TBPP_VGG16 5 | -------------------------------------------------------------------------------- /networks/base_networks/__init__.py: -------------------------------------------------------------------------------- 1 | from .truncated_vgg16 import TRUNCATED_VGG16 2 | -------------------------------------------------------------------------------- /networks/base_networks/truncated_vgg16.py: -------------------------------------------------------------------------------- 1 | from tensorflow.keras.models import Model 2 | from tensorflow.keras.layers import Conv2D, Input, MaxPooling2D 3 | from tensorflow.python.keras.utils import data_utils 4 | 5 | 6 | WEIGHTS_PATH_NO_TOP = ('https://storage.googleapis.com/tensorflow/' 7 | 'keras-applications/vgg16/' 8 | 'vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5') 9 | 10 | 11 | def TRUNCATED_VGG16( 12 | input_shape=None, 13 | kernel_initializer=None, 14 | kernel_regularizer=None, 15 | ): 16 | """ A truncated version of VGG16 configuration D: blocks 1 to 5 up to conv5_3, without pool5 or the fully connected layers. ImageNet weights are loaded by layer name. 17 | """ 18 | input_layer = Input(shape=input_shape, name="input") 19 | # block 1 20 | conv1_1 = Conv2D( 21 | 64, (3, 3), activation='relu', padding='same', name='block1_conv1', 22 | kernel_initializer=kernel_initializer, kernel_regularizer=kernel_regularizer)(input_layer) 23 | conv1_2 = Conv2D( 24 | 64, (3, 3), activation='relu', padding='same', name='block1_conv2', 25 | kernel_initializer=kernel_initializer, kernel_regularizer=kernel_regularizer)(conv1_1) 26 | pool1 = MaxPooling2D((2, 2), strides=(2, 2), name='block1_pool', padding="same")(conv1_2) 27 | 28 | # block 2 29 | conv2_1 = Conv2D( 30 | 128, (3, 3), activation='relu', padding='same', name='block2_conv1', 31 | kernel_initializer=kernel_initializer, kernel_regularizer=kernel_regularizer)(pool1) 32 | conv2_2 = Conv2D( 33 | 128, (3, 3), activation='relu', padding='same', name='block2_conv2', 34 | kernel_initializer=kernel_initializer, kernel_regularizer=kernel_regularizer)(conv2_1) 35 | pool2 = MaxPooling2D((2, 2), strides=(2, 2), name='block2_pool', padding="same")(conv2_2) 36 | 37 | # block 3 38 | conv3_1 = Conv2D( 39 | 256, (3, 3), activation='relu', padding='same', name='block3_conv1', 40 | kernel_initializer=kernel_initializer, kernel_regularizer=kernel_regularizer)(pool2) 41 | conv3_2 = Conv2D( 42 | 256, (3, 3), activation='relu', padding='same', name='block3_conv2', 43 | kernel_initializer=kernel_initializer, kernel_regularizer=kernel_regularizer)(conv3_1) 44 | conv3_3 = Conv2D( 45 | 256, (3, 3), activation='relu', padding='same', name='block3_conv3', 46 | kernel_initializer=kernel_initializer, kernel_regularizer=kernel_regularizer)(conv3_2) 47 | pool3 = MaxPooling2D((2, 2), strides=(2, 2), name='block3_pool', padding="same")(conv3_3) 48 | 49 | # block 4 50 | conv4_1 = Conv2D( 51 | 512, (3, 3), activation='relu', padding='same', name='block4_conv1', 52 | kernel_initializer=kernel_initializer, kernel_regularizer=kernel_regularizer)(pool3) 53 | conv4_2 = Conv2D( 54 | 512, (3, 3), activation='relu', padding='same', name='block4_conv2', 55 | kernel_initializer=kernel_initializer, kernel_regularizer=kernel_regularizer)(conv4_1) 56 | conv4_3 = Conv2D( 57 | 512, (3, 3), activation='relu', padding='same', name='block4_conv3', 58 | kernel_initializer=kernel_initializer, kernel_regularizer=kernel_regularizer)(conv4_2) 59 | pool4 = MaxPooling2D((2, 2), strides=(2, 2), name='block4_pool', padding="same")(conv4_3) 60 | 61 | # block 5 62 | conv5_1 = Conv2D( 63 | 512, (3, 3), activation='relu', padding='same', name='block5_conv1', 64 | kernel_initializer=kernel_initializer, kernel_regularizer=kernel_regularizer)(pool4) 65 | conv5_2 = Conv2D( 66 | 512, (3, 3), activation='relu', padding='same', name='block5_conv2', 67 | kernel_initializer=kernel_initializer, kernel_regularizer=kernel_regularizer)(conv5_1) 68 | conv5_3 = Conv2D( 69 | 512, (3, 3), activation='relu', padding='same', name='block5_conv3', 70 | kernel_initializer=kernel_initializer, kernel_regularizer=kernel_regularizer)(conv5_2) 71 | 72 | model = Model(inputs=input_layer, outputs=conv5_3) 73 | 74 | weights_path = data_utils.get_file( 75 | 'vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5', 76 | WEIGHTS_PATH_NO_TOP, 77 | cache_subdir='models', 78 | file_hash='6d6bbae143d832006294945121d1f1fc') 79 | 80 | model.load_weights(weights_path, by_name=True) 81 | 82 | return model 83 | --------------------------------------------------------------------------------
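# A quick usage sketch for the backbone above (the 300x300 input size is an
# illustrative assumption; any size works with the 'same'-padded pools):
import numpy as np
from networks.base_networks import TRUNCATED_VGG16

backbone = TRUNCATED_VGG16(input_shape=(300, 300, 3))
features = backbone.predict(np.zeros((1, 300, 300, 3)))
print(features.shape)  # (1, 19, 19, 512): four 2x2 pools take 300 -> 150 -> 75 -> 38 -> 19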
/requirements.txt: -------------------------------------------------------------------------------- 1 | matplotlib==3.3.3 2 | numpy==1.19.4 3 | opencv-python==4.4.0.46 4 | Pillow==8.0.1 5 | tensorflow==2.4.0 6 | tensorflow-estimator==2.4.0 7 | tf-estimator-nightly==2.5.0.dev2021010101 8 | -------------------------------------------------------------------------------- /test.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import os 3 | import json 4 | import argparse 5 | import tensorflow as tf 6 | from tensorflow.keras.applications import vgg16, mobilenet, mobilenet_v2 7 | import numpy as np 8 | from glob import glob 9 | from networks import SSD_VGG16, SSD_MOBILENET, SSD_MOBILENETV2 10 | from utils import inference_utils, textboxes_utils, command_line_utils 11 | 12 | 13 | parser = argparse.ArgumentParser( 14 | description='run inference on all images in a test set.') 15 | parser.add_argument('test_file', type=str, help='path to the test set file.') 16 | parser.add_argument('images_dir', type=str, help='path to images dir.') 17 | parser.add_argument('labels_dir', type=str, help='path to labels dir.') 18 | parser.add_argument('config', type=str, help='path to config file.') 19 | parser.add_argument('weights', type=str, help='path to weights file.') 20 | parser.add_argument('--label_maps', type=str, help='path to label maps file.') 21 | parser.add_argument('--num_predictions', type=int, 22 | help='the number of detections to be output as final detections', default=10) 23 | parser.add_argument('--output_dir', type=str, 24 | help='path to output directory.', default="output") 25 | args = parser.parse_args() 26 | 27 | assert os.path.exists(args.config), "config file does not exist" 28 | assert args.num_predictions > 0, "num_predictions must be larger than zero" 29 | 30 |
31 | 32 | with open(args.config, "r") as config_file: 33 | config = json.load(config_file) 34 | 35 | if not os.path.exists(args.output_dir): 36 | os.makedirs(args.output_dir) 37 | 38 | input_size = config["model"]["input_size"] 39 | model_config = config["model"] 40 | 41 | if model_config["name"] == "ssd_mobilenetv2": 42 | model, process_input_fn, label_maps = inference_utils.ssd_mobilenetv2( 43 | config, args) 44 | elif model_config["name"] == "ssd_vgg16": 45 | model, process_input_fn, label_maps = inference_utils.ssd_vgg16(config, args) 46 | else: 47 | print( 48 | f"model with name {model_config['name']} has not been implemented yet") 49 | exit() 50 | 51 | model.load_weights(args.weights) 52 | 53 | with open(args.test_file, "r") as test_set_file: 54 | tests = test_set_file.readlines() 55 | for idx, sample in enumerate(tests): 56 | print(f"{idx+1}/{len(tests)}") 57 | image_file, label_file = sample.split(" ") 58 | filename = image_file[:image_file.index(".")] 59 | image = cv2.imread(os.path.join(args.images_dir, image_file)) 60 | 61 | 62 | image_height, image_width, _ = image.shape 63 | height_scale, width_scale = input_size/image_height, input_size/image_width 64 | 65 | image = cv2.resize(image, (input_size, input_size)) 66 | image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) 67 | image = process_input_fn(image) 68 | 69 | image = np.expand_dims(image, axis=0) 70 | y_pred = model.predict(image)[0] 71 | 72 | with open(os.path.join(args.output_dir, f"{filename}.txt"), "w") as outfile: 73 | for i, pred in enumerate(y_pred): 74 | classname = label_maps[int(pred[0]) - 1].lower() 75 | confidence_score = pred[1] 76 | pred[[2, 4]] /= width_scale 77 | pred[[3, 5]] /= height_scale 78 | outfile.write(f"{classname} {confidence_score} {int(pred[2])} {int(pred[3])} {int(pred[4])} {int(pred[5])}\n") --------------------------------------------------------------------------------
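# Note on the files written above (an observation, not repo documentation):
# each line of <filename>.txt has the form
#   <classname> <confidence> <xmin> <ymin> <xmax> <ymax>
# in original-image pixel coordinates, a layout commonly consumed by mAP
# evaluation scripts.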
/test.sh: -------------------------------------------------------------------------------- 1 | python test.py \ 2 | /Users/socretlee/CodingDrive/other/object-detection-in-keras/data/easy_example_tests/test.txt \ 3 | /Users/socretlee/CodingDrive/other/object-detection-in-keras/data/easy_example_tests/images \ 4 | /Users/socretlee/CodingDrive/other/object-detection-in-keras/data/easy_example_tests/labels \ 5 | configs/ssd300_vgg16_pascal-voc-2007.json \ 6 | /Users/socretlee/CodingDrive/other/object-detection-in-keras/data/cp_275_loss-3.30_valloss-3.84.h5 \ 7 | --label_maps=/Users/socretlee/CodingDrive/other/object-detection-in-keras/data/easy_example_tests/label_maps.txt \ 8 | --output_dir=/Users/socretlee/CodingDrive/other/object-detection-in-keras/output/cp_275_loss-3.30_valloss-3.84.h5 \ 9 | --num_predictions=3 -------------------------------------------------------------------------------- /train.py: -------------------------------------------------------------------------------- 1 | from utils import training_utils, command_line_utils 2 | from tensorflow.keras.callbacks import ModelCheckpoint, CSVLogger, TerminateOnNaN, LearningRateScheduler 3 | import argparse 4 | import json 5 | import os 6 | 7 | 8 | parser = argparse.ArgumentParser( 9 | description='Start the training process of a particular network.') 10 | parser.add_argument('config', type=str, help='path to config file.') 11 | parser.add_argument('images_dir', type=str, help='path to images dir.') 12 | parser.add_argument('labels_dir', type=str, help='path to labels dir.') 13 | # 14 | parser.add_argument('--training_split', type=str, 15 | help='path to training split file.') 16 | parser.add_argument('--validation_split', type=str, 17 | help='path to validation split file.') 18 | # 19 | parser.add_argument('--label_maps', type=str, help='path to label maps file.') 20 | # 21 | parser.add_argument('--checkpoint', type=str, 22 | help='path to checkpoint weight file.') 23 | # 24 | parser.add_argument('--learning_rate', type=float, 25 | help='learning rate used in training.', default=1e-3) 26 | parser.add_argument('--epochs', type=int, 27 | help='the number of epochs to train', default=100) 28 | parser.add_argument('--initial_epoch', type=int, 29 | help='the initial epochs to start from', default=0) 30 | parser.add_argument('--batch_size', type=int, 31 | help='the batch size used in training', default=32) 32 | parser.add_argument('--shuffle', type=command_line_utils.str2bool, nargs='?', 33 | help='whether to shuffle the dataset when creating the batch', default=True) 34 | parser.add_argument('--augment', type=command_line_utils.str2bool, 35 | nargs='?', help='whether to augment training samples', default=False) 36 | parser.add_argument('--schedule_lr', type=command_line_utils.str2bool, 37 | nargs='?', help='whether to use the lr scheduler', default=True) 38 | parser.add_argument('--show_network_structure', type=command_line_utils.str2bool, 39 | nargs='?', help='whether to print out the network structure when constructing the network', default=False) 40 | parser.add_argument('--output_dir', type=str, 41 | help='path to output directory.', default="output") 42 | args = parser.parse_args() 43 | 44 | assert os.path.exists(args.config), "config file does not exist" 45 | assert os.path.exists(args.images_dir), "images_dir does not exist" 46 | assert os.path.exists(args.labels_dir), "labels_dir does not exist" 47 | assert args.epochs > 0, "epochs must be larger than zero" 48 | assert args.batch_size > 0, "batch_size must be larger than 0" 49 | assert args.learning_rate > 0, "learning_rate must be larger than 0" 50 | 51 | if args.label_maps is not None: 52 | assert os.path.exists(args.label_maps), "label_maps file does not exist" 53 | 54 | if not os.path.exists(args.output_dir): 55 | os.makedirs(args.output_dir) 56 | 57 | with open(args.config, "r") as config_file: 58 | config = json.load(config_file) 59 | 60 | model_config = config["model"] 61 | 62 | if model_config["name"] == "ssd_mobilenetv1": 63 | training_utils.ssd_mobilenetv1(config, args) 64 | elif model_config["name"] == "ssd_mobilenetv2": 65 | training_utils.ssd_mobilenetv2(config, args) 66 | elif model_config["name"] == "ssd_vgg16": 67 | # configure callbacks here 68 | callbacks = [ 69 | ModelCheckpoint( 70 | filepath=os.path.join( 71 | args.output_dir, 72 | "cp_{epoch:02d}_loss-{loss:.2f}.h5" if args.validation_split is None else "cp_{epoch:02d}_loss-{loss:.2f}_valloss-{val_loss:.2f}.h5" 73 | ), 74 | save_weights_only=False, 75 | save_best_only=True, 76 | monitor='loss' if args.validation_split is None else 'val_loss', 77 | mode='min' 78 | ), 79 | CSVLogger( 80 | os.path.join(args.output_dir, "training.csv"), 81 | append=False 82 | ), 83 | TerminateOnNaN(), 84 | ] 85 | 86 | if (args.schedule_lr): 87 | def lr_schedule(epoch): 88 | if epoch < 108: 89 | return args.learning_rate 90 | elif epoch < 146: 91 | return 0.0001 92 | else: 93 | return 0.00001 94 | callbacks.append(LearningRateScheduler(schedule=lr_schedule, verbose=1))
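# With the 1e-3 rate that train.sh passes, this schedule holds 1e-3 for
# epochs 0-107, drops to 1e-4 for epochs 108-145, and to 1e-5 afterwards,
# mirroring the stepwise decay commonly used for SSD-style training.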
"tbpp_vgg16": 98 | training_utils.tbpp_vgg16(config, args) 99 | else: 100 | print( 101 | f"model with name ${model_config['name']} has not been implemented yet") 102 | exit() 103 | -------------------------------------------------------------------------------- /train.sh: -------------------------------------------------------------------------------- 1 | python train.py \ 2 | configs/ssd300_vgg16_pascal-voc-07-12.json \ 3 | /Users/socretlee/CodingDrive/other/object-detection-in-keras/data/pascal-voc-07-12/images \ 4 | /Users/socretlee/CodingDrive/other/object-detection-in-keras/data/pascal-voc-07-12/labels \ 5 | --training_split=/Users/socretlee/CodingDrive/other/object-detection-in-keras/data/pascal-voc-07-12/train.txt \ 6 | --validation_split=/Users/socretlee/CodingDrive/other/object-detection-in-keras/data/pascal-voc-07-12/val.txt \ 7 | --label_maps=/Users/socretlee/CodingDrive/other/object-detection-in-keras/data/pascal-voc-07-12/label_maps.txt \ 8 | --learning_rate=0.001 \ 9 | --epochs=100 \ 10 | --batch_size=3 \ 11 | --shuffle=True \ 12 | --augment=True \ 13 | --output_dir=output/ssd300_vgg16_pascal-voc-2007 -------------------------------------------------------------------------------- /utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .one_hot_class_label import one_hot_class_label 2 | -------------------------------------------------------------------------------- /utils/augmentation_utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .random_brightness import random_brightness 2 | from .random_contrast import random_contrast 3 | from .random_crop_quad import random_crop_quad 4 | from .random_crop import random_crop 5 | from .random_expand_quad import random_expand_quad 6 | from .random_expand import random_expand 7 | from .random_horizontal_flip_quad import random_horizontal_flip_quad 8 | from .random_horizontal_flip import random_horizontal_flip 9 | from .random_hue import random_hue 10 | from .random_lighting_noise import random_lighting_noise 11 | from .random_saturation import random_saturation 12 | from .random_vertical_flip_quad import random_vertical_flip_quad 13 | from .random_vertical_flip import random_vertical_flip 14 | from .resize_to_fixed_size import resize_to_fixed_size 15 | from .bboxes_filter import bboxes_filter 16 | -------------------------------------------------------------------------------- /utils/augmentation_utils/bboxes_filter.py: -------------------------------------------------------------------------------- 1 | def bboxes_filter(): 2 | """ 3 | """ 4 | def _augment( 5 | image, 6 | bboxes, 7 | classes=None 8 | ): 9 | return image, bboxes, classes 10 | return _augment 11 | -------------------------------------------------------------------------------- /utils/augmentation_utils/random_brightness.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import random 3 | import numpy as np 4 | 5 | 6 | def random_brightness( 7 | min_delta=-32, 8 | max_delta=32, 9 | p=0.5 10 | ): 11 | """ Changes the brightness of an image by adding/subtracting a delta value to/from each pixel. 12 | The image format is assumed to be BGR to match Opencv's standard. 13 | 14 | Args: 15 | - image: numpy array representing the input image. 16 | - bboxes: numpy array representing the bounding boxes. 17 | - classes: the list of classes associating with each bounding boxes. 18 | - min_delta: minimum delta value. 
19 | - max_delta: maximum delta value. 20 | - p: The probability with which the brightness is changed 21 | 22 | Returns: 23 | - image: The modified image 24 | - bboxes: The unmodified bounding boxes 25 | - classes: The unmodified bounding boxes 26 | 27 | Raises: 28 | - min_delta is less than -255.0 29 | - max_delta is larger than 255.0 30 | - p is smaller than zero 31 | - p is larger than 1 32 | 33 | Webpage References: 34 | - https://www.telesens.co/2018/06/28/data-augmentation-in-ssd/ 35 | 36 | Code References: 37 | - https://github.com/amdegroot/ssd.pytorch/blob/master/utils/augmentations.py 38 | """ 39 | assert min_delta >= -255.0, "min_delta must be larger than -255.0" 40 | assert max_delta <= 255.0, "max_delta must be less than 255.0" 41 | assert p >= 0, "p must be larger than or equal to zero" 42 | assert p <= 1, "p must be less than or equal to 1" 43 | 44 | def _augment(image, bboxes=None, classes=None): 45 | if (random.random() > p): 46 | return image, bboxes, classes 47 | 48 | temp_image = image.copy() 49 | d = random.uniform(min_delta, max_delta) 50 | temp_image += d 51 | temp_image = np.clip(temp_image, 0, 255) 52 | return temp_image, bboxes, classes 53 | 54 | return _augment 55 | -------------------------------------------------------------------------------- /utils/augmentation_utils/random_contrast.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import random 3 | import numpy as np 4 | 5 | 6 | def random_contrast( 7 | min_delta=0.5, 8 | max_delta=1.5, 9 | p=0.5 10 | ): 11 | """ Changes the contrast of an image by increasing/decreasing each pixel by a factor of delta. 12 | The image format is assumed to be BGR to match Opencv's standard. 13 | 14 | Args: 15 | - image: numpy array representing the input image. 16 | - bboxes: numpy array representing the bounding boxes. 17 | - classes: the list of classes associating with each bounding boxes. 18 | - min_delta: minimum delta value. 19 | - max_delta: maximum delta value. 
20 | - p: The probability with which the contrast is changed 21 | 22 | Returns: 23 | - image: The modified image 24 | - bboxes: The unmodified bounding boxes 25 | - classes: The unmodified classes 26 | 27 | Raises: 28 | - min_delta is less than 0 29 | - max_delta is less than min_delta 30 | - p is smaller than zero 31 | - p is larger than 1 32 | 33 | Webpage References: 34 | - https://www.telesens.co/2018/06/28/data-augmentation-in-ssd/ 35 | 36 | Code References: 37 | - https://github.com/amdegroot/ssd.pytorch/blob/master/utils/augmentations.py 38 | """ 39 | assert min_delta >= 0.0, "min_delta must be larger than zero" 40 | assert max_delta >= min_delta, "max_delta must be larger than min_delta" 41 | assert p >= 0, "p must be larger than or equal to zero" 42 | assert p <= 1, "p must be less than or equal to 1" 43 | 44 | def _augment( 45 | image, 46 | bboxes=None, 47 | classes=None, 48 | ): 49 | if (random.random() > p): 50 | return image, bboxes, classes 51 | 52 | temp_image = image.copy() 53 | d = random.uniform(min_delta, max_delta) 54 | temp_image *= d 55 | temp_image = np.clip(temp_image, 0, 255) 56 | return temp_image, bboxes, classes 57 | 58 | return _augment 59 | -------------------------------------------------------------------------------- /utils/augmentation_utils/random_crop.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import random 3 | import numpy as np 4 | from utils.bbox_utils import iou 5 | 6 | 7 | def random_crop( 8 | min_size=0.1, 9 | max_size=1, 10 | min_ar=1, 11 | max_ar=2, 12 | overlap_modes=[ 13 | None, 14 | [0.1, None], 15 | [0.3, None], 16 | [0.7, None], 17 | [0.9, None], 18 | [None, None], 19 | ], 20 | max_attempts=100, 21 | p=0.5, 22 | ): 23 | """ Randomly crops a patch from the image. 24 | 25 | Args: 26 | - image: numpy array representing the input image. 27 | - bboxes: numpy array representing the bounding boxes. 28 | - classes: the list of classes associated with each bounding box. 29 | - min_size: the minimum size a crop can be 30 | - max_size: the maximum size a crop can be 31 | - min_ar: the minimum aspect ratio a crop can be 32 | - max_ar: the maximum aspect ratio a crop can be 33 | - overlap_modes: the list of overlapping modes the function can randomly choose from. 34 | - max_attempts: the max number of attempts to generate a patch. 35 | 36 | Returns: 37 | - image: the modified image 38 | - bboxes: the modified bounding boxes 39 | - classes: the modified classes 40 | 41 | Webpage References: 42 | - https://www.telesens.co/2018/06/28/data-augmentation-in-ssd/ 43 | 44 | Code References: 45 | - https://github.com/amdegroot/ssd.pytorch/blob/master/utils/augmentations.py 46 | """ 47 | assert p >= 0, "p must be larger than or equal to zero" 48 | assert p <= 1, "p must be less than or equal to 1" 49 | assert min_size > 0, "min_size must be larger than zero." 50 | assert max_size <= 1, "max_size must be less than or equal to one." 51 | assert max_size > min_size, "max_size must be larger than min_size." 52 | assert max_ar > min_ar, "max_ar must be larger than min_ar." 53 | assert max_attempts > 0, "max_attempts must be larger than zero."
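# Note on overlap_modes (a reading of the sampling loop below): each entry is
# a [min_iou, max_iou] pair constraining the IoU between the sampled crop and
# the boxes, None lifts that bound, and drawing the bare None entry skips
# cropping for that sample entirely.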
54 | 55 | def _augment(image, bboxes, classes): 56 | 57 | if (random.random() > p): 58 | return image, bboxes, classes 59 | 60 | height, width, channels = image.shape 61 | overlap_mode = random.choice(overlap_modes) 62 | 63 | if overlap_mode == None: 64 | return image, bboxes, classes 65 | 66 | min_iou, max_iou = overlap_mode 67 | 68 | if min_iou == None: 69 | min_iou = float(-np.inf) 70 | 71 | if max_iou == None: 72 | max_iou = float(np.inf) 73 | 74 | temp_image = image.copy() 75 | 76 | for i in range(max_attempts): 77 | crop_w = random.uniform(min_size * width, max_size * width) 78 | crop_h = random.uniform(min_size * height, max_size * height) 79 | crop_ar = crop_h / crop_w 80 | 81 | if crop_ar < min_ar or crop_ar > max_ar: # crop ar does not match criteria, next attempt 82 | continue 83 | 84 | crop_left = random.uniform(0, width-crop_w) 85 | crop_top = random.uniform(0, height-crop_h) 86 | 87 | crop_rect = np.array([crop_left, crop_top, crop_left + crop_w, crop_top + crop_h], dtype=np.float) 88 | crop_rect = np.expand_dims(crop_rect, axis=0) 89 | crop_rect = np.tile(crop_rect, (bboxes.shape[0], 1)) 90 | 91 | ious = iou(crop_rect, bboxes) 92 | 93 | if ious.min() < min_iou and ious.max() > max_iou: 94 | continue 95 | 96 | bbox_centers = np.zeros((bboxes.shape[0], 2), dtype=np.float) 97 | bbox_centers[:, 0] = (bboxes[:, 0] + bboxes[:, 2]) / 2 98 | bbox_centers[:, 1] = (bboxes[:, 1] + bboxes[:, 3]) / 2 99 | 100 | cx_in_crop = (bbox_centers[:, 0] > crop_left) * (bbox_centers[:, 0] < crop_left + crop_w) 101 | cy_in_crop = (bbox_centers[:, 1] > crop_top) * (bbox_centers[:, 1] < crop_top + crop_h) 102 | boxes_in_crop = cx_in_crop * cy_in_crop 103 | 104 | if not boxes_in_crop.any(): 105 | continue 106 | 107 | temp_image = temp_image[int(crop_top): int(crop_top+crop_h), int(crop_left): int(crop_left+crop_w), :] 108 | temp_classes = np.array(classes, dtype=np.object) 109 | temp_classes = temp_classes[boxes_in_crop] 110 | temp_bboxes = bboxes[boxes_in_crop] 111 | crop_rect = np.array([crop_left, crop_top, crop_left + crop_w, crop_top + crop_h], dtype=np.float) 112 | crop_rect = np.expand_dims(crop_rect, axis=0) 113 | crop_rect = np.tile(crop_rect, (temp_bboxes.shape[0], 1)) 114 | temp_bboxes[:, :2] = np.maximum(temp_bboxes[:, :2], crop_rect[:, :2]) # if bboxes top left is out of crop then use crop's xmin, ymin 115 | temp_bboxes[:, :2] -= crop_rect[:, :2] # translate xmin, ymin to fit crop 116 | temp_bboxes[:, 2:] = np.minimum(temp_bboxes[:, 2:], crop_rect[:, 2:]) 117 | temp_bboxes[:, 2:] -= crop_rect[:, :2] # translate xmax, ymax to fit crop 118 | 119 | return temp_image, temp_bboxes, temp_classes.tolist() 120 | 121 | return image, bboxes, classes 122 | 123 | return _augment 124 | -------------------------------------------------------------------------------- /utils/augmentation_utils/random_crop_quad.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import random 3 | import numpy as np 4 | from utils.bbox_utils import iou, object_coverage 5 | from utils.textboxes_utils import get_bboxes_from_quads 6 | 7 | def random_crop_quad( 8 | image, 9 | quads, 10 | classes, 11 | min_size=0.1, 12 | max_size=1, 13 | min_ar=1, 14 | max_ar=2, 15 | overlap_modes=[ 16 | None, 17 | [0.1, None], 18 | [0.3, None], 19 | [0.7, None], 20 | [0.9, None], 21 | [None, None], 22 | ], 23 | max_attempts=100, 24 | p=0.5 25 | ): 26 | """ Randomly crops a patch from the image. 27 | 28 | Args: 29 | - image: numpy array representing the input image. 
30 | - quads: numpy array representing the quads. 31 | - classes: the list of classes associated with each quad. 32 | - min_size: the minimum size a crop can be 33 | - max_size: the maximum size a crop can be 34 | - min_ar: the minimum aspect ratio a crop can be 35 | - max_ar: the maximum aspect ratio a crop can be 36 | - overlap_modes: the list of overlapping modes the function can randomly choose from. 37 | - max_attempts: the max number of attempts to generate a patch. 38 | 39 | Returns: 40 | - image: the modified image 41 | - quads: the modified quads 42 | - classes: the modified classes 43 | """ 44 | assert p >= 0, "p must be larger than or equal to zero" 45 | assert p <= 1, "p must be less than or equal to 1" 46 | assert min_size > 0, "min_size must be larger than zero." 47 | assert max_size <= 1, "max_size must be less than or equals to one." 48 | assert max_size > min_size, "max_size must be larger than min_size." 49 | assert max_ar > min_ar, "max_ar must be larger than min_ar." 50 | assert max_attempts > 0, "max_attempts must be larger than zero." 51 | 52 | if (random.random() > p): 53 | return image, quads, classes 54 | 55 | height, width, channels = image.shape 56 | overlap_mode = random.choice(overlap_modes) 57 | 58 | 59 | if overlap_mode == None: 60 | return image, quads, classes 61 | 62 | bboxes = get_bboxes_from_quads(quads) 63 | 64 | min_iou, max_iou = overlap_mode 65 | 66 | if min_iou == None: 67 | min_iou = float(-np.inf) 68 | 69 | if max_iou == None: 70 | max_iou = float(np.inf) 71 | 72 | temp_image = image.copy() 73 | 74 | for i in range(max_attempts): 75 | crop_w = random.uniform(min_size * width, max_size * width) 76 | crop_h = random.uniform(min_size * height, max_size * height) 77 | crop_ar = crop_h / crop_w 78 | 79 | if crop_ar < min_ar or crop_ar > max_ar: # crop ar does not match criteria, next attempt 80 | continue 81 | 82 | crop_left = random.uniform(0, width-crop_w) 83 | crop_top = random.uniform(0, height-crop_h) 84 | 85 | crop_rect = np.array([crop_left, crop_top, crop_left + crop_w, crop_top + crop_h], dtype=np.float) 86 | crop_rect = np.expand_dims(crop_rect, axis=0) 87 | crop_rect = np.tile(crop_rect, (bboxes.shape[0], 1)) 88 | 89 | ious = iou(crop_rect, bboxes) 90 | obj_coverage = object_coverage(crop_rect, bboxes) 91 | 92 | 93 | if (ious.min() < min_iou and ious.max() > max_iou) or (obj_coverage.min() < min_iou and obj_coverage.max() > max_iou): 94 | continue 95 | 96 | bbox_centers = np.zeros((bboxes.shape[0], 2), dtype=np.float) 97 | bbox_centers[:, 0] = (bboxes[:, 0] + bboxes[:, 2]) / 2 98 | bbox_centers[:, 1] = (bboxes[:, 1] + bboxes[:, 3]) / 2 99 | 100 | cx_in_crop = (bbox_centers[:, 0] > crop_left) * (bbox_centers[:, 0] < crop_left + crop_w) 101 | cy_in_crop = (bbox_centers[:, 1] > crop_top) * (bbox_centers[:, 1] < crop_top + crop_h) 102 | boxes_in_crop = cx_in_crop * cy_in_crop 103 | 104 | if not boxes_in_crop.any(): 105 | continue 106 | 107 | 108 | 109 | 110 | temp_image = temp_image[int(crop_top): int(crop_top+crop_h), int(crop_left): int(crop_left+crop_w), :] 111 | temp_classes = np.array(classes, dtype=np.object) 112 | temp_classes = temp_classes[boxes_in_crop] 113 | temp_bboxes = bboxes[boxes_in_crop] 114 | temp_quads = quads[boxes_in_crop] 115 | crop_rect = np.array([crop_left, crop_top, crop_left + crop_w, crop_top + crop_h], dtype=np.float) 116 | crop_rect = np.expand_dims(crop_rect, axis=0) 117 | crop_rect = np.tile(crop_rect, (temp_bboxes.shape[0], 1)) 118 | temp_quads[:, :, 0] -= crop_left # translate quads into the crop's frame 119 | temp_quads[:, :, 1] -= crop_top 120 | temp_bboxes[:, :2] = np.maximum(temp_bboxes[:, :2], crop_rect[:, :2]) # if bboxes top left is out of crop then use crop's xmin, ymin 121 | temp_bboxes[:, :2] -= crop_rect[:, :2] # translate xmin, ymin to fit crop 122 | temp_bboxes[:, 2:] = np.minimum(temp_bboxes[:, 2:], crop_rect[:, 2:]) 123 | temp_bboxes[:, 2:] -= crop_rect[:, :2] # translate xmax, ymax to fit crop 124 | return temp_image, temp_quads, temp_classes.tolist() 125 | 126 | return image, quads, classes 127 | --------------------------------------------------------------------------------
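# A small usage sketch of the factory pattern shared by random_crop and most
# augmenters in this package (the values below are illustrative):
import numpy as np
from utils.augmentation_utils import random_crop

augment = random_crop(min_size=0.3, max_size=1.0, p=1.0)
image = np.random.uniform(0.0, 255.0, (300, 300, 3))
bboxes = np.array([[50.0, 50.0, 150.0, 150.0]])
image, bboxes, classes = augment(image, bboxes, ["dog"])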
/utils/augmentation_utils/random_expand.py: -------------------------------------------------------------------------------- 1 | import random 2 | import numpy as np 3 | 4 | 5 | def random_expand( 6 | min_ratio=1, 7 | max_ratio=16, 8 | # mean=[0.406, 0.456, 0.485], # BGR 9 | mean=[104, 117, 123], # BGR 10 | 11 | p=0.5 12 | ): 13 | """ Randomly expands an image and bounding boxes by a ratio between min_ratio and max_ratio. The image format is assumed to be BGR to match Opencv's standard. 14 | 15 | Args: 16 | - image: numpy array representing the input image. 17 | - bboxes: numpy array representing the bounding boxes. 18 | - classes: the list of classes associated with each bounding box. 19 | - min_ratio: The minimum value to expand the image. Defaults to 1. 20 | - max_ratio: The maximum value to expand the image. Defaults to 16. 21 | - p: The probability with which the image is expanded 22 | 23 | Returns: 24 | - image: The modified image 25 | - bboxes: The modified bounding boxes 26 | - classes: The unmodified classes 27 | 28 | Raises: 29 | - p is smaller than zero 30 | - p is larger than 1 31 | 32 | Webpage References: 33 | - https://www.telesens.co/2018/06/28/data-augmentation-in-ssd/ 34 | 35 | Code References: 36 | - https://github.com/amdegroot/ssd.pytorch/blob/master/utils/augmentations.py 37 | """ 38 | assert p >= 0, "p must be larger than or equal to zero" 39 | assert p <= 1, "p must be less than or equal to 1" 40 | assert min_ratio > 0, "min_ratio must be larger than zero" 41 | assert max_ratio > min_ratio, "max_ratio must be larger than min_ratio" 42 | 43 | def _augment(image, bboxes, classes): 44 | if (random.random() > p): 45 | return image, bboxes, classes 46 | 47 | height, width, depth = image.shape 48 | ratio = random.uniform(min_ratio, max_ratio) 49 | left = random.uniform(0, width * ratio - width) 50 | top = random.uniform(0, height * ratio - height) 51 | temp_image = np.zeros( 52 | (int(height * ratio), int(width * ratio), depth), 53 | dtype=image.dtype 54 | ) 55 | temp_image[:, :, :] = mean 56 | temp_image[int(top):int(top+height), int(left):int(left+width)] = image 57 | temp_bboxes = bboxes.copy() 58 | temp_bboxes[:, :2] += (int(left), int(top)) 59 | temp_bboxes[:, 2:] += (int(left), int(top)) 60 | return temp_image, temp_bboxes, classes 61 | 62 | return _augment 63 | -------------------------------------------------------------------------------- /utils/augmentation_utils/random_expand_quad.py: -------------------------------------------------------------------------------- 1 | import random 2 | import numpy as np 3 | 4 | 5 | def random_expand_quad( 6 | image, 7 | quads, 8 | classes, 9 | min_ratio=1, 10 | max_ratio=4, 11 | mean=[0.406, 0.456, 0.485], # BGR 12 | p=0.5 13 | ): 14 | """ Randomly expands an image and quadrilaterals by a ratio between min_ratio and max_ratio.
The image format is assumed to be BGR to match Opencv's standard. 15 | 16 | Args: 17 | - image: numpy array representing the input image. 18 | - quads: numpy array representing the quadrilaterals. 19 | - classes: the list of classes associating with each quadrilaterals. 20 | - min_ratio: The minimum value to expand the image. Defaults to 1. 21 | - max_ratio: The maximum value to expand the image. Defaults to 4. 22 | - p: The probability with which the image is expanded 23 | 24 | Returns: 25 | - image: The modified image 26 | - quads: The modified quadrilaterals 27 | - classes: The unmodified quadrilaterals 28 | 29 | Raises: 30 | - p is smaller than zero 31 | - p is larger than 1 32 | """ 33 | assert p >= 0, "p must be larger than or equal to zero" 34 | assert p <= 1, "p must be less than or equal to 1" 35 | assert min_ratio > 0, "min_ratio must be larger than zero" 36 | assert max_ratio > min_ratio, "max_ratio must be larger than min_ratio" 37 | 38 | if (random.random() > p): 39 | return image, quads, classes 40 | 41 | height, width, depth = image.shape 42 | ratio = random.uniform(min_ratio, max_ratio) 43 | left = random.uniform(0, width * ratio - width) 44 | top = random.uniform(0, height * ratio - height) 45 | temp_image = np.zeros( 46 | (int(height * ratio), int(width * ratio), depth), 47 | dtype=image.dtype 48 | ) 49 | temp_image[:, :, :] = mean 50 | temp_image[int(top):int(top+height), int(left):int(left+width)] = image 51 | temp_quads = quads.copy() 52 | temp_quads[:, :, 0] = quads[:, :, 0] + int(left) 53 | temp_quads[:, :, 1] = quads[:, :, 1] + int(top) 54 | return temp_image, temp_quads, classes 55 | -------------------------------------------------------------------------------- /utils/augmentation_utils/random_horizontal_flip.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | import random 4 | 5 | 6 | def random_horizontal_flip(p=0.5): 7 | """ Randomly flipped the image horizontally. The image format is assumed to be BGR to match Opencv's standard. 8 | 9 | Args: 10 | - image: numpy array representing the input image. 11 | - bboxes: numpy array representing the bounding boxes. 12 | - classes: the list of classes associating with each bounding boxes. 
13 | - p: The probability with which the image is flipped horizontally 14 | 15 | Returns: 16 | - image: The modified image 17 | - bboxes: The modified bounding boxes 18 | - classes: The unmodified classes 19 | 20 | Raises: 21 | - p is smaller than zero 22 | - p is larger than 1 23 | 24 | Webpage References: 25 | - https://www.kdnuggets.com/2018/09/data-augmentation-bounding-boxes-image-transforms.html/2 26 | 27 | Code References: 28 | - https://github.com/amdegroot/ssd.pytorch/blob/master/utils/augmentations.py 29 | """ 30 | assert p >= 0, "p must be larger than or equal to zero" 31 | assert p <= 1, "p must be less than or equal to 1" 32 | 33 | def _augment(image, bboxes, classes): 34 | 35 | if (random.random() > p): 36 | return image, bboxes, classes 37 | 38 | temp_bboxes = bboxes.copy() 39 | image_center = np.array(image.shape[:2])[::-1]/2 40 | image_center = np.hstack((image_center, image_center)) 41 | temp_bboxes[:, [0, 2]] += 2*(image_center[[0, 2]] - temp_bboxes[:, [0, 2]]) 42 | boxes_width = abs(temp_bboxes[:, 0] - temp_bboxes[:, 2]) 43 | temp_bboxes[:, 0] -= boxes_width 44 | temp_bboxes[:, 2] += boxes_width 45 | return np.array(cv2.flip(np.uint8(image), 1), dtype=np.float), temp_bboxes, classes 46 | 47 | return _augment 48 | -------------------------------------------------------------------------------- /utils/augmentation_utils/random_horizontal_flip_quad.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | import random 4 | 5 | 6 | def random_horizontal_flip_quad( 7 | image, 8 | quads, 9 | classes=None, 10 | p=0.5 11 | ): 12 | """ Randomly flips the image horizontally. The image format is assumed to be BGR to match Opencv's standard. 13 | 14 | Args: 15 | - image: numpy array representing the input image. 16 | - quads: numpy array representing the quadrilaterals. 17 | - classes: the list of classes associated with each quadrilateral. 18 | - p: The probability with which the image is flipped horizontally 19 | 20 | Returns: 21 | - image: The modified image 22 | - quads: The modified quadrilaterals 23 | - classes: The unmodified classes 24 | 25 | Raises: 26 | - p is smaller than zero 27 | - p is larger than 1 28 | """ 29 | 30 | assert p >= 0, "p must be larger than or equal to zero" 31 | assert p <= 1, "p must be less than or equal to 1" 32 | 33 | if (random.random() > p): 34 | return image, quads, classes 35 | 36 | temp_quads = quads.copy() 37 | temp_quads[:, :, 0] = image.shape[1] - quads[:, :, 0] 38 | temp = temp_quads.copy() 39 | temp_quads[:, 0] = temp[:, 1] 40 | temp_quads[:, 1] = temp[:, 0] 41 | temp_quads[:, 2] = temp[:, 3] 42 | temp_quads[:, 3] = temp[:, 2] 43 | return np.array(cv2.flip(np.uint8(image), 1), dtype=np.float), temp_quads, classes 44 | -------------------------------------------------------------------------------- /utils/augmentation_utils/random_hue.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import random 3 | import numpy as np 4 | 5 | 6 | def random_hue( 7 | min_delta=-18, 8 | max_delta=18, 9 | p=0.5 10 | ): 11 | """ Changes the Hue of an image by adding/subtracting a delta value 12 | to/from each value in the Hue channel of the image. The image format 13 | is assumed to be BGR to match Opencv's standard. 14 | 15 | Args: 16 | - image: numpy array representing the input image. 17 | - bboxes: numpy array representing the bounding boxes.
18 | - classes: the list of classes associating with each bounding boxes. 19 | - min_delta: minimum delta value. 20 | - max_delta: maximum delta value. 21 | - p: The probability with which the contrast is changed 22 | 23 | Returns: 24 | - image: The modified image 25 | - bboxes: The unmodified bounding boxes 26 | - classes: The unmodified bounding boxes 27 | 28 | Raises: 29 | - min_delta is less than -360.0 30 | - max_delta is larger than 360.0 31 | - p is smaller than zero 32 | - p is larger than 1 33 | 34 | Webpage References: 35 | - https://www.telesens.co/2018/06/28/data-augmentation-in-ssd/ 36 | 37 | Code References: 38 | - https://github.com/amdegroot/ssd.pytorch/blob/master/utils/augmentations.py 39 | """ 40 | assert min_delta >= -360.0, "min_delta must be larger than -360.0" 41 | assert max_delta <= 360.0, "max_delta must be less than 360.0" 42 | assert p >= 0, "p must be larger than or equal to zero" 43 | assert p <= 1, "p must be less than or equal to 1" 44 | 45 | def _augment( 46 | image, 47 | bboxes=None, 48 | classes=None 49 | ): 50 | if (random.random() > p): 51 | return image, bboxes, classes 52 | 53 | temp_image = cv2.cvtColor(np.uint8(image), cv2.COLOR_BGR2HSV) 54 | temp_image = np.array(temp_image, dtype=np.float) 55 | d = random.uniform(min_delta, max_delta) 56 | temp_image[:, :, 0] += d 57 | temp_image = np.clip(temp_image, 0, 360) 58 | temp_image = cv2.cvtColor(np.uint8(temp_image), cv2.COLOR_HSV2BGR) 59 | temp_image = np.array(temp_image, dtype=np.float) 60 | return temp_image, bboxes, classes 61 | 62 | return _augment 63 | -------------------------------------------------------------------------------- /utils/augmentation_utils/random_lighting_noise.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import random 3 | import numpy as np 4 | 5 | 6 | def random_lighting_noise(p=0.5): 7 | """ Changes the lighting of the image by randomly swapping the channels. 8 | The image format is assumed to be BGR to match Opencv's standard. 9 | 10 | Args: 11 | - image: numpy array representing the input image. 12 | - bboxes: numpy array representing the bounding boxes. 13 | - classes: the list of classes associating with each bounding boxes. 
14 | - p: The probability with which the contrast is changed 15 | 16 | Returns: 17 | - image: The modified image 18 | - bboxes: The unmodified bounding boxes 19 | - classes: The unmodified bounding boxes 20 | 21 | Raises: 22 | - p is smaller than zero 23 | - p is larger than 1 24 | 25 | Webpage References: 26 | - https://www.telesens.co/2018/06/28/data-augmentation-in-ssd/ 27 | 28 | Code References: 29 | - https://github.com/amdegroot/ssd.pytorch/blob/master/utils/augmentations.py 30 | """ 31 | assert p >= 0, "p must be larger than or equal to zero" 32 | assert p <= 1, "p must be less than or equal to 1" 33 | 34 | def _augment(image, bboxes=None, classes=None): 35 | if (random.random() > p): 36 | return image, bboxes, classes 37 | 38 | temp_image = image.copy() 39 | perms = [ 40 | (0, 1, 2), 41 | (0, 2, 1), 42 | (1, 0, 2), 43 | (1, 2, 0), 44 | (2, 0, 1), 45 | (2, 1, 0) 46 | ] 47 | selected_perm = random.randint(0, len(perms) - 1) 48 | perm = perms[selected_perm] 49 | temp_image = temp_image[:, :, perm] 50 | return temp_image, bboxes, classes 51 | 52 | return _augment 53 | -------------------------------------------------------------------------------- /utils/augmentation_utils/random_saturation.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import random 3 | import numpy as np 4 | 5 | 6 | def random_saturation( 7 | min_delta=0.5, 8 | max_delta=1.5, 9 | p=0.5 10 | ): 11 | """ Changes the saturation of an image by increasing/decreasing each 12 | value in the saturation channel by a factor of delta. The image format 13 | is assumed to be BGR to match Opencv's standard. 14 | 15 | Args: 16 | - image: numpy array representing the input image. 17 | - bboxes: numpy array representing the bounding boxes. 18 | - classes: the list of classes associating with each bounding boxes. 19 | - min_delta: minimum delta value. 20 | - max_delta: maximum delta value. 
21 | 22 | Returns: 23 | - image: The modified image 24 | - bboxes: The unmodified bounding boxes 25 | - classes: The unmodified bounding boxes 26 | 27 | Raises: 28 | - min_delta is less than 0 29 | - max_delta is less than min_delta 30 | - p is smaller than zero 31 | - p is larger than 1 32 | 33 | Webpage References: 34 | - https://www.telesens.co/2018/06/28/data-augmentation-in-ssd/ 35 | 36 | Code References: 37 | - https://github.com/amdegroot/ssd.pytorch/blob/master/utils/augmentations.py 38 | 39 | """ 40 | assert min_delta >= 0.0, "min_delta must be larger than zero" 41 | assert max_delta >= min_delta, "max_delta must be larger than min_delta" 42 | assert p >= 0, "p must be larger than or equal to zero" 43 | assert p <= 1, "p must be less than or equal to 1" 44 | 45 | def _augment(image, bboxes=None, classes=None): 46 | if (random.random() > p): 47 | return image, bboxes, classes 48 | 49 | temp_image = cv2.cvtColor(np.uint8(image), cv2.COLOR_BGR2HSV) 50 | temp_image = np.array(temp_image, dtype=np.float) 51 | d = random.uniform(min_delta, max_delta) 52 | temp_image[:, :, 1] *= d 53 | temp_image = cv2.cvtColor(np.uint8(temp_image), cv2.COLOR_HSV2BGR) 54 | temp_image = np.array(temp_image, dtype=np.float) 55 | return temp_image, bboxes, classes 56 | 57 | return _augment 58 | -------------------------------------------------------------------------------- /utils/augmentation_utils/random_vertical_flip.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | import random 4 | 5 | 6 | def random_vertical_flip( 7 | image, 8 | bboxes, 9 | classes, 10 | p=0.5 11 | ): 12 | """ Randomly flipped the image vertically. The image format is assumed to be BGR to match Opencv's standard. 13 | 14 | Args: 15 | - image: numpy array representing the input image. 16 | - bboxes: numpy array representing the bounding boxes. 17 | - classes: the list of classes associating with each bounding boxes. 
18 | - p: The probability with which the image is flipped vertically 19 | 20 | Returns: 21 | - image: The modified image 22 | - bboxes: The modified bounding boxes 23 | - classes: The unmodified classes 24 | 25 | Raises: 26 | - p is smaller than zero 27 | - p is larger than 1 28 | 29 | Webpage References: 30 | - https://www.kdnuggets.com/2018/09/data-augmentation-bounding-boxes-image-transforms.html/2 31 | 32 | Code References: 33 | - https://github.com/amdegroot/ssd.pytorch/blob/master/utils/augmentations.py 34 | """ 35 | 36 | assert p >= 0, "p must be larger than or equal to zero" 37 | assert p <= 1, "p must be less than or equal to 1" 38 | 39 | if (random.random() > p): 40 | return image, bboxes, classes 41 | 42 | temp_bboxes = bboxes.copy() 43 | image_center = np.array(image.shape[:2])[::-1]/2 44 | image_center = np.hstack((image_center, image_center)) 45 | temp_bboxes[:, [1, 3]] += 2*(image_center[[1, 3]] - temp_bboxes[:, [1, 3]]) 46 | boxes_height = abs(temp_bboxes[:, 1] - temp_bboxes[:, 3]) 47 | temp_bboxes[:, 1] -= boxes_height 48 | temp_bboxes[:, 3] += boxes_height 49 | return np.array(cv2.flip(np.uint8(image), 0), dtype=np.float), temp_bboxes, classes 50 | -------------------------------------------------------------------------------- /utils/augmentation_utils/random_vertical_flip_quad.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | import random 4 | 5 | 6 | def random_vertical_flip_quad( 7 | image, 8 | quads, 9 | classes, 10 | p=0.5 11 | ): 12 | """ Randomly flips the image vertically. The image format is assumed to be BGR to match Opencv's standard. 13 | 14 | Args: 15 | - image: numpy array representing the input image. 16 | - quads: numpy array representing the quadrilaterals. 17 | - classes: the list of classes associated with each quadrilateral. 18 | - p: The probability with which the image is flipped vertically 19 | 20 | Returns: 21 | - image: The modified image 22 | - quads: The modified quadrilaterals 23 | - classes: The unmodified classes 24 | 25 | Raises: 26 | - p is smaller than zero 27 | - p is larger than 1 28 | """ 29 | 30 | assert p >= 0, "p must be larger than or equal to zero" 31 | assert p <= 1, "p must be less than or equal to 1" 32 | 33 | if (random.random() > p): 34 | return image, quads, classes 35 | 36 | temp_quads = quads.copy() 37 | temp_quads[:, :, 1] = image.shape[0] - quads[:, :, 1] 38 | temp = temp_quads.copy() 39 | temp_quads[:, 0] = temp[:, 3] 40 | temp_quads[:, 1] = temp[:, 2] 41 | temp_quads[:, 2] = temp[:, 1] 42 | temp_quads[:, 3] = temp[:, 0] 43 | return np.array(cv2.flip(np.uint8(image), 0), dtype=np.float), temp_quads, classes 44 | --------------------------------------------------------------------------------
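# Worked sketch for random_vertical_flip above: on a 100px-tall image, a box
# with ymin=10, ymax=30 lands at ymin=70, ymax=90 (x-coordinates unchanged):
import numpy as np
from utils.augmentation_utils import random_vertical_flip

image = np.zeros((100, 100, 3), dtype=np.float64)
bboxes = np.array([[20.0, 10.0, 40.0, 30.0]])
_, flipped_boxes, _ = random_vertical_flip(image, bboxes, ["text"], p=1.0)
print(flipped_boxes)  # [[20. 70. 40. 90.]]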
/utils/augmentation_utils/resize_to_fixed_size.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import random 3 | import numpy as np 4 | 5 | 6 | def resize_to_fixed_size(width, height): 7 | """ Resize the input image and bounding boxes to a fixed size. 8 | 9 | Args: 10 | - image: numpy array representing the input image. 11 | - bboxes: numpy array representing the bounding boxes. 12 | - classes: the list of classes associated with each bounding box. 13 | - width: the target width in pixels. 14 | - height: the target height in pixels. 15 | 16 | Returns: 17 | - image: The resized image 18 | - bboxes: The rescaled bounding boxes 19 | - classes: The unmodified classes 20 | 21 | Raises: 22 | - width is not larger than 0 23 | - height is not larger than 0 24 | """ 25 | assert width > 0, "width must be larger than 0" 26 | assert height > 0, "height must be larger than 0" 27 | 28 | def _augment( 29 | image, 30 | bboxes, 31 | classes=None 32 | ): 33 | temp_image = np.uint8(image) 34 | o_height, o_width, _ = temp_image.shape 35 | height_scale, width_scale = height / o_height, width / o_width 36 | temp_image = cv2.resize(temp_image, (width, height)) 37 | temp_image = np.array(temp_image, dtype=np.float) 38 | temp_bboxes = bboxes.copy() 39 | temp_bboxes[:, [0, 2]] *= width_scale 40 | temp_bboxes[:, [1, 3]] *= height_scale 41 | temp_bboxes[:, [0, 2]] = np.clip(temp_bboxes[:, [0, 2]], 0, width) 42 | temp_bboxes[:, [1, 3]] = np.clip(temp_bboxes[:, [1, 3]], 0, height) 43 | 44 | return temp_image, temp_bboxes, classes 45 | 46 | return _augment 47 | -------------------------------------------------------------------------------- /utils/bbox_utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .center_to_corner import center_to_corner 2 | from .corner_to_center import corner_to_center 3 | from .iou import iou 4 | from .object_coverage import object_coverage 5 | from .center_to_vertices import center_to_vertices -------------------------------------------------------------------------------- /utils/bbox_utils/center_to_corner.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def center_to_corner(boxes): 5 | """ Convert bounding boxes from center format (cx, cy, width, height) to corner format (xmin, ymin, xmax, ymax) 6 | 7 | Args: 8 | - boxes: numpy array or tensor containing all the boxes to be converted 9 | 10 | Returns: 11 | - A numpy array or tensor of converted boxes 12 | """ 13 | temp = boxes.copy() 14 | temp[..., 0] = boxes[..., 0] - (boxes[..., 2] / 2) # xmin 15 | temp[..., 1] = boxes[..., 1] - (boxes[..., 3] / 2) # ymin 16 | temp[..., 2] = boxes[..., 0] + (boxes[..., 2] / 2) # xmax 17 | temp[..., 3] = boxes[..., 1] + (boxes[..., 3] / 2) # ymax 18 | return temp 19 | --------------------------------------------------------------------------------
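# Worked sketch: a box centered at (50, 40) with width 20 and height 10 maps
# to corners (40, 35, 60, 45); corner_to_center (below) inverts this:
import numpy as np
from utils.bbox_utils import center_to_corner

print(center_to_corner(np.array([[50.0, 40.0, 20.0, 10.0]])))  # [[40. 35. 60. 45.]]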
/utils/bbox_utils/center_to_vertices.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def center_to_vertices(boxes): 5 | """ Convert bounding boxes from center format (cx, cy, width, height) to vertices format (x1, y1, x2, y2, x3, y3, x4, y4) 6 | where (x1, y1) is the top left vertex. 7 | 8 | Args: 9 | - boxes: numpy array or tensor containing all the boxes to be converted 10 | 11 | Returns: 12 | - A numpy array of shape (n, 4, 2) 13 | """ 14 | temp = np.zeros((boxes.shape[0], 8)) 15 | half_width = boxes[..., 2] / 2 16 | half_height = boxes[..., 3] / 2 17 | temp[..., 0] = boxes[..., 0] - half_width 18 | temp[..., 1] = boxes[..., 1] - half_height 19 | temp[..., 2] = boxes[..., 0] + half_width 20 | temp[..., 3] = boxes[..., 1] - half_height 21 | temp[..., 4] = boxes[..., 0] + half_width 22 | temp[..., 5] = boxes[..., 1] + half_height 23 | temp[..., 6] = boxes[..., 0] - half_width 24 | temp[..., 7] = boxes[..., 1] + half_height 25 | return np.reshape(temp, (temp.shape[0], 4, 2)) 26 | -------------------------------------------------------------------------------- /utils/bbox_utils/corner_to_center.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def corner_to_center(boxes): 5 | """ Convert bounding boxes from corner format (xmin, ymin, xmax, ymax) to center format (cx, cy, width, height) 6 | 7 | Args: 8 | - boxes: numpy array or tensor containing all the boxes to be converted 9 | 10 | Returns: 11 | - A numpy array or tensor of converted boxes 12 | """ 13 | temp = boxes.copy() 14 | width = np.abs(boxes[..., 0] - boxes[..., 2]) 15 | height = np.abs(boxes[..., 1] - boxes[..., 3]) 16 | temp[..., 0] = boxes[..., 0] + (width / 2) # cx 17 | temp[..., 1] = boxes[..., 1] + (height / 2) # cy 18 | temp[..., 2] = width # width 19 | temp[..., 3] = height # height 20 | return temp 21 | -------------------------------------------------------------------------------- /utils/bbox_utils/iou.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def iou(box_group1, box_group2): 5 | """ Calculates the intersection over union (aka. Jaccard Index) between corresponding pairs of boxes. 6 | Boxes are assumed to be in corners format (xmin, ymin, xmax, ymax) 7 | 8 | Args: 9 | - box_group1: boxes in group 1 10 | - box_group2: boxes in group 2 11 | 12 | Returns: 13 | - A numpy array of shape (n,) where each value represents the iou between the corresponding boxes in box_group1 and box_group2 14 | 15 | Raises: 16 | - The shape of box_group1 and box_group2 are not the same. 17 | 18 | Code References: 19 | - https://stackoverflow.com/questions/28723670/intersection-over-union-between-two-detections/41660682 20 | """ 21 | assert box_group1.shape == box_group2.shape, "The two boxes array must be the same shape." 22 | xmin_intersect = np.maximum(box_group1[..., 0], box_group2[..., 0]) 23 | ymin_intersect = np.maximum(box_group1[..., 1], box_group2[..., 1]) 24 | xmax_intersect = np.minimum(box_group1[..., 2], box_group2[..., 2]) 25 | ymax_intersect = np.minimum(box_group1[..., 3], box_group2[..., 3]) 26 | 27 | intersect = (xmax_intersect - xmin_intersect) * (ymax_intersect - ymin_intersect) 28 | box_group1_area = (box_group1[..., 2] - box_group1[..., 0]) * (box_group1[..., 3] - box_group1[..., 1]) 29 | box_group2_area = (box_group2[..., 2] - box_group2[..., 0]) * (box_group2[..., 3] - box_group2[..., 1]) 30 | union = box_group1_area + box_group2_area - intersect 31 | res = intersect / union 32 | 33 | # set invalid ious to zeros 34 | res[xmax_intersect < xmin_intersect] = 0 35 | res[ymax_intersect < ymin_intersect] = 0 36 | res[res < 0] = 0 37 | res[res > 1] = 0 38 | return res 39 | --------------------------------------------------------------------------------
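# Worked sketch for the elementwise IoU above: two 10x10 boxes overlapping in
# a 5x5 patch give intersection 25 and union 100 + 100 - 25 = 175:
import numpy as np
from utils.bbox_utils import iou

b1 = np.array([[0.0, 0.0, 10.0, 10.0]])
b2 = np.array([[5.0, 5.0, 15.0, 15.0]])
print(iou(b1, b2))  # [0.14285714]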
/utils/bbox_utils/object_coverage.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def object_coverage(box_group1, box_group2): 5 | """ Calculates, for each pair of boxes, the fraction of the second box's area covered by the intersection of the two. 6 | Boxes are assumed to be in corners format (xmin, ymin, xmax, ymax). 7 | """ 8 | assert box_group1.shape == box_group2.shape, "The two boxes array must be the same shape." 9 | xmin_intersect = np.maximum(box_group1[..., 0], box_group2[..., 0]) 10 | ymin_intersect = np.maximum(box_group1[..., 1], box_group2[..., 1]) 11 | xmax_intersect = np.minimum(box_group1[..., 2], box_group2[..., 2]) 12 | ymax_intersect = np.minimum(box_group1[..., 3], box_group2[..., 3]) 13 | 14 | intersect = (xmax_intersect - xmin_intersect) * (ymax_intersect - ymin_intersect) 15 | box_group2_area = (box_group2[..., 2] - box_group2[..., 0]) * (box_group2[..., 3] - box_group2[..., 1]) 16 | res = intersect / box_group2_area 17 | 18 | # set invalid values to zeros 19 | res[xmax_intersect < xmin_intersect] = 0 20 | res[ymax_intersect < ymin_intersect] = 0 21 | res[res < 0] = 0 22 | res[res > 1] = 0 23 | return res 24 | -------------------------------------------------------------------------------- /utils/command_line_utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .str2bool import str2bool 2 | -------------------------------------------------------------------------------- /utils/command_line_utils/str2bool.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | 4 | def str2bool(v): 5 | if isinstance(v, bool): 6 | return v 7 | if v.lower() in ('yes', 'true', 't', 'y', '1'): 8 | return True 9 | elif v.lower() in ('no', 'false', 'f', 'n', '0'): 10 | return False 11 | else: 12 | raise argparse.ArgumentTypeError('Boolean value expected.') 13 | -------------------------------------------------------------------------------- /utils/data_utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .get_samples_from_split import get_samples_from_split 2 | from .coco_text import COCO_Text 3 | -------------------------------------------------------------------------------- /utils/data_utils/get_samples_from_split.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | 4 | def get_samples_from_split(split_file, images_dir, labels_dir): 5 | """ Create a list of samples that can be fed to a data generator. 6 | 7 | Args: 8 | - split_file: Path to the dataset's split file. (e.g. train.txt, val.txt) 9 | - images_dir: Path to images directory.
10 | - labels_dir: Path to labels directory. 11 | 12 | Returns: 13 | - A list of samples. Each sample is a string containing paths to both the image file and its corresponding label file separated by a space. 14 | 15 | Raises: 16 | - split_file does not exist. 17 | - images_dir is not a directory. 18 | - labels_dir is not a directory. 19 | """ 20 | assert os.path.isfile(split_file), "split_file does not exist." 21 | assert os.path.isdir(images_dir), "images_dir is not a directory." 22 | assert os.path.isdir(labels_dir), "labels_dir is not a directory." 23 | 24 | samples = [] 25 | with open(split_file, "r") as split_file: 26 | lines = split_file.readlines() 27 | for line in lines: 28 | cols = line.split(" ") 29 | image_filename = cols[0] 30 | label_filename = cols[1] 31 | image_file = os.path.join(images_dir, image_filename) 32 | label_file = os.path.join(labels_dir, label_filename) 33 | sample = f"{image_file} {label_file}" 34 | samples.append(sample) 35 | return samples 36 | -------------------------------------------------------------------------------- /utils/display_tbpp_data_sample.py: -------------------------------------------------------------------------------- 1 | import os 2 | import cv2 3 | import json 4 | import argparse 5 | import numpy as np 6 | from glob import glob 7 | from xml.dom import minidom 8 | import xml.etree.cElementTree as ET 9 | from pycocotools.coco import COCO 10 | from textboxes_utils import read_sample 11 | 12 | parser = argparse.ArgumentParser(description='Displays a sample') 13 | parser.add_argument('image', type=str, help='path to image file.') 14 | parser.add_argument('label', type=str, help='path to label file.') 15 | args = parser.parse_args() 16 | 17 | print("loading image file") 18 | 19 | image, quads = read_sample(args.image, args.label) 20 | image = np.uint8(image) 21 | 22 | for quad in quads: 23 | cv2.polylines( 24 | image, 25 | [np.reshape(np.array(quad, dtype=int), (-1, 2))], 26 | True, 27 | (0, 255, 0), 28 | 1 29 | ) 30 | 31 | cv2.imshow("image", image) 32 | if cv2.waitKey(0) == ord('q'): 33 | cv2.destroyAllWindows() 34 | -------------------------------------------------------------------------------- /utils/inference_utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .ssd_mobilenetv2 import ssd_mobilenetv2 2 | from .ssd_mobilenetv1 import ssd_mobilenetv1 3 | from .ssd_vgg16 import ssd_vgg16 4 | from .tbpp_vgg16 import tbpp_vgg16 5 | -------------------------------------------------------------------------------- /utils/inference_utils/ssd_mobilenetv1.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | from networks import SSD_MOBILENET 4 | from tensorflow.keras.applications import mobilenet 5 | from utils import ssd_utils 6 | 7 | 8 | def ssd_mobilenetv1(config, args): 9 | assert args.label_maps is not None, "please specify a label map file" 10 | assert os.path.exists(args.label_maps), "label_maps file does not exist" 11 | with open(args.label_maps, "r") as file: 12 | label_maps = [line.strip("\n") for line in file.readlines()] 13 | 14 | model = SSD_MOBILENET( 15 | config, 16 | label_maps, 17 | is_training=False, 18 | num_predictions=args.num_predictions) 19 | process_input_fn = mobilenet.preprocess_input 20 | return model, process_input_fn, label_maps 21 | -------------------------------------------------------------------------------- /utils/inference_utils/ssd_mobilenetv2.py:
-------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | from networks import SSD_MOBILENETV2 4 | from tensorflow.keras.applications import mobilenet_v2 5 | from utils import ssd_utils 6 | 7 | 8 | def ssd_mobilenetv2(config, args): 9 | assert args.label_maps is not None, "please specify a label map file" 10 | assert os.path.exists(args.label_maps), "label_maps file does not exist" 11 | with open(args.label_maps, "r") as file: 12 | label_maps = [line.strip("\n") for line in file.readlines()] 13 | 14 | model = SSD_MOBILENETV2( 15 | config, 16 | label_maps, 17 | is_training=False, 18 | num_predictions=args.num_predictions 19 | ) 20 | process_input_fn = mobilenet_v2.preprocess_input 21 | 22 | return model, process_input_fn, label_maps 23 | -------------------------------------------------------------------------------- /utils/inference_utils/ssd_vgg16.py: -------------------------------------------------------------------------------- 1 | import os 2 | import cv2 3 | import numpy as np 4 | from networks import SSD_VGG16 5 | from tensorflow.keras.applications import vgg16 6 | from utils import ssd_utils 7 | 8 | 9 | def ssd_vgg16(config, args): 10 | assert args.label_maps is not None, "please specify a label map file" 11 | assert os.path.exists(args.label_maps), "label_maps file does not exist" 12 | with open(args.label_maps, "r") as file: 13 | label_maps = [line.strip("\n") for line in file.readlines()] 14 | 15 | model = SSD_VGG16( 16 | config, 17 | label_maps, 18 | is_training=False, 19 | num_predictions=args.num_predictions 20 | ) 21 | process_input_fn = vgg16.preprocess_input 22 | 23 | return model, process_input_fn, label_maps 24 | -------------------------------------------------------------------------------- /utils/inference_utils/tbpp_vgg16.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | from networks import TBPP_VGG16 4 | from tensorflow.keras.applications import vgg16 5 | from utils import textboxes_utils 6 | 7 | 8 | def tbpp_vgg16(config, args): 9 | model = TBPP_VGG16( 10 | config, 11 | is_training=False, 12 | num_predictions=args.num_predictions) 13 | process_input_fn = vgg16.preprocess_input 14 | 15 | return model, process_input_fn, ["text"] 16 | -------------------------------------------------------------------------------- /utils/one_hot_class_label.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def one_hot_class_label(classname, label_maps): 5 | """ Turn classname to one hot encoded label. 
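For example (illustrative values): with label_maps = ["aeroplane", "bicycle", "bird"], classname "bicycle" yields [0, 1, 0].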
6 | 7 | Args: 8 | - classname: String representing the classname 9 | - label_maps: A list of strings containing all the classes 10 | 11 | Returns: 12 | - A numpy array of shape (len(label_maps), ) 13 | 14 | Raises: 15 | - AssertionError: classname is not included in label_maps 16 | """ 17 | assert classname in label_maps, "classname must be included in label maps" 18 | temp = np.zeros((len(label_maps)), dtype=int) 19 | temp[label_maps.index(classname)] = 1 20 | return temp 21 | -------------------------------------------------------------------------------- /utils/pascal_voc_utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .read_label import read_label 2 | -------------------------------------------------------------------------------- /utils/pascal_voc_utils/read_label.py: -------------------------------------------------------------------------------- 1 | import os 2 | import xml.etree.ElementTree as ET 3 | 4 | 5 | def read_label(label_path): 6 | assert os.path.exists(label_path), "Label file does not exist." 7 | 8 | xml_root = ET.parse(label_path).getroot() 9 | objects = xml_root.findall("object") 10 | bboxes, classes = [], [] 11 | for obj in objects: 12 | name = obj.find("name").text 13 | bndbox = obj.find("bndbox") 14 | # use float() because some bndbox values in the dataset are floats 15 | xmin = float(bndbox.find("xmin").text) 16 | ymin = float(bndbox.find("ymin").text) 17 | xmax = float(bndbox.find("xmax").text) 18 | ymax = float(bndbox.find("ymax").text) 19 | bboxes.append([xmin, ymin, xmax, ymax]) 20 | classes.append(name) 21 | 22 | return bboxes, classes 23 | -------------------------------------------------------------------------------- /utils/prepare_coco_dataset.py: -------------------------------------------------------------------------------- 1 | import os 2 | import cv2 3 | import argparse 4 | from glob import glob 5 | from xml.dom import minidom 6 | import xml.etree.cElementTree as ET 7 | from pycocotools.coco import COCO 8 | 9 | parser = argparse.ArgumentParser(description='Converts the coco dataset to a format suitable for training ssd with this repo.') 10 | parser.add_argument('annotations_file', type=str, help='path to annotations file.') 11 | parser.add_argument('images_dir', type=str, help='path to images dir.') 12 | parser.add_argument('output_dir', type=str, help='path to output dir.') 13 | args = parser.parse_args() 14 | 15 | assert os.path.exists(args.annotations_file), "annotations_file does not exist" 16 | assert os.path.exists(args.images_dir), "images_dir does not exist" 17 | if not os.path.exists(args.output_dir): 18 | os.makedirs(args.output_dir) 19 | 20 | coco = COCO(args.annotations_file) 21 | categories = coco.cats 22 | 23 | print("writing label maps to label_maps.txt") 24 | with open(os.path.join(args.output_dir, "label_maps.txt"), "w+") as label_maps_file: 25 | for cat_id in categories: 26 | label_maps_file.write(f"{categories[cat_id]['name']}\n") 27 | print("-- done") 28 | 29 | num_samples = 0 30 | print("-- converting coco annotations to xml files") 31 | with open(os.path.join(args.output_dir, "split.txt"), "w+") as split_file: 32 | images_ids = list(coco.imgs.keys()) 33 | num_images = len(images_ids) 34 | for i, image_id in enumerate(images_ids): 35 | print(f"-- image {i+1}/{num_images}") 36 | annotations = coco.loadAnns(coco.getAnnIds([image_id])) 37 | image_info = coco.loadImgs([image_id])[0] 38 | image_filename = image_info["file_name"] 39 | if len(annotations) == 0: 40 |
print(f"\n---- skipped: {image_filename}\n") 41 | continue 42 | xml_root = ET.Element("annotation") 43 | xml_filename = ET.SubElement(xml_root, "filename").text = image_filename 44 | xml_size = ET.SubElement(xml_root, "size") 45 | xml_size_width = ET.SubElement(xml_size, "width").text = str(image_info["width"]) 46 | xml_size_height = ET.SubElement(xml_size, "height").text = str(image_info["height"]) 47 | xml_size_depth = ET.SubElement(xml_size, "depth").text = str(3) 48 | for annotation in annotations: 49 | category_id = annotation['category_id'] 50 | bbox = annotation['bbox'] 51 | label = coco.cats[category_id]["name"] 52 | xml_object = ET.SubElement(xml_root, "object") 53 | xml_object_name = ET.SubElement(xml_object, "name").text = label 54 | xml_object_bndbox = ET.SubElement(xml_object, "bndbox") 55 | xml_object_bndbox_xmin = ET.SubElement(xml_object_bndbox, "xmin").text = str(bbox[0]) 56 | xml_object_bndbox_ymin = ET.SubElement(xml_object_bndbox, "ymin").text = str(bbox[1]) 57 | xml_object_bndbox_xmax = ET.SubElement(xml_object_bndbox, "xmax").text = str(bbox[0] + bbox[2]) 58 | xml_object_bndbox_ymax = ET.SubElement(xml_object_bndbox, "ymax").text = str(bbox[1] + bbox[3]) 59 | xml_tree = ET.ElementTree(xml_root) 60 | xml_file_name = f"{image_filename[:image_filename.index('.')]}.xml" 61 | with open(os.path.join(args.output_dir, xml_file_name), "wb+") as xml_file: 62 | xml_tree.write(xml_file) 63 | split_file.write(f"{image_filename} {xml_file_name}\n") 64 | num_samples += 1 65 | print("-- done") 66 | print(f"num_samples: {num_samples}") 67 | print(f"split_file lines: {len(split_file.readlines())}") 68 | print(f"num files in annotations folder: {len(list(glob(os.path.join(args.output_dir, '*.xml'))))}") 69 | -------------------------------------------------------------------------------- /utils/prepare_cocotextv2_dataset.py: -------------------------------------------------------------------------------- 1 | import os 2 | import cv2 3 | import argparse 4 | from glob import glob 5 | import json 6 | from xml.dom import minidom 7 | import xml.etree.cElementTree as ET 8 | from data_utils import COCO_Text 9 | import numpy as np 10 | import shutil 11 | 12 | parser = argparse.ArgumentParser(description='Converts the coco dataset to a format suitable for training ssd with this repo.') 13 | parser.add_argument('annotations_file', type=str, help='path to annotations file.') 14 | parser.add_argument('images_dir', type=str, help='path to images dir.') 15 | parser.add_argument('output_dir', type=str, help='path to output dir.') 16 | args = parser.parse_args() 17 | 18 | assert os.path.exists(args.annotations_file), "annotations_file does not exist" 19 | assert os.path.exists(args.images_dir), "images_dir does not exist" 20 | val_dir = os.path.join(os.path.join(args.output_dir, "val")) 21 | train_dir = os.path.join(os.path.join(args.output_dir, "train")) 22 | os.makedirs(os.path.join(val_dir, "images"), exist_ok=True) 23 | os.makedirs(os.path.join(val_dir, "labels"), exist_ok=True) 24 | os.makedirs(os.path.join(train_dir, "images"), exist_ok=True) 25 | os.makedirs(os.path.join(train_dir, "labels"), exist_ok=True) 26 | 27 | coco = COCO_Text(annotation_file=args.annotations_file) 28 | 29 | print("-- copying images for validation sets") 30 | with open(os.path.join(args.output_dir, "test.txt"), "w") as test_file: 31 | for i, image_id in enumerate(coco.val): 32 | print(f"image {i+1} / {len(coco.val)}") 33 | annotations = coco.loadAnns(coco.getAnnIds([image_id])) 34 | image_info = 
coco.loadImgs([image_id])[0] 35 | image_filename = image_info["file_name"] 36 | 37 | if len(annotations) == 0: 38 | continue 39 | 40 | filter_annotations = [] 41 | 42 | for annotation in annotations: 43 | quad = annotation["polygon"] 44 | if len(quad) != 8: 45 | continue 46 | filter_annotations.append(annotation) 47 | 48 | if len(filter_annotations) == 0: 49 | continue 50 | 51 | shutil.copy( 52 | os.path.join(args.images_dir, image_filename), 53 | os.path.join(os.path.join(val_dir, "images"), image_filename) 54 | ) 55 | 56 | label_file_name = f"{image_filename[:image_filename.index('.')]}.txt" 57 | 58 | with open(os.path.join(os.path.join(val_dir, "labels"), label_file_name), "w") as label_file: 59 | for annotation in filter_annotations: 60 | quad = annotation["polygon"] 61 | try: 62 | text = annotation["utf8_string"] 63 | except: 64 | text = "###" 65 | for num in quad: 66 | label_file.write(f"{float(num)},") 67 | label_file.write(f"{text}\n") 68 | 69 | test_file.write(f"{image_filename} {label_file_name}\n") 70 | 71 | print("-- copying images for training sets") 72 | with open(os.path.join(args.output_dir, "train.txt"), "w") as train_file: 73 | for i, image_id in enumerate(coco.train): 74 | print(f"image {i+1} / {len(coco.train)}") 75 | annotations = coco.loadAnns(coco.getAnnIds([image_id])) 76 | image_info = coco.loadImgs([image_id])[0] 77 | image_filename = image_info["file_name"] 78 | 79 | if len(annotations) == 0: 80 | continue 81 | 82 | filter_annotations = [] 83 | 84 | for annotation in annotations: 85 | quad = annotation["polygon"] 86 | if len(quad) != 8: 87 | continue 88 | filter_annotations.append(annotation) 89 | 90 | if len(filter_annotations) == 0: 91 | continue 92 | 93 | shutil.copy( 94 | os.path.join(args.images_dir, image_filename), 95 | os.path.join(os.path.join(train_dir, "images"), image_filename) 96 | ) 97 | 98 | label_file_name = f"{image_filename[:image_filename.index('.')]}.txt" 99 | 100 | with open(os.path.join(os.path.join(train_dir, "labels"), label_file_name), "w") as label_file: 101 | for annotation in filter_annotations: 102 | quad = annotation["polygon"] 103 | try: 104 | text = annotation["utf8_string"] 105 | except: 106 | text = "###" 107 | for num in quad: 108 | label_file.write(f"{float(num)},") 109 | label_file.write(f"{text}\n") 110 | 111 | train_file.write(f"{image_filename} {label_file_name}\n") 112 | -------------------------------------------------------------------------------- /utils/prepare_icdar-2013_dataset.py: -------------------------------------------------------------------------------- 1 | import os 2 | import cv2 3 | import argparse 4 | from glob import glob 5 | import json 6 | from xml.dom import minidom 7 | import xml.etree.cElementTree as ET 8 | from data_utils import COCO_Text 9 | import numpy as np 10 | import shutil 11 | import os 12 | 13 | 14 | parser = argparse.ArgumentParser(description='Converts the icdar 2013 dataset to a format suitable for training tbpp with this repo.') 15 | parser.add_argument('dataset_dir', type=str, help='path to dataset dir.') 16 | parser.add_argument('output_dir', type=str, help='path to output dir.') 17 | args = parser.parse_args() 18 | 19 | assert os.path.exists(args.dataset_dir), "dataset_dir does not exist" 20 | testing_dir = os.path.join(os.path.join(args.output_dir, "test")) 21 | train_dir = os.path.join(os.path.join(args.output_dir, "train")) 22 | os.makedirs(os.path.join(testing_dir, "images"), exist_ok=True) 23 | os.makedirs(os.path.join(testing_dir, "labels"), exist_ok=True) 24 | 
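# Note on the output layout (inferred from the makedirs calls at this point):
# the converted dataset ends up as <output_dir>/train/{images,labels} and
# <output_dir>/test/{images,labels}, with train.txt / test.txt split files
# written at the top level of output_dir by the loops below.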
os.makedirs(os.path.join(train_dir, "images"), exist_ok=True) 25 | os.makedirs(os.path.join(train_dir, "labels"), exist_ok=True) 26 | 27 | print("-- copy images for training sets") 28 | training_images = sorted(list(glob(os.path.join(args.dataset_dir, os.path.join("Challenge2_Training_Task12_Images", "*.jpg"))))) 29 | test_images = sorted(list(glob(os.path.join(args.dataset_dir, os.path.join("Challenge2_Test_Task12_Images", "*.jpg"))))) 30 | 31 | with open(os.path.join(args.output_dir, "train.txt"), "w") as train_split: 32 | for i, train_image in enumerate(training_images): 33 | print(f"image {i+1}/{len(training_images)}") 34 | image_filename = os.path.basename(train_image) 35 | label_filename = f"gt_{image_filename[:image_filename.index('.')]}.txt" 36 | shutil.copy( 37 | os.path.join(os.path.join(args.dataset_dir, "Challenge2_Training_Task12_Images"), image_filename), 38 | os.path.join(os.path.join(train_dir, "images"), image_filename) 39 | ) 40 | with open(os.path.join(os.path.join(args.dataset_dir, "Challenge2_Training_Task1_GT"), label_filename), "r") as label_file: 41 | quads = label_file.readlines() 42 | with open(os.path.join(os.path.join(train_dir, "labels"), label_filename), "w") as output_label_file: 43 | for quad in quads: 44 | quad = quad.strip("\n") 45 | quad = quad.split(" ") 46 | quad[-1] = quad[-1][1:-1] 47 | quad = [i.strip(",") for i in quad] 48 | quad[:4] = [float(i) for i in quad[:4]] 49 | w = abs(quad[0] - quad[2]) 50 | h = abs(quad[1] - quad[3]) 51 | x1 = quad[0] 52 | y1 = quad[1] 53 | x2 = quad[0] + w 54 | y2 = quad[1] 55 | x3 = quad[0] + w 56 | y3 = quad[1] + h 57 | x4 = quad[0] 58 | y4 = quad[1] + h 59 | output_label_file.write(f"{x1},{y1},{x2},{y2},{x3},{y3},{x4},{y4},{quad[-1]}\n") 60 | train_split.write(f"{image_filename} {label_filename}\n") 61 | 62 | with open(os.path.join(args.output_dir, "test.txt"), "w") as test_split: 63 | for i, test_image in enumerate(test_images): 64 | print(f"image {i+1}/{len(test_images)}") 65 | image_filename = os.path.basename(test_image) 66 | label_filename = f"gt_{image_filename[:image_filename.index('.')]}.txt" 67 | shutil.copy( 68 | os.path.join(os.path.join(args.dataset_dir, "Challenge2_Test_Task12_Images"), image_filename), 69 | os.path.join(os.path.join(testing_dir, "images"), image_filename) 70 | ) 71 | with open(os.path.join(os.path.join(args.dataset_dir, "Challenge2_Test_Task1_GT"), label_filename), "r") as label_file: 72 | quads = label_file.readlines() 73 | with open(os.path.join(os.path.join(testing_dir, "labels"), label_filename), "w") as output_label_file: 74 | for quad in quads: 75 | quad = quad.strip("\n") 76 | quad = quad.split(" ") 77 | quad[-1] = quad[-1][1:-1] 78 | quad = [i.strip(",") for i in quad] 79 | quad[:4] = [float(i) for i in quad[:4]] 80 | w = abs(quad[0] - quad[2]) 81 | h = abs(quad[1] - quad[3]) 82 | x1 = quad[0] 83 | y1 = quad[1] 84 | x2 = quad[0] + w 85 | y2 = quad[1] 86 | x3 = quad[0] + w 87 | y3 = quad[1] + h 88 | x4 = quad[0] 89 | y4 = quad[1] + h 90 | output_label_file.write(f"{x1},{y1},{x2},{y2},{x3},{y3},{x4},{y4},{quad[-1]}\n") 91 | test_split.write(f"{image_filename} {label_filename}\n") 92 | -------------------------------------------------------------------------------- /utils/prepare_icdar-2015_dataset.py: -------------------------------------------------------------------------------- 1 | import os 2 | import cv2 3 | import argparse 4 | from glob import glob 5 | import json 6 | from xml.dom import minidom 7 | import xml.etree.cElementTree as ET 8 | from data_utils import COCO_Text 9 | 
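# Note: unlike the ICDAR 2013 script above, ICDAR 2015 ground-truth files already
# store 8-coordinate quadrilaterals (x1,y1,...,x4,y4,transcription), so this script
# copies the label files verbatim instead of rewriting rectangles as quads.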
import numpy as np 10 | import shutil 11 | import os 12 | 13 | 14 | parser = argparse.ArgumentParser(description='Converts the icdar 2015 dataset to a format suitable for training tbpp with this repo.') 15 | parser.add_argument('dataset_dir', type=str, help='path to dataset dir.') 16 | parser.add_argument('output_dir', type=str, help='path to output dir.') 17 | args = parser.parse_args() 18 | 19 | assert os.path.exists(args.dataset_dir), "dataset_dir does not exist" 20 | testing_dir = os.path.join(os.path.join(args.output_dir, "test")) 21 | train_dir = os.path.join(os.path.join(args.output_dir, "train")) 22 | os.makedirs(os.path.join(testing_dir, "images"), exist_ok=True) 23 | os.makedirs(os.path.join(testing_dir, "labels"), exist_ok=True) 24 | os.makedirs(os.path.join(train_dir, "images"), exist_ok=True) 25 | os.makedirs(os.path.join(train_dir, "labels"), exist_ok=True) 26 | 27 | print("-- copy images for training sets") 28 | training_images = sorted(list(glob(os.path.join(args.dataset_dir, os.path.join("ch4_training_images", "*.jpg"))))) 29 | test_images = sorted(list(glob(os.path.join(args.dataset_dir, os.path.join("ch4_test_images", "*.jpg"))))) 30 | 31 | with open(os.path.join(args.output_dir, "train.txt"), "w") as train_split: 32 | for i, train_image in enumerate(training_images): 33 | print(f"image {i+1}/{len(training_images)}") 34 | image_filename = os.path.basename(train_image) 35 | label_filename = f"gt_{image_filename[:image_filename.index('.')]}.txt" 36 | shutil.copy( 37 | os.path.join(os.path.join(args.dataset_dir, "ch4_training_images"), image_filename), 38 | os.path.join(os.path.join(train_dir, "images"), image_filename) 39 | ) 40 | shutil.copy( 41 | os.path.join(os.path.join(args.dataset_dir, "ch4_training_localization_transcription_gt"), label_filename), 42 | os.path.join(os.path.join(train_dir, "labels"), label_filename) 43 | ) 44 | train_split.write(f"{image_filename} {label_filename}\n") 45 | 46 | with open(os.path.join(args.output_dir, "test.txt"), "w") as test_split: 47 | for i, test_image in enumerate(test_images): 48 | print(f"image {i+1}/{len(test_images)}") 49 | image_filename = os.path.basename(test_image) 50 | label_filename = f"gt_{image_filename[:image_filename.index('.')]}.txt" 51 | shutil.copy( 52 | os.path.join(os.path.join(args.dataset_dir, "ch4_test_images"), image_filename), 53 | os.path.join(os.path.join(testing_dir, "images"), image_filename) 54 | ) 55 | shutil.copy( 56 | os.path.join(os.path.join(args.dataset_dir, "Challenge4_Test_Task1_GT"), label_filename), 57 | os.path.join(os.path.join(testing_dir, "labels"), label_filename) 58 | ) 59 | test_split.write(f"{image_filename} {label_filename}\n") 60 | -------------------------------------------------------------------------------- /utils/prepare_pascal-voc-2007-2012_dataset.py: -------------------------------------------------------------------------------- 1 | import os 2 | import cv2 3 | import argparse 4 | from glob import glob 5 | import json 6 | from xml.dom import minidom 7 | import xml.etree.cElementTree as ET 8 | from data_utils import COCO_Text 9 | import numpy as np 10 | import shutil 11 | import os 12 | 13 | 14 | def str2bool(v): 15 | if isinstance(v, bool): 16 | return v 17 | if v.lower() in ('yes', 'true', 't', 'y', '1'): 18 | return True 19 | elif v.lower() in ('no', 'false', 'f', 'n', '0'): 20 | return False 21 | else: 22 | raise argparse.ArgumentTypeError('Boolean value expected.') 23 | 24 | 25 | parser = argparse.ArgumentParser( 26 | description='Converts the Pascal VOC 2007 and 2012 
dataset to a format suitable for training ssd with this repo.') 27 | parser.add_argument('dataset_dir', type=str, help='path to dataset dir.') 28 | parser.add_argument('output_dir', type=str, help='path to output dir.') 29 | args = parser.parse_args() 30 | 31 | assert os.path.exists(args.dataset_dir), "dataset_dir does not exist" 32 | out_images_dir = os.path.join(args.output_dir, "images") 33 | out_labels_dir = os.path.join(args.output_dir, "labels") 34 | os.makedirs(out_images_dir, exist_ok=True) 35 | os.makedirs(out_labels_dir, exist_ok=True) 36 | 37 | 38 | datasets = ["VOC2007", "VOC2012"] 39 | train_samples, val_samples, trainval_samples, test_samples = [], [], [], [] 40 | for dataset in datasets: 41 | print(f"-- gather data from: {dataset}") 42 | dataset_dir = os.path.abspath(args.dataset_dir) 43 | dataset_dir = os.path.join(dataset_dir, dataset) 44 | images_dir = os.path.join(dataset_dir, "JPEGImages") 45 | labels_dir = os.path.join(dataset_dir, "Annotations") 46 | 47 | print(f"---- copy images") 48 | for image in list(glob(os.path.join(images_dir, "*jpg"))): 49 | destination_filename = os.path.basename(image) 50 | if dataset == "VOC2007": 51 | destination_filename = f"2007_{destination_filename}" 52 | dest = os.path.join(out_images_dir, destination_filename) 53 | shutil.copy(image, dest) 54 | 55 | print(f"---- copy labels") 56 | for label in list(glob(os.path.join(labels_dir, "*xml"))): 57 | destination_filename = os.path.basename(label) 58 | if dataset == "VOC2007": 59 | destination_filename = f"2007_{destination_filename}" 60 | dest = os.path.join(out_labels_dir, destination_filename) 61 | shutil.copy(label, dest) 62 | 63 | train_split = os.path.join(dataset_dir, "ImageSets/Main/train.txt") 64 | val_split = os.path.join(dataset_dir, "ImageSets/Main/val.txt") 65 | trainval_split = os.path.join(dataset_dir, "ImageSets/Main/trainval.txt") 66 | 67 | # train split 68 | print(f"---- gather train samples") 69 | with open(train_split, "r") as train_file: 70 | samples = train_file.readlines() 71 | for sample in samples: 72 | if dataset == "VOC2007": 73 | sample = "2007_" + sample.strip("\n") 74 | else: 75 | sample = sample.strip("\n") 76 | if sample not in train_samples: 77 | train_samples.append(sample) 78 | 79 | # val split 80 | print(f"---- gather val samples") 81 | with open(val_split, "r") as val_file: 82 | samples = val_file.readlines() 83 | for sample in samples: 84 | if dataset == "VOC2007": 85 | sample = "2007_" + sample.strip("\n") 86 | else: 87 | sample = sample.strip("\n") 88 | if sample not in val_samples: 89 | val_samples.append(sample) 90 | 91 | # trainval split 92 | print(f"---- gather trainval samples") 93 | with open(trainval_split, "r") as trainval_file: 94 | samples = trainval_file.readlines() 95 | for sample in samples: 96 | if dataset == "VOC2007": 97 | sample = "2007_" + sample.strip("\n") 98 | else: 99 | sample = sample.strip("\n") 100 | if sample not in trainval_samples: 101 | trainval_samples.append(sample) 102 | 103 | if dataset == "VOC2007": 104 | print(f"---- gather test samples") 105 | with open(os.path.join(dataset_dir, "ImageSets/Main/test.txt"), "r") as test_file: 106 | samples = test_file.readlines() 107 | for sample in samples: 108 | if dataset == "VOC2007": 109 | sample = "2007_" + sample.strip("\n") 110 | else: 111 | sample = sample.strip("\n") 112 | if sample not in test_samples: 113 | test_samples.append(sample) 114 | 115 | 116 | def save_samples_to_split(s, name): 117 | with open(os.path.join(args.output_dir, name), "w") as outfile: 118 | for i in
s: 119 | outfile.write(f"{i}.jpg {i}.xml\n") 120 | 121 | 122 | print(f"-- num_train: {len(train_samples)}") 123 | save_samples_to_split(train_samples, "train.txt") 124 | print(f"-- num_val: {len(val_samples)}") 125 | save_samples_to_split(val_samples, "val.txt") 126 | print(f"-- num_trainval: {len(trainval_samples)}") 127 | save_samples_to_split(trainval_samples, "trainval.txt") 128 | print(f"-- num_test: {len(test_samples)}") 129 | save_samples_to_split(test_samples, "test.txt") 130 | 131 | print(f"-- writing label_maps.txt") 132 | dataset_dir = os.path.abspath(args.dataset_dir) 133 | dataset_dir = os.path.join(dataset_dir, "VOC2007") 134 | 135 | with open(os.path.join(args.output_dir, "label_maps.txt"), "w") as label_maps_file: 136 | labels = list( 137 | glob(os.path.join(dataset_dir, "ImageSets/Main/*_train.txt"))) 138 | labels = [os.path.basename(i) for i in labels] 139 | labels = sorted([i[:i.index("_")] for i in labels]) 140 | for classname in labels: 141 | label_maps_file.write(f"{classname}\n") 142 | -------------------------------------------------------------------------------- /utils/prepare_pascal_voc_2007_dataset.py: -------------------------------------------------------------------------------- 1 | import os 2 | import cv2 3 | import argparse 4 | from glob import glob 5 | import json 6 | from xml.dom import minidom 7 | import xml.etree.cElementTree as ET 8 | from data_utils import COCO_Text 9 | import numpy as np 10 | import shutil 11 | import os 12 | 13 | 14 | parser = argparse.ArgumentParser( 15 | description='Converts the Pascal VOC 2007 dataset to a format suitable for training ssd with this repo.') 16 | parser.add_argument('dataset_dir', type=str, help='path to dataset dir.') 17 | parser.add_argument('output_dir', type=str, help='path to output dir.') 18 | args = parser.parse_args() 19 | 20 | assert os.path.exists(args.dataset_dir), "dataset_dir does not exist" 21 | images_dir = os.path.join(args.dataset_dir, "JPEGImages") 22 | labels_dir = os.path.join(args.dataset_dir, "Annotations") 23 | out_images_dir = os.path.join(args.output_dir, "images") 24 | out_labels_dir = os.path.join(args.output_dir, "labels") 25 | os.makedirs(out_images_dir, exist_ok=True) 26 | os.makedirs(out_labels_dir, exist_ok=True) 27 | 28 | print(f"-- creating split files") 29 | print(f"---- train.txt") 30 | with open(os.path.join(args.output_dir, "train.txt"), "w") as train_split_file: 31 | with open(os.path.join(args.dataset_dir, "ImageSets/Main/train.txt"), "r") as train_file: 32 | samples = train_file.readlines() 33 | for i, sample in enumerate(samples): 34 | sample = sample.strip("\n") 35 | train_split_file.write(f"{sample}.jpg {sample}.xml\n") 36 | 37 | print(f"---- val.txt") 38 | with open(os.path.join(args.output_dir, "val.txt"), "w") as val_split_file: 39 | with open(os.path.join(args.dataset_dir, "ImageSets/Main/val.txt"), "r") as val_file: 40 | samples = val_file.readlines() 41 | for sample in samples: 42 | sample = sample.strip("\n") 43 | val_split_file.write(f"{sample}.jpg {sample}.xml\n") 44 | 45 | print(f"---- test.txt") 46 | with open(os.path.join(args.output_dir, "test.txt"), "w") as val_split_file: 47 | with open(os.path.join(args.dataset_dir, "ImageSets/Main/test.txt"), "r") as val_file: 48 | samples = val_file.readlines() 49 | for sample in samples: 50 | sample = sample.strip("\n") 51 | val_split_file.write(f"{sample}.jpg {sample}.xml\n") 52 | 53 | print(f"---- trainval.txt") 54 | with open(os.path.join(args.output_dir, "split.txt"), "w") as trainval_split_file: 55 | with 
open(os.path.join(args.dataset_dir, "ImageSets/Main/trainval.txt"), "r") as trainval_file: 56 | samples = trainval_file.readlines() 57 | for sample in samples: 58 | sample = sample.strip("\n") 59 | trainval_split_file.write(f"{sample}.jpg {sample}.xml\n") 60 | 61 | print(f"-- copying images") 62 | for i, sample in enumerate(list(glob(os.path.join(images_dir, "*jpg")))): 63 | filename = os.path.basename(sample) 64 | shutil.copy( 65 | sample, 66 | os.path.join(out_images_dir, filename) 67 | ) 68 | 69 | print(f"-- copying labels") 70 | for i, sample in enumerate(list(glob(os.path.join(labels_dir, "*xml")))): 71 | filename = os.path.basename(sample) 72 | shutil.copy( 73 | sample, 74 | os.path.join(out_labels_dir, filename) 75 | ) 76 | 77 | print(f"-- writing label_maps.txt") 78 | with open(os.path.join(args.output_dir, "label_maps.txt"), "w") as label_maps_file: 79 | labels = list( 80 | glob(os.path.join(args.dataset_dir, "ImageSets/Main/*_train.txt"))) 81 | labels = [os.path.basename(i) for i in labels] 82 | labels = sorted([i[:i.index("_")] for i in labels]) 83 | for classname in labels: 84 | label_maps_file.write(f"{classname}\n") 85 | -------------------------------------------------------------------------------- /utils/prepare_pascal_voc_2012_dataset.py: -------------------------------------------------------------------------------- 1 | import os 2 | import cv2 3 | import argparse 4 | from glob import glob 5 | import json 6 | from xml.dom import minidom 7 | import xml.etree.cElementTree as ET 8 | from data_utils import COCO_Text 9 | import numpy as np 10 | import shutil 11 | import os 12 | 13 | 14 | parser = argparse.ArgumentParser( 15 | description='Converts the Pascal VOC 2012 dataset to a format suitable for training ssd with this repo.') 16 | parser.add_argument('dataset_dir', type=str, help='path to dataset dir.') 17 | parser.add_argument('output_dir', type=str, help='path to output dir.') 18 | args = parser.parse_args() 19 | 20 | assert os.path.exists(args.dataset_dir), "dataset_dir does not exist" 21 | images_dir = os.path.join(args.dataset_dir, "JPEGImages") 22 | labels_dir = os.path.join(args.dataset_dir, "Annotations") 23 | out_images_dir = os.path.join(args.output_dir, "images") 24 | out_labels_dir = os.path.join(args.output_dir, "labels") 25 | os.makedirs(out_images_dir, exist_ok=True) 26 | os.makedirs(out_labels_dir, exist_ok=True) 27 | 28 | print(f"-- creating split files") 29 | print(f"---- train.txt") 30 | with open(os.path.join(args.output_dir, "train.txt"), "w") as train_split_file: 31 | with open(os.path.join(args.dataset_dir, "ImageSets/Main/train.txt"), "r") as train_file: 32 | samples = train_file.readlines() 33 | for i, sample in enumerate(samples): 34 | sample = sample.strip("\n") 35 | train_split_file.write(f"{sample}.jpg {sample}.xml\n") 36 | 37 | print(f"---- val.txt") 38 | with open(os.path.join(args.output_dir, "val.txt"), "w") as val_split_file: 39 | with open(os.path.join(args.dataset_dir, "ImageSets/Main/val.txt"), "r") as val_file: 40 | samples = val_file.readlines() 41 | for sample in samples: 42 | sample = sample.strip("\n") 43 | val_split_file.write(f"{sample}.jpg {sample}.xml\n") 44 | 45 | print(f"---- trainval.txt") 46 | with open(os.path.join(args.output_dir, "split.txt"), "w") as trainval_split_file: 47 | with open(os.path.join(args.dataset_dir, "ImageSets/Main/trainval.txt"), "r") as trainval_file: 48 | samples = trainval_file.readlines() 49 | for sample in samples: 50 | sample = sample.strip("\n") 51 | trainval_split_file.write(f"{sample}.jpg
{sample}.xml\n") 52 | 53 | print(f"-- copying images") 54 | for i, sample in enumerate(list(glob(os.path.join(images_dir, "*jpg")))): 55 | filename = os.path.basename(sample) 56 | shutil.copy( 57 | sample, 58 | os.path.join(out_images_dir, filename) 59 | ) 60 | 61 | print(f"-- copying labels") 62 | for i, sample in enumerate(list(glob(os.path.join(labels_dir, "*xml")))): 63 | filename = os.path.basename(sample) 64 | shutil.copy( 65 | sample, 66 | os.path.join(out_labels_dir, filename) 67 | ) 68 | 69 | print(f"-- writing label_maps.txt") 70 | with open(os.path.join(args.output_dir, "label_maps.txt"), "w") as label_maps_file: 71 | labels = list( 72 | glob(os.path.join(args.dataset_dir, "ImageSets/Main/*_train.txt"))) 73 | labels = [os.path.basename(i) for i in labels] 74 | labels = sorted([i[:i.index("_")] for i in labels]) 75 | for classname in labels: 76 | label_maps_file.write(f"{classname}\n") 77 | -------------------------------------------------------------------------------- /utils/prepare_svt_dataset.py: -------------------------------------------------------------------------------- 1 | import os 2 | import cv2 3 | import argparse 4 | from glob import glob 5 | import json 6 | from xml.dom import minidom 7 | import xml.etree.cElementTree as ET 8 | from data_utils import COCO_Text 9 | import numpy as np 10 | import shutil 11 | import os 12 | 13 | 14 | parser = argparse.ArgumentParser(description='Converts the coco dataset to a format suitable for training ssd with this repo.') 15 | parser.add_argument('dataset_dir', type=str, help='path to dataset dir.') 16 | parser.add_argument('output_dir', type=str, help='path to output dir.') 17 | args = parser.parse_args() 18 | 19 | assert os.path.exists(args.dataset_dir), "dataset_dir does not exist" 20 | testing_dir = os.path.join(os.path.join(args.output_dir, "test")) 21 | train_dir = os.path.join(os.path.join(args.output_dir, "train")) 22 | os.makedirs(os.path.join(testing_dir, "images"), exist_ok=True) 23 | os.makedirs(os.path.join(testing_dir, "labels"), exist_ok=True) 24 | os.makedirs(os.path.join(train_dir, "images"), exist_ok=True) 25 | os.makedirs(os.path.join(train_dir, "labels"), exist_ok=True) 26 | 27 | test = ET.parse(os.path.join(args.dataset_dir, "test.xml")) 28 | train = ET.parse(os.path.join(args.dataset_dir, "train.xml")) 29 | 30 | print("-- copy images for train sets") 31 | training_images = train.getroot().findall("image") 32 | for i, image in enumerate(training_images): 33 | print(f"image {i+1}/{len(training_images)}") 34 | image_filename = image.find("imageName").text 35 | image_filename = os.path.basename(image_filename) 36 | label_filename = f"{image_filename[:image_filename.index('.')]}.txt" 37 | rectangles = image.find("taggedRectangles").findall("taggedRectangle") 38 | 39 | shutil.copy( 40 | os.path.join(os.path.join(args.dataset_dir, "img"), image_filename), 41 | os.path.join(os.path.join(train_dir, "images"), image_filename) 42 | ) 43 | 44 | with open(os.path.join(os.path.join(train_dir, "labels"), label_filename), "w") as label_file: 45 | for rectangle in rectangles: 46 | text = rectangle.find("tag").text 47 | xmin = int(rectangle.attrib["x"]) 48 | ymin = int(rectangle.attrib["y"]) 49 | width = int(rectangle.attrib["width"]) 50 | height = int(rectangle.attrib["height"]) 51 | label_file.write(f"{xmin},{ymin},{xmin+width},{ymin},{xmin+width},{ymin+height},{xmin},{ymin+height},{text}\n") 52 | 53 | print("-- copy images for test sets") 54 | testing_images = test.getroot().findall("image") 55 | for i, image in 
enumerate(testing_images): 56 | print(f"image {i+1}/{len(testing_images)}") 57 | image_filename = image.find("imageName").text 58 | image_filename = os.path.basename(image_filename) 59 | label_filename = f"{image_filename[:image_filename.index('.')]}.txt" 60 | rectangles = image.find("taggedRectangles").findall("taggedRectangle") 61 | 62 | shutil.copy( 63 | os.path.join(os.path.join(args.dataset_dir, "img"), image_filename), 64 | os.path.join(os.path.join(testing_dir, "images"), image_filename) 65 | ) 66 | 67 | with open(os.path.join(os.path.join(testing_dir, "labels"), label_filename), "w") as label_file: 68 | for rectangle in rectangles: 69 | text = rectangle.find("tag").text 70 | xmin = int(rectangle.attrib["x"]) 71 | ymin = int(rectangle.attrib["y"]) 72 | width = int(rectangle.attrib["width"]) 73 | height = int(rectangle.attrib["height"]) 74 | label_file.write(f"{xmin},{ymin},{xmin+width},{ymin},{xmin+width},{ymin+height},{xmin},{ymin+height},{text}\n") 75 | -------------------------------------------------------------------------------- /utils/prepare_synthtext_dataset.py: -------------------------------------------------------------------------------- 1 | import os 2 | import cv2 3 | import numpy as np 4 | import argparse 5 | import shutil 6 | from scipy import io 7 | from glob import glob 8 | 9 | parser = argparse.ArgumentParser(description='Converts the synthtext dataset to a format suitable for training textboxes plus plus with this repo.') 10 | parser.add_argument('annotations_file', type=str, help='path to annotations file.') 11 | parser.add_argument('images_dir', type=str, help='path to images dir.') 12 | parser.add_argument('output_dir', type=str, help='path to output dir.') 13 | args = parser.parse_args() 14 | 15 | assert os.path.exists(args.annotations_file), "annotations_file does not exist" 16 | assert os.path.exists(args.images_dir), "images_dir does not exist" 17 | 18 | images_output_dir = os.path.join(args.output_dir, "images") 19 | labels_output_dir = os.path.join(args.output_dir, "labels") 20 | 21 | os.makedirs(images_output_dir, exist_ok=True) 22 | os.makedirs(labels_output_dir, exist_ok=True) 23 | 24 | ground_truth_file = io.loadmat(args.annotations_file) 25 | 26 | 27 | def clip_polygon(p, image): 28 | image_height, image_width, _ = image.shape 29 | polygon = p.copy() 30 | for n in [0, 2, 4, 6]: 31 | if polygon[n] < 0: 32 | polygon[n] = 0 33 | elif polygon[n] > image_width: 34 | polygon[n] = image_width 35 | for n in [1, 3, 5, 7]: 36 | if polygon[n] < 0: 37 | polygon[n] = 0 38 | elif polygon[n] > image_height: 39 | polygon[n] = image_height 40 | return polygon 41 | 42 | 43 | with open(os.path.join(args.output_dir, "samples.txt"), "w") as samples_file: 44 | for img_id in range(ground_truth_file["imnames"].shape[-1]): 45 | print(f"image: {img_id+1}/{ground_truth_file['imnames'].shape[-1]}") 46 | imname = ground_truth_file["imnames"][0][img_id][0] 47 | texts = ground_truth_file["txt"][0][img_id] 48 | wordBboxes = ground_truth_file["wordBB"][0] 49 | polygons = np.concatenate( 50 | [ 51 | np.expand_dims(wordBboxes[img_id][0].transpose(), axis=-1), 52 | np.expand_dims(wordBboxes[img_id][1].transpose(), axis=-1), 53 | ], 54 | axis=-1 55 | ) 56 | 57 | words = [] 58 | for word in texts: 59 | for i in word.split("\n"): 60 | for j in i.split(" "): 61 | if j != "": 62 | words.append(j) 63 | 64 | filename = os.path.basename(imname) 65 | sample = f"{filename} {filename[:filename.index('.')]}.txt" 66 | 67 | shutil.copy(os.path.join(args.images_dir, imname), 
os.path.join(images_output_dir, filename)) 68 | with open(os.path.join(labels_output_dir, f"{filename[:filename.index('.')]}.txt"), "w") as label_file: 69 | image = cv2.imread(os.path.join(images_output_dir, filename)) 70 | if len(polygons.shape) == 2: 71 | word = words[0] 72 | polygon = np.reshape(polygons, (8,)) 73 | polygon = clip_polygon(polygon, image) 74 | 75 | for coord in polygon: 76 | label_file.write(str(float(coord))) 77 | label_file.write(",") 78 | label_file.write(word) 79 | label_file.write("\n") 80 | else: 81 | for i, polygon in enumerate(polygons): 82 | word = words[i] 83 | polygon = np.reshape(polygon, (8,)) 84 | polygon = clip_polygon(polygon, image) 85 | 86 | for coord in polygon: 87 | label_file.write(str(float(coord))) 88 | label_file.write(",") 89 | label_file.write(word) 90 | label_file.write("\n") 91 | 92 | samples_file.write(sample) 93 | samples_file.write("\n") 94 | -------------------------------------------------------------------------------- /utils/ssd_utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .decode_predictions import decode_predictions 2 | from .encode_bboxes import encode_bboxes 3 | from .generate_default_boxes_for_feature_map import generate_default_boxes_for_feature_map 4 | from .get_number_default_boxes import get_number_default_boxes 5 | from .match_gt_boxes_to_default_boxes import match_gt_boxes_to_default_boxes 6 | from .read_sample import read_sample 7 | -------------------------------------------------------------------------------- /utils/ssd_utils/encode_bboxes.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def encode_bboxes(y, epsilon=10e-5): 5 | """ Encode the label to a proper format suitable for training SSD network. 6 | 7 | Args: 8 | - y: A numpy of shape (num_default_boxes, num_classes + 12) representing a label sample. 9 | 10 | Returns: 11 | - A numpy array with the same shape as y but its gt boxes values has been encoded to the proper SSD format. 12 | 13 | Paper References: 14 | - Liu, W., Anguelov, D., Erhan, D., Szegedy, C., Reed, S., Fu, C.Y., & Berg, A. C. (2016). 15 | SSD: Single Shot MultiBox Detector. 
https://arxiv.org/abs/1512.02325 16 | 17 | Webpage References: 18 | - https://leimao.github.io/blog/Bounding-Box-Encoding-Decoding/ 19 | 20 | Code References: 21 | - https://github.com/pierluigiferrari/ssd_keras/blob/master/ssd_encoder_decoder/ssd_input_encoder.py 22 | """ 23 | gt_boxes = y[:, -12:-8] 24 | df_boxes = y[:, -8:-4] 25 | variances = y[:, -4:] 26 | encoded_gt_boxes_cx = ((gt_boxes[:, 0] - df_boxes[:, 0]) / (df_boxes[:, 2])) / np.sqrt(variances[:, 0]) 27 | encoded_gt_boxes_cy = ((gt_boxes[:, 1] - df_boxes[:, 1]) / (df_boxes[:, 3])) / np.sqrt(variances[:, 1]) 28 | encoded_gt_boxes_w = np.log(epsilon + gt_boxes[:, 2] / df_boxes[:, 2]) / np.sqrt(variances[:, 2]) 29 | encoded_gt_boxes_h = np.log(epsilon + gt_boxes[:, 3] / df_boxes[:, 3]) / np.sqrt(variances[:, 3]) 30 | y[:, -12] = encoded_gt_boxes_cx 31 | y[:, -11] = encoded_gt_boxes_cy 32 | y[:, -10] = encoded_gt_boxes_w 33 | y[:, -9] = encoded_gt_boxes_h 34 | return y 35 | -------------------------------------------------------------------------------- /utils/ssd_utils/generate_default_boxes_for_feature_map.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from .get_number_default_boxes import get_number_default_boxes 3 | from utils.bbox_utils import center_to_corner, corner_to_center 4 | 5 | 6 | def generate_default_boxes_for_feature_map( 7 | feature_map_size, 8 | image_size, 9 | offset, 10 | scale, 11 | next_scale, 12 | aspect_ratios, 13 | variances, 14 | extra_box_for_ar_1, 15 | clip_boxes=True, 16 | ): 17 | """ Generates a 4D numpy array representing default boxes. 18 | 19 | Note: 20 | - The structure of a default box is [cx, cy, width, height] followed by its four variance values 21 | 22 | Args: 23 | - feature_map_size: The size of the feature map. (must be square) 24 | - image_size: The size of the input image. (must be square) 25 | - offset: The offset for the center of the default boxes. The order is (offset_x, offset_y) 26 | - scale: The current scale of the default boxes. 27 | - next_scale: The next scale of the default boxes. 28 | - aspect_ratios: A list of aspect ratios representing the default boxes. 29 | - variances: A list of 4 variance values appended to each default box (used later when encoding/decoding box offsets). 30 | - extra_box_for_ar_1: Whether to add an extra box for the default box with aspect ratio 1.
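Note: when extra_box_for_ar_1 is True, the extra box's side length uses the geometric mean of the two scales, i.e. image_size * sqrt(scale * next_scale), following the SSD paper; e.g. (illustrative numbers) image_size=300, scale=0.2, next_scale=0.34 gives an extra box of roughly 78x78 pixels.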
31 | 32 | Returns: 33 | - A 4D numpy array of shape (feature_map_size, feature_map_size, num_default_boxes, 8) 34 | 35 | Raises: 36 | - offset does not have a len of 2 37 | 38 | Code References: 39 | - https://github.com/pierluigiferrari/ssd_keras/blob/master/keras_layers/keras_layer_AnchorBoxes.py 40 | """ 41 | assert len(offset) == 2, "offset must be of len 2" 42 | 43 | grid_size = image_size / feature_map_size 44 | offset_x, offset_y = offset 45 | num_default_boxes = get_number_default_boxes( 46 | aspect_ratios, 47 | extra_box_for_ar_1=extra_box_for_ar_1 48 | ) 49 | # get all widths and heights of the default boxes 50 | wh_list = [] 51 | for ar in aspect_ratios: 52 | if ar == 1.0 and extra_box_for_ar_1: 53 | wh_list.append([ 54 | image_size * np.sqrt(scale * next_scale) * np.sqrt(ar), 55 | image_size * np.sqrt(scale * next_scale) * (1 / np.sqrt(ar)), 56 | ]) 57 | wh_list.append([ 58 | image_size * scale * np.sqrt(ar), 59 | image_size * scale * (1 / np.sqrt(ar)), 60 | ]) 61 | wh_list = np.array(wh_list, dtype=float) 62 | # get all center points of each grid cell 63 | cx = np.linspace(offset_x * grid_size, image_size - (offset_x * grid_size), feature_map_size) 64 | cy = np.linspace(offset_y * grid_size, image_size - (offset_y * grid_size), feature_map_size) 65 | cx_grid, cy_grid = np.meshgrid(cx, cy) 66 | cx_grid, cy_grid = np.expand_dims(cx_grid, axis=-1), np.expand_dims(cy_grid, axis=-1) 67 | cx_grid, cy_grid = np.tile(cx_grid, (1, 1, num_default_boxes)), np.tile(cy_grid, (1, 1, num_default_boxes)) 68 | # assemble (cx, cy, w, h) for every grid cell and default box 69 | default_boxes = np.zeros((feature_map_size, feature_map_size, num_default_boxes, 4)) 70 | default_boxes[:, :, :, 0] = cx_grid 71 | default_boxes[:, :, :, 1] = cy_grid 72 | default_boxes[:, :, :, 2] = wh_list[:, 0] 73 | default_boxes[:, :, :, 3] = wh_list[:, 1] 74 | # clip overflow default boxes 75 | if clip_boxes: 76 | default_boxes = center_to_corner(default_boxes) 77 | x_coords = default_boxes[:, :, :, [0, 2]] 78 | x_coords[x_coords >= image_size] = image_size - 1 79 | x_coords[x_coords < 0] = 0 80 | default_boxes[:, :, :, [0, 2]] = x_coords 81 | y_coords = default_boxes[:, :, :, [1, 3]] 82 | y_coords[y_coords >= image_size] = image_size - 1 83 | y_coords[y_coords < 0] = 0 84 | default_boxes[:, :, :, [1, 3]] = y_coords 85 | default_boxes = corner_to_center(default_boxes) 86 | # normalize box coordinates to [0, 1] relative to the image size 87 | default_boxes[:, :, :, [0, 2]] /= image_size 88 | default_boxes[:, :, :, [1, 3]] /= image_size 89 | # append the variances to each default box 90 | variances_tensor = np.zeros_like(default_boxes) 91 | variances_tensor += variances 92 | default_boxes = np.concatenate([default_boxes, variances_tensor], axis=-1) 93 | return default_boxes 94 | -------------------------------------------------------------------------------- /utils/ssd_utils/get_number_default_boxes.py: -------------------------------------------------------------------------------- 1 | def get_number_default_boxes(aspect_ratios, extra_box_for_ar_1=True): 2 | """ Get the number of default boxes for each grid cell based on the number of aspect ratios 3 | and whether to add an extra box for aspect ratio 1 4 | 5 | Args: 6 | - aspect_ratios: A list containing the different aspect ratios of default boxes. 7 | - extra_box_for_ar_1: Whether to add an extra box for aspect ratio 1. 8 | 9 | Returns: 10 | - An integer for the number of default boxes.
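Example (illustrative): get_number_default_boxes([1.0, 2.0, 0.5], extra_box_for_ar_1=True) returns 4, i.e. one box per aspect ratio plus the extra scale-interpolated box for ar == 1.0.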
11 | """ 12 | num_aspect_ratios = len(aspect_ratios) 13 | return num_aspect_ratios + 1 if (1.0 in aspect_ratios) and extra_box_for_ar_1 else num_aspect_ratios 14 | -------------------------------------------------------------------------------- /utils/ssd_utils/match_gt_boxes_to_default_boxes.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from utils.bbox_utils import iou, center_to_corner 3 | 4 | 5 | def match_gt_boxes_to_default_boxes( 6 | gt_boxes, 7 | default_boxes, 8 | match_threshold=0.5, 9 | neutral_threshold=0.3 10 | ): 11 | """ Matches ground truth bounding boxes to default boxes based on the SSD paper. 12 | 13 | 'We begin by matching each ground truth box to the default box with the best jaccard overlap (as in MultiBox [7]). 14 | Unlike MultiBox, we then match default boxes to any ground truth with jaccard overlap higher than a threshold (0.5)' 15 | 16 | Args: 17 | - gt_boxes: A numpy array or tensor of shape (num_gt_boxes, 4). Structure [cx, cy, w, h] 18 | - default_boxes: A numpy array of tensor of shape (num_default_boxes, 4). Structure [cx, cy, w, h] 19 | - threshold: A float representing a target to decide whether the box is matched 20 | - default_boxes: A numpy array of tensor of shape (num_default_boxes, 4). Structure [cx, cy, w, h] 21 | 22 | Returns: 23 | - matches: A numpy array of shape (num_matches, 2). The first index in the last dimension is the index 24 | of the ground truth box and the last index is the default box index. 25 | - neutral_boxes: A numpy array of shape (num_neutral_boxes, 2). The first index in the last dimension is the index 26 | of the ground truth box and the last index is the default box index. 27 | 28 | Raises: 29 | - Either the shape of ground truth's boxes array or the default boxes array is not 2 30 | 31 | Code References: 32 | - https://github.com/pierluigiferrari/ssd_keras/blob/master/ssd_encoder_decoder/matching_utils.py 33 | 34 | Paper References: 35 | - Liu, W., Anguelov, D., Erhan, D., Szegedy, C., Reed, S., Fu, C.Y., & Berg, A. C. (2016). 36 | SSD: Single Shot MultiBox Detector. 
https://arxiv.org/abs/1512.02325 37 | """ 38 | 39 | assert len(gt_boxes.shape) == 2, "Shape of ground truth boxes array must be 2" 40 | assert len(default_boxes.shape) == 2, "Shape of default boxes array must be 2" 41 | 42 | # convert gt_boxes and default_boxes to [xmin, ymin, xmax, ymax] 43 | gt_boxes = center_to_corner(gt_boxes) 44 | default_boxes = center_to_corner(default_boxes) 45 | 46 | num_gt_boxes = gt_boxes.shape[0] 47 | num_default_boxes = default_boxes.shape[0] 48 | 49 | matches = np.zeros((num_gt_boxes, 2), dtype=int) 50 | 51 | # match each ground truth to the default box with the highest iou 52 | for i in range(num_gt_boxes): 53 | gt_box = gt_boxes[i] 54 | gt_box = np.tile( 55 | np.expand_dims(gt_box, axis=0), 56 | (num_default_boxes, 1) 57 | ) 58 | ious = iou(gt_box, default_boxes) 59 | matches[i] = [i, np.argmax(ious)] 60 | 61 | # match default boxes to ground truths with overlap higher than threshold 62 | gt_boxes = np.tile(np.expand_dims(gt_boxes, axis=1), (1, num_default_boxes, 1)) 63 | default_boxes = np.tile(np.expand_dims(default_boxes, axis=0), (num_gt_boxes, 1, 1)) 64 | ious = iou(gt_boxes, default_boxes) 65 | ious[:, matches[:, 1]] = 0 # zero out default boxes already claimed in the best-match step so they cannot be matched twice 66 | 67 | matched_gt_boxes_idxs = np.argmax(ious, axis=0) # for each default box, select the ground truth box that has the highest iou 68 | matched_ious = ious[matched_gt_boxes_idxs, list(range(num_default_boxes))] # get iou scores between gt and default box that were selected above 69 | matched_df_boxes_idxs = np.nonzero(matched_ious >= match_threshold)[0] # select only matched default boxes that have an iou of at least match_threshold 70 | matched_gt_boxes_idxs = matched_gt_boxes_idxs[matched_df_boxes_idxs] 71 | 72 | # concat the results of the two matching processes together 73 | matches = np.concatenate([ 74 | matches, 75 | np.concatenate([ 76 | np.expand_dims(matched_gt_boxes_idxs, axis=-1), 77 | np.expand_dims(matched_df_boxes_idxs, axis=-1) 78 | ], axis=-1), 79 | ], axis=0) 80 | ious[:, matches[:, 1]] = 0 # zero out everything matched so far before searching for neutral boxes 81 | 82 | # find neutral boxes (ious that are higher than neutral_threshold but below match_threshold) 83 | # these boxes are neither background nor have a high enough iou to qualify as a match. 84 | background_gt_boxes_idxs = np.argmax(ious, axis=0) 85 | background_gt_boxes_ious = ious[background_gt_boxes_idxs, list(range(num_default_boxes))] 86 | neutral_df_boxes_idxs = np.nonzero(background_gt_boxes_ious >= neutral_threshold)[0] 87 | neutral_gt_boxes_idxs = background_gt_boxes_idxs[neutral_df_boxes_idxs] 88 | neutral_boxes = np.concatenate([ 89 | np.expand_dims(neutral_gt_boxes_idxs, axis=-1), 90 | np.expand_dims(neutral_df_boxes_idxs, axis=-1) 91 | ], axis=-1) 92 | 93 | return matches, neutral_boxes 94 | -------------------------------------------------------------------------------- /utils/ssd_utils/read_sample.py: -------------------------------------------------------------------------------- 1 | import os 2 | import cv2 3 | import xml.etree.ElementTree as ET 4 | import numpy as np 5 | from utils import pascal_voc_utils 6 | 7 | 8 | def read_sample(image_path, label_path): 9 | """ Read an image and its label file in Pascal VOC xml format.
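For example (hypothetical paths): read_sample("images/000005.jpg", "labels/000005.xml") returns the image as a float array, an (n, 4) array of [xmin, ymin, xmax, ymax] boxes, and a list of n class name strings.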
10 | 11 | Args: 12 | - image_path: path to image file 13 | - label_path: path to label xml file 14 | 15 | Returns: 16 | - image: a numpy array with a data type of float 17 | - bboxes: a numpy array with a data type of float 18 | - classes: a list of strings 19 | 20 | Raises: 21 | - Image file does not exist 22 | - Label file does not exist 23 | """ 24 | image_path = image_path.strip("\n") 25 | label_path = label_path.strip("\n") 26 | assert os.path.exists(image_path), "Image file does not exist." 27 | bboxes, classes = pascal_voc_utils.read_label(label_path) 28 | image = cv2.imread(image_path) # read image in bgr format 29 | return np.array(image, dtype=float), np.array(bboxes, dtype=float), classes 30 | -------------------------------------------------------------------------------- /utils/textboxes_utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .decode_predictions import decode_predictions 2 | from .get_bboxes_from_quads import get_bboxes_from_quads 3 | from .sort_quads_vertices import sort_quads_vertices 4 | from .read_sample import read_sample 5 | from .encode_textboxes import encode_textboxes 6 | from .get_samples import get_samples 7 | from .get_num_quads import get_num_quads 8 | -------------------------------------------------------------------------------- /utils/textboxes_utils/encode_textboxes.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from utils import bbox_utils 3 | 4 | 5 | def encode_textboxes(y, epsilon=10e-5): 6 | """ Encode the label to a proper format suitable for training the TextBoxes++ network. 7 | 8 | Args: 9 | - y: A numpy array of shape (num_default_boxes, 2 + 12 + 8) representing a label sample. 10 | 11 | Returns: 12 | - A numpy array with the same shape as y but its gt box values have been encoded to the proper TextBoxes++ format. 13 | 14 | Paper References: 15 | - Liao, M., Shi, B., & Bai, X. (2018). TextBoxes++: A Single-Shot Oriented Scene Text Detector.
https://arxiv.org/abs/1801.02765 16 | """ 17 | gt_textboxes = y[:, -20:-8] 18 | df_boxes = y[:, -8:-4] 19 | df_boxes_vertices = bbox_utils.center_to_vertices(df_boxes) 20 | variances = y[:, -4:] 21 | encoded_gt_textboxes_cx = ((gt_textboxes[:, 0] - df_boxes[:, 0]) / (df_boxes[:, 2])) / np.sqrt(variances[:, 0]) 22 | encoded_gt_textboxes_cy = ((gt_textboxes[:, 1] - df_boxes[:, 1]) / (df_boxes[:, 3])) / np.sqrt(variances[:, 1]) 23 | encoded_gt_textboxes_w = np.log(epsilon + gt_textboxes[:, 2] / df_boxes[:, 2]) / np.sqrt(variances[:, 2]) 24 | encoded_gt_textboxes_h = np.log(epsilon + gt_textboxes[:, 3] / df_boxes[:, 3]) / np.sqrt(variances[:, 3]) 25 | encoded_gt_textboxes_x1 = ((gt_textboxes[:, 4] - df_boxes_vertices[:, 0, 0]) / df_boxes[:, 2]) / np.sqrt(variances[:, 0]) 26 | encoded_gt_textboxes_y1 = ((gt_textboxes[:, 5] - df_boxes_vertices[:, 0, 1]) / df_boxes[:, 3]) / np.sqrt(variances[:, 1]) 27 | encoded_gt_textboxes_x2 = ((gt_textboxes[:, 6] - df_boxes_vertices[:, 1, 0]) / df_boxes[:, 2]) / np.sqrt(variances[:, 0]) 28 | encoded_gt_textboxes_y2 = ((gt_textboxes[:, 7] - df_boxes_vertices[:, 1, 1]) / df_boxes[:, 3]) / np.sqrt(variances[:, 1]) 29 | encoded_gt_textboxes_x3 = ((gt_textboxes[:, 8] - df_boxes_vertices[:, 2, 0]) / df_boxes[:, 2]) / np.sqrt(variances[:, 0]) 30 | encoded_gt_textboxes_y3 = ((gt_textboxes[:, 9] - df_boxes_vertices[:, 2, 1]) / df_boxes[:, 3]) / np.sqrt(variances[:, 1]) 31 | encoded_gt_textboxes_x4 = ((gt_textboxes[:, 10] - df_boxes_vertices[:, 3, 0]) / df_boxes[:, 2]) / np.sqrt(variances[:, 0]) 32 | encoded_gt_textboxes_y4 = ((gt_textboxes[:, 11] - df_boxes_vertices[:, 3, 1]) / df_boxes[:, 3]) / np.sqrt(variances[:, 1]) 33 | y[:, -20] = encoded_gt_textboxes_cx 34 | y[:, -19] = encoded_gt_textboxes_cy 35 | y[:, -18] = encoded_gt_textboxes_w 36 | y[:, -17] = encoded_gt_textboxes_h 37 | y[:, -16] = encoded_gt_textboxes_x1 38 | y[:, -15] = encoded_gt_textboxes_y1 39 | y[:, -14] = encoded_gt_textboxes_x2 40 | y[:, -13] = encoded_gt_textboxes_y2 41 | y[:, -12] = encoded_gt_textboxes_x3 42 | y[:, -11] = encoded_gt_textboxes_y3 43 | y[:, -10] = encoded_gt_textboxes_x4 44 | y[:, -9] = encoded_gt_textboxes_y4 45 | return y 46 | -------------------------------------------------------------------------------- /utils/textboxes_utils/get_bboxes_from_quads.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def get_bboxes_from_quads(quads): 5 | """ Extracts the minimum axis-aligned bounding rectangle from each quadrilateral. 6 | 7 | Args: 8 | - quads: A numpy array of shape (n, 4, 2) representing the vertices of the quadrilaterals.
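For example (illustrative values): a quad with vertices (0, 0), (10, 0), (10, 4), (0, 4) yields [5., 2., 10., 4.], i.e. center (5, 2) with width 10 and height 4.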
--------------------------------------------------------------------------------
/utils/textboxes_utils/get_num_quads.py:
--------------------------------------------------------------------------------
import os


def get_num_quads(label_file):
    """ Count the valid quadrilateral labels in a label file.

    Args:
        - label_file: Path to the label file. A valid line holds 8 comma-separated
          vertex coordinates followed by a text transcription.

    Returns:
        - The number of valid quadrilateral labels found in the file.
    """
    label_path = label_file.strip("\n")
    assert os.path.exists(label_path), "Label file does not exist."

    with open(label_path, "r") as label_file:
        temp_labels = label_file.readlines()

    num_labels = 0

    for label in temp_labels:
        label = label.strip("\ufeff").strip("\n")
        label = label.split(",")

        if len(label[:-1]) != 8:
            continue

        num_labels += 1

    return num_labels
--------------------------------------------------------------------------------
/utils/textboxes_utils/get_samples.py:
--------------------------------------------------------------------------------
import os
from glob import glob
from utils import textboxes_utils


def get_samples(images_dir, labels_dir):
    """ Create a list of samples that can be fed to a data generator.

    Args:
        - images_dir: Path to images directory.
        - labels_dir: Path to labels directory.

    Returns:
        - A list of samples. Each sample is a string containing the paths to both the image file and its corresponding label file, separated by a space.

    Raises:
        - images_dir is not a directory.
        - labels_dir is not a directory.
    """
    assert os.path.isdir(images_dir), "images_dir is not a directory."
    assert os.path.isdir(labels_dir), "labels_dir is not a directory."

    images = sorted(list(glob(os.path.join(images_dir, "*.jpg"))))
    labels = sorted(list(glob(os.path.join(labels_dir, "*.txt"))))

    assert len(images) == len(labels), "the number of images and the number of labels do not match"

    samples = []

    all_samples = list(zip(images, labels))
    num_samples = len(all_samples)

    for i, (image_path, label_path) in enumerate(all_samples):

        if (i % 100 == 0):
            print(f"{i+1}/{num_samples}")

        # skip samples whose label file contains no valid quadrilaterals
        num_quads = textboxes_utils.get_num_quads(label_path)
        if num_quads == 0:
            continue

        samples.append(f"{image_path} {label_path}")

    return samples
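# Illustrative sketch (not part of the repository): the label files consumed by
# get_num_quads and read_sample are assumed to be ICDAR-style, one quadrilateral
# per line as "x1,y1,x2,y2,x3,y3,x4,y4,text". Writing and counting a toy file:
# from utils.textboxes_utils import get_num_quads  # repo import

with open("/tmp/toy_label.txt", "w") as f:
    f.write("10,10,90,12,88,40,8,38,HELLO\n")  # 8 coordinates + transcription
    f.write("malformed line\n")                # skipped by both readers

print(get_num_quads("/tmp/toy_label.txt"))  # 1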
--------------------------------------------------------------------------------
/utils/textboxes_utils/read_sample.py:
--------------------------------------------------------------------------------
import os
import cv2
import numpy as np


def read_sample(image_path, label_path):
    """ Read an image and its label file (one comma-separated quadrilateral per line).

    Args:
        - image_path: path to image file
        - label_path: path to label text file

    Returns:
        - image: a numpy array with a data type of float
        - quads: a numpy array with a data type of float

    Raises:
        - Image file does not exist
        - Label file does not exist
    """
    image_path = image_path.strip("\n")
    label_path = label_path.strip("\n")
    assert os.path.exists(image_path), "Image file does not exist."
    assert os.path.exists(label_path), "Label file does not exist."

    image = cv2.imread(image_path)  # read image in bgr format

    with open(label_path, "r") as label_file:
        temp_labels = label_file.readlines()

    labels = []

    for label in temp_labels:
        label = label.strip("\ufeff").strip("\n")
        label = label.split(",")

        if len(label) != 9:
            continue

        label = [float(i) for i in label[:8]]
        labels.append(label)

    labels = np.array(labels)
    quads = np.reshape(labels, (labels.shape[0], 4, 2))

    # np.float is a deprecated alias for the builtin float (float64)
    return np.array(image, dtype=np.float64), np.array(quads, dtype=np.float64)
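# Illustrative sketch (not part of the repository): each parsed label line
# becomes one (4, 2) array of vertices, so n lines yield quads of shape (n, 4, 2).
import numpy as np

flat = np.array([[10, 10, 90, 12, 88, 40, 8, 38]], dtype=np.float64)  # one label row
quads = np.reshape(flat, (flat.shape[0], 4, 2))
print(quads.shape)  # (1, 4, 2)
print(quads[0, 0])  # first vertex: [10. 10.]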
--------------------------------------------------------------------------------
/utils/textboxes_utils/sort_quads_vertices.py:
--------------------------------------------------------------------------------
import numpy as np
from .get_bboxes_from_quads import get_bboxes_from_quads
from utils import bbox_utils


def sort_quads_vertices(quads_prime):
    """ Sort quadrilateral vertices.

    Args:
        - quads_prime: A numpy array of shape (n, 4, 2) representing the quadrilaterals.

    Returns:
        - A numpy array with the same shape as quads_prime but the vertices of each quadrilateral are sorted based on the logic from Liao, Shi & Bai (2018).

    Paper References:
        - Liao, M., Shi, B., & Bai, X. (2018). TextBoxes++: A Single-Shot Oriented Scene Text Detector.
          https://arxiv.org/abs/1801.02765
    """
    num_quads = quads_prime.shape[0]
    quads = quads_prime.copy()
    # the vertices of each quad's minimum bounding rectangle act as reference points
    bboxes = get_bboxes_from_quads(quads_prime)
    bboxes = bbox_utils.center_to_vertices(bboxes)

    # enumerate all four cyclic shifts (delta) of the quad vertices against the
    # four bounding-rectangle vertices (i)
    deltas = np.reshape(np.tile(np.reshape(np.expand_dims(np.array([0, 1, 2, 3]), axis=0), (4, 1)), (1, 4)), (16, 1))
    i = np.reshape(np.tile(np.expand_dims(np.array([1, 2, 3, 4]), axis=0), (1, 4)), (16, 1))
    q_indexes = (i + deltas - 1) % 4 + 1
    indexes = np.concatenate([i, q_indexes], axis=-1)

    # summed point-to-point distance for each cyclic shift
    pts_b = bboxes[:, indexes[:, 0] - 1]
    pts_q = quads[:, indexes[:, 1] - 1]
    distance = np.sqrt((pts_b[..., 0] - pts_q[..., 0]) ** 2 + (pts_b[..., 1] - pts_q[..., 1]) ** 2)
    distance = np.reshape(distance, (num_quads, 4, 4))
    distance = np.sum(distance, axis=-1)

    # pick the shift (delta_m) that minimizes the summed distance
    delta_ms = np.argmin(distance, axis=-1)
    delta_ms = np.expand_dims(delta_ms, axis=-1)
    delta_ms = np.tile(delta_ms, (1, 4))
    delta_ms = np.reshape(delta_ms, (num_quads, 4, 1))

    i_prime = np.array([1, 2, 3, 4])
    i_prime = np.expand_dims(i_prime, axis=-1)
    i_prime = np.expand_dims(i_prime, axis=0)
    i_prime = np.tile(i_prime, (num_quads, 1, 1))
    q_idx_prime = (i_prime + delta_ms - 1) % 4 + 1
    i_prime = np.reshape(i_prime, (num_quads, 4)) - 1
    q_idx_prime = np.reshape(q_idx_prime, (num_quads, 4)) - 1

    # apply the chosen cyclic shift to every quad
    for i in range(num_quads):
        quads[i, i_prime[i]] = quads_prime[i, q_idx_prime[i]]

    return quads
--------------------------------------------------------------------------------
/utils/training_utils/__init__.py:
--------------------------------------------------------------------------------
from .ssd_mobilenetv2 import ssd_mobilenetv2
from .ssd_mobilenetv1 import ssd_mobilenetv1
from .ssd_vgg16 import ssd_vgg16
from .tbpp_vgg16 import tbpp_vgg16
--------------------------------------------------------------------------------
/utils/training_utils/ssd_mobilenetv1.py:
--------------------------------------------------------------------------------
import os
from losses import SSD_LOSS
from utils import data_utils
from networks import SSD_MOBILENET
from tensorflow.keras.optimizers import SGD
from data_generators import SSD_DATA_GENERATOR
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.applications.mobilenet import preprocess_input


def ssd_mobilenetv1(config, args):
    training_config = config["training"]
    with open(args.label_maps, "r") as label_map_file:
        label_maps = [i.strip("\n") for i in label_map_file.readlines()]

    training_samples = data_utils.get_samples_from_split(
        split_file=args.training_split,
        images_dir=args.images_dir,
        labels_dir=args.labels_dir
    )

    if args.validation_split is not None:
        validation_samples = data_utils.get_samples_from_split(
            split_file=args.validation_split,
            images_dir=args.images_dir,
            labels_dir=args.labels_dir
        )

    training_data_generator = SSD_DATA_GENERATOR(
        samples=training_samples,
        config=config,
        label_maps=label_maps,
        shuffle=args.shuffle,
        batch_size=args.batch_size,
        augment=args.augment,
        process_input_fn=preprocess_input
    )

    if args.validation_split is not None:
        print("-- validation split specified")
        validation_data_generator = SSD_DATA_GENERATOR(
            samples=validation_samples,
            config=config,
            label_maps=label_maps,
            shuffle=args.shuffle,
            batch_size=args.batch_size,
            augment=False,  # no augmentation on the validation set
            process_input_fn=preprocess_input
        )

    loss = SSD_LOSS(
        alpha=training_config["alpha"],
        min_negative_boxes=training_config["min_negative_boxes"],
        negative_boxes_ratio=training_config["negative_boxes_ratio"]
    )

    model = SSD_MOBILENET(
        config=config,
        label_maps=label_maps,
        is_training=True
    )

    optimizer = SGD(
        learning_rate=args.learning_rate,  # `lr` is a deprecated alias
        momentum=0.9,
        decay=0.0005,
        nesterov=False
    )

    model.compile(
        optimizer=optimizer,
        loss=loss.compute
    )

    if args.checkpoint is not None:
        assert os.path.exists(args.checkpoint), "checkpoint does not exist"
        model.load_weights(args.checkpoint, by_name=True)

    model.fit(
        x=training_data_generator,
        validation_data=validation_data_generator if args.validation_split is not None else None,
        batch_size=args.batch_size,
        validation_batch_size=args.batch_size,
        epochs=args.epochs,
        callbacks=[
            ModelCheckpoint(
                filepath=os.path.join(
                    args.output_dir,
                    "cp_{epoch:02d}_loss-{loss:.2f}.h5" if args.validation_split is None else "cp_{epoch:02d}_loss-{loss:.2f}_valloss-{val_loss:.2f}.h5"
                ),
                save_weights_only=True,
                monitor='loss' if args.validation_split is None else 'val_loss',
                mode='min'
            )
        ]
    )

    model.save_weights(os.path.join(args.output_dir, "model.h5"))
--------------------------------------------------------------------------------
/utils/training_utils/ssd_mobilenetv2.py:
--------------------------------------------------------------------------------
import os
from losses import SSD_LOSS
from utils import data_utils
from networks import SSD_MOBILENETV2
from tensorflow.keras.optimizers import SGD
from data_generators import SSD_DATA_GENERATOR
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.applications.mobilenet_v2 import preprocess_input


def ssd_mobilenetv2(config, args):
    training_config = config["training"]
    with open(args.label_maps, "r") as label_map_file:
        label_maps = [i.strip("\n") for i in label_map_file.readlines()]

    training_samples = data_utils.get_samples_from_split(
        split_file=args.training_split,
        images_dir=args.images_dir,
        labels_dir=args.labels_dir
    )

    if args.validation_split is not None:
        validation_samples = data_utils.get_samples_from_split(
            split_file=args.validation_split,
            images_dir=args.images_dir,
            labels_dir=args.labels_dir
        )

    training_data_generator = SSD_DATA_GENERATOR(
        samples=training_samples,
        config=config,
        label_maps=label_maps,
        shuffle=args.shuffle,
        batch_size=args.batch_size,
        augment=args.augment,
        process_input_fn=preprocess_input
    )

    if args.validation_split is not None:
        print("-- validation split specified")
        validation_data_generator = SSD_DATA_GENERATOR(
            samples=validation_samples,
            config=config,
            label_maps=label_maps,
            shuffle=args.shuffle,
            batch_size=args.batch_size,
            augment=False,  # no augmentation on the validation set
            process_input_fn=preprocess_input
        )

    loss = SSD_LOSS(
        alpha=training_config["alpha"],
        min_negative_boxes=training_config["min_negative_boxes"],
        negative_boxes_ratio=training_config["negative_boxes_ratio"]
    )

    model = SSD_MOBILENETV2(
        config=config,
        label_maps=label_maps,
        is_training=True
    )
    optimizer = SGD(
        learning_rate=args.learning_rate,  # `lr` is a deprecated alias
        momentum=0.9,
        decay=0.0005,
        nesterov=False
    )

    model.compile(
        optimizer=optimizer,
        loss=loss.compute
    )

    if args.checkpoint is not None:
        assert os.path.exists(args.checkpoint), "checkpoint does not exist"
        model.load_weights(args.checkpoint, by_name=True)

    model.fit(
        x=training_data_generator,
        validation_data=validation_data_generator if args.validation_split is not None else None,
        batch_size=args.batch_size,
        validation_batch_size=args.batch_size,
        epochs=args.epochs,
        callbacks=[
            ModelCheckpoint(
                filepath=os.path.join(
                    args.output_dir,
                    "cp_{epoch:02d}_loss-{loss:.2f}.h5" if args.validation_split is None else "cp_{epoch:02d}_loss-{loss:.2f}_valloss-{val_loss:.2f}.h5"
                ),
                save_weights_only=True,
                monitor='loss' if args.validation_split is None else 'val_loss',
                mode='min'
            )
        ]
    )

    model.save_weights(os.path.join(args.output_dir, "model.h5"))
--------------------------------------------------------------------------------
/utils/training_utils/ssd_vgg16.py:
--------------------------------------------------------------------------------
import os
from losses import SSD_LOSS
from utils import data_utils
from networks import SSD_VGG16
from tensorflow.keras.optimizers import SGD, Adam
from data_generators import SSD_DATA_GENERATOR
from tensorflow.keras.applications.vgg16 import preprocess_input


def ssd_vgg16(config, args, callbacks):
    training_config = config["training"]
    with open(args.label_maps, "r") as label_map_file:
        label_maps = [i.strip("\n") for i in label_map_file.readlines()]

    training_samples = data_utils.get_samples_from_split(
        split_file=args.training_split,
        images_dir=args.images_dir,
        labels_dir=args.labels_dir
    )

    if args.validation_split is not None:
        validation_samples = data_utils.get_samples_from_split(
            split_file=args.validation_split,
            images_dir=args.images_dir,
            labels_dir=args.labels_dir
        )

    training_data_generator = SSD_DATA_GENERATOR(
        samples=training_samples,
        config=config,
        label_maps=label_maps,
        shuffle=args.shuffle,
        batch_size=args.batch_size,
        augment=args.augment,
        process_input_fn=preprocess_input
    )

    if args.validation_split is not None:
        print("-- validation split specified")
        validation_data_generator = SSD_DATA_GENERATOR(
            samples=validation_samples,
            config=config,
            label_maps=label_maps,
            shuffle=args.shuffle,
            batch_size=args.batch_size,
            augment=False,  # no augmentation on the validation set
            process_input_fn=preprocess_input
        )

    loss = SSD_LOSS(
        alpha=training_config["alpha"],
        min_negative_boxes=training_config["min_negative_boxes"],
        negative_boxes_ratio=training_config["negative_boxes_ratio"]
    )

    if training_config["optimizer"]["name"] == "adam":
        optimizer = Adam(
            learning_rate=args.learning_rate,
            beta_1=training_config["optimizer"]["beta_1"],
            beta_2=training_config["optimizer"]["beta_2"],
            epsilon=training_config["optimizer"]["epsilon"],
            decay=training_config["optimizer"]["decay"]
        )
    elif training_config["optimizer"]["name"] == "sgd":
        optimizer = SGD(
            learning_rate=args.learning_rate,
            momentum=training_config["optimizer"]["momentum"],
            decay=training_config["optimizer"]["decay"],
            nesterov=training_config["optimizer"]["nesterov"]
        )
    else:
        # unknown optimizer name: fall back to Adam with the tf.keras defaults
        optimizer = Adam(
            learning_rate=args.learning_rate,
            beta_1=0.9,
            beta_2=0.999,
            epsilon=1e-08,
            decay=0.0
        )

    model = SSD_VGG16(
        config=config,
        label_maps=label_maps,
        is_training=True
    )

    if args.show_network_structure:
        model.summary()

    model.compile(
        optimizer=optimizer,
        loss=loss.compute
    )

    if args.checkpoint is not None:
        assert os.path.exists(args.checkpoint), "checkpoint does not exist"
        model.load_weights(args.checkpoint, by_name=True)

    model.fit(
        x=training_data_generator,
        validation_data=validation_data_generator if args.validation_split is not None else None,
        batch_size=args.batch_size,
        validation_batch_size=args.batch_size,
        epochs=args.epochs,
        initial_epoch=args.initial_epoch,
        callbacks=callbacks,
    )

    model.save_weights(os.path.join(args.output_dir, "model.h5"))
--------------------------------------------------------------------------------
/utils/training_utils/tbpp_vgg16.py:
--------------------------------------------------------------------------------
import os
from utils import data_utils
from losses import TBPP_LOSS
from networks import TBPP_VGG16
from tensorflow.keras.optimizers import Adam
from data_generators import TBPP_DATA_GENERATOR
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.applications.vgg16 import preprocess_input


def tbpp_vgg16(config, args):
    training_config = config["training"]

    training_samples = data_utils.get_samples_from_split(
        split_file=args.training_split,
        images_dir=args.images_dir,
        labels_dir=args.labels_dir
    )

    if args.validation_split is not None:
        validation_samples = data_utils.get_samples_from_split(
            split_file=args.validation_split,
            images_dir=args.images_dir,
            labels_dir=args.labels_dir
        )

    print("creating data generator for tbpp_vgg16")
    training_data_generator = TBPP_DATA_GENERATOR(
        samples=training_samples,
        config=config,
        shuffle=args.shuffle,
        batch_size=args.batch_size,
        augment=args.augment,
        process_input_fn=preprocess_input
    )

    if args.validation_split is not None:
        print("-- validation split specified")
        validation_data_generator = TBPP_DATA_GENERATOR(
            samples=validation_samples,
            config=config,
            shuffle=args.shuffle,
            batch_size=args.batch_size,
            augment=False,  # no augmentation on the validation set
            process_input_fn=preprocess_input
        )

    loss = TBPP_LOSS(
        alpha=training_config["alpha"],
        min_negative_boxes=training_config["min_negative_boxes"],
        negative_boxes_ratio=training_config["negative_boxes_ratio"]
    )

    model = TBPP_VGG16(
        config=config,
        is_training=True
    )

    optimizer = Adam(
        learning_rate=args.learning_rate,  # `lr` is a deprecated alias
        beta_1=0.9,
        beta_2=0.999,
        epsilon=0.001,
        decay=0.0
    )

    model.compile(
        optimizer=optimizer,
        loss=loss.compute
    )

    if args.checkpoint is not None:
        assert os.path.exists(args.checkpoint), "checkpoint does not exist"
        model.load_weights(args.checkpoint, by_name=True)

    model.fit(
        x=training_data_generator,
        validation_data=validation_data_generator if args.validation_split is not None else None,
        batch_size=args.batch_size,
        validation_batch_size=args.batch_size,
        epochs=args.epochs,
        callbacks=[
            ModelCheckpoint(
                filepath=os.path.join(
                    args.output_dir,
                    "cp_{epoch:02d}_loss-{loss:.2f}.h5" if args.validation_split is None else "cp_{epoch:02d}_loss-{loss:.2f}_valloss-{val_loss:.2f}.h5"
                ),
                save_weights_only=True,
                monitor='loss' if args.validation_split is None else 'val_loss',
                mode='min'
            )
        ]
    )

    model.save_weights(os.path.join(args.output_dir, "model.h5"))
--------------------------------------------------------------------------------
/utils/visualize_training_metrics.py:
--------------------------------------------------------------------------------
import os
import argparse
import pandas as pd
import matplotlib.pyplot as plt

parser = argparse.ArgumentParser(
    description='Visualize training metrics.')
parser.add_argument('logfile', type=str, help='path to the training log csv file.')
args = parser.parse_args()

assert os.path.exists(args.logfile), "logfile does not exist"

data = pd.read_csv(args.logfile)

plt.plot(data["epoch"], data["loss"], label="loss")
if "val_loss" in data.columns:  # only present when a validation split was used
    plt.plot(data["epoch"], data["val_loss"], label="val_loss")
plt.xlabel("Epoch")
plt.ylabel("Loss")
plt.legend()
plt.show()
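# Usage note (not part of the repository): the script expects a CSV in the
# format written by tf.keras.callbacks.CSVLogger, e.g.:
#
#   epoch,loss,val_loss
#   0,12.31,11.90
#   1,10.02,9.87
#
# and would then be invoked with a path such as (hypothetical):
#   python utils/visualize_training_metrics.py output/training_log.csv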
--------------------------------------------------------------------------------
/webcam.py:
--------------------------------------------------------------------------------
import os
import cv2
import json
import argparse
import numpy as np
from networks import SSD_VGG16
from tensorflow.keras.applications import vgg16, mobilenet_v2
from utils import bbox_utils
from networks import SSD_MOBILENETV2

parser = argparse.ArgumentParser(
    description='run inference from images on webcam.')
parser.add_argument('config', type=str, help='path to config file.')
parser.add_argument('weights', type=str, help='path to the weight file.')
parser.add_argument('--label_maps', type=str, help='path to label maps file.')
parser.add_argument('--confidence_threshold', type=float,
                    help='the confidence score a detection should match in order to be counted.', default=0.9)
parser.add_argument('--num_predictions', type=int,
                    help='the number of detections to be output as final detections', default=10)
args = parser.parse_args()

with open(args.config, "r") as config_file:
    config = json.load(config_file)

input_size = config["model"]["input_size"]
model_config = config["model"]

if model_config["name"] == "ssd_mobilenetv2":
    with open(args.label_maps, "r") as file:
        label_maps = [line.strip("\n") for line in file.readlines()]
    model = SSD_MOBILENETV2(
        config,
        label_maps,
        is_training=False,
        num_predictions=args.num_predictions)
    process_input_fn = mobilenet_v2.preprocess_input
else:
    print("model has not been implemented")
    exit()

model.load_weights(args.weights)

webcam = cv2.VideoCapture(0)

while True:
    check, image = webcam.read()
    if not check:  # stop if a webcam frame could not be read
        break
    display_image = image.copy()
    image_height, image_width, _ = image.shape
    height_scale, width_scale = input_size/image_height, input_size/image_width

    image = cv2.resize(image, (input_size, input_size))
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image = process_input_fn(image)

    image = np.expand_dims(image, axis=0)
    y_pred = model.predict(image)

    for i, pred in enumerate(y_pred[0]):
        classname = label_maps[int(pred[0]) - 1].upper()
        confidence_score = pred[1]

        score = f"{'%.2f' % (confidence_score * 100)}%"
        print(f"-- {classname}: {score}")

        if confidence_score <= 1 and confidence_score > args.confidence_threshold:
            # map the detection from network input coordinates back to the original frame
            xmin = max(int(pred[2] / width_scale), 1)
            ymin = max(int(pred[3] / height_scale), 1)
            xmax = min(int(pred[4] / width_scale), image_width-1)
            ymax = min(int(pred[5] / height_scale), image_height-1)
            x1 = max(min(int(pred[6] / width_scale), image_width), 0)
            y1 = max(min(int(pred[7] / height_scale), image_height), 0)
            x2 = max(min(int(pred[8] / width_scale), image_width), 0)
            y2 = max(min(int(pred[9] / height_scale), image_height), 0)
            x3 = max(min(int(pred[10] / width_scale), image_width), 0)
            y3 = max(min(int(pred[11] / height_scale), image_height), 0)
            x4 = max(min(int(pred[12] / width_scale), image_width), 0)
            y4 = max(min(int(pred[13] / height_scale), image_height), 0)

            # cv2.polylines expects int32 vertices (np.int is deprecated)
            quad = np.array(
                [[x1, y1], [x2, y2], [x3, y3], [x4, y4]], dtype=np.int32)

            cv2.putText(
                display_image,
                classname,
                (int(xmin), int(ymin)),
                cv2.FONT_HERSHEY_PLAIN,
                1,
                (100, 100, 255),
                1, 1)

            cv2.polylines(
                display_image,
                [quad],
                True,
                (0, 255, 0),
                2
            )

            cv2.rectangle(
                display_image,
                (xmin, ymin),
                (xmax, ymax),
                (255, 0, 0),
                1
            )

    cv2.imshow('video', display_image)

    if cv2.waitKey(1) == ord('q'):
        break

webcam.release()
cv2.destroyAllWindows()
--------------------------------------------------------------------------------
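# Illustrative sketch (not part of the repository): webcam.py assumes each
# decoded prediction row is laid out as
#   [class_id, confidence, xmin, ymin, xmax, ymax, x1, y1, x2, y2, x3, y3, x4, y4]
# so the axis-aligned box is pred[2:6] and the quadrilateral vertices pred[6:14].
# A toy row illustrating that slicing:
import numpy as np

pred = np.array([1, 0.95, 10, 10, 90, 40, 10, 10, 90, 12, 88, 40, 8, 38])
xmin, ymin, xmax, ymax = pred[2:6]
quad = pred[6:14].reshape(4, 2).astype(np.int32)
print(xmin, ymin, xmax, ymax, quad.shape)  # 10.0 10.0 90.0 40.0 (4, 2)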