├── .gitignore
├── LICENSE.md
├── README.md
├── detect_onnx.py
├── detect_openvino.py
├── inf_requirements.txt
├── media
│   ├── 1.jpg
│   ├── 2.jpg
│   ├── 3.jpg
│   ├── 4.jpg
│   └── soccer1.mp4
├── models
│   └── .gitkeep
└── utils
    ├── detector_utils.py
    └── general.py
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # PEP 582; used by e.g.
github.com/David-OConnor/pyflow 98 | __pypackages__/ 99 | 100 | # Celery stuff 101 | celerybeat-schedule 102 | celerybeat.pid 103 | 104 | # SageMath parsed files 105 | *.sage.py 106 | 107 | # Environments 108 | .env 109 | .venv 110 | env/ 111 | venv/ 112 | ENV/ 113 | env.bak/ 114 | venv.bak/ 115 | 116 | # Spyder project settings 117 | .spyderproject 118 | .spyproject 119 | 120 | # Rope project settings 121 | .ropeproject 122 | 123 | # mkdocs documentation 124 | /site 125 | 126 | # mypy 127 | .mypy_cache/ 128 | .dmypy.json 129 | dmypy.json 130 | 131 | # Pyre type checker 132 | .pyre/ 133 | 134 | # pytype static type analyzer 135 | .pytype/ 136 | 137 | # Cython debug symbols 138 | cython_debug/ 139 | 140 | # output files 141 | output 142 | 143 | # pytorch weights 144 | *.pt 145 | *.pth 146 | 147 | # onnx weights 148 | *.onnx 149 | 150 | # openvino IR 151 | *.bin 152 | *.xml 153 | *.mapping 154 | 155 | # mac os fsystem files 156 | *.DS_Store 157 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | Copyright (c) 2022 SamSamhuns 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in all 11 | copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 14 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 15 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 16 | IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, 17 | DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 18 | OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE 19 | OR OTHER DEALINGS IN THE SOFTWARE. 20 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # YOLOv5 CPU Export and OpenVINO Inference 2 | 3 | [![Codacy Badge](https://app.codacy.com/project/badge/Grade/55c3d2e474f14e7b8cb6c611504457d9)](https://www.codacy.com/gh/SamSamhuns/yolov5_export_cpu/dashboard?utm_source=github.com&utm_medium=referral&utm_content=SamSamhuns/yolov5_export_cpu&utm_campaign=Badge_Grade) 4 | 5 | Documentation on exporting YOLOv5 models for fast CPU inference using Intel's OpenVINO framework (Tested on commits up to June 6, 2022 in docker). 6 | 7 | ## Google Colab Conversion 8 | 9 | Convert yolov5 model to IR format with Google Colab. [![Google Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1K8gnZEka47Gbcp1eJbBaSe3GxngJdvio?usp=sharing) (Recommended) 10 | 11 | ## 1. Clone and set up the Official YOLOv5 GitHub repository 12 | 13 |
14 | **Setup** 15 | 16 | All package installations should be done inside a virtualenv or conda env to prevent package conflict errors (a minimal environment setup is sketched right after this section). 17 | 18 | - Install the requirements for ONNX and OpenVINO inference 19 | 20 | ```bash 21 | pip install --upgrade pip 22 | pip install -r inf_requirements.txt 23 | ``` 24 | 25 | - Clone the yolov5 repository and install its requirements 26 | 27 | ```bash 28 | git clone https://github.com/ultralytics/yolov5 # clone repo 29 | cd yolov5 30 | pip install -r requirements.txt # base requirements 31 | ``` 32 | 33 |
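For reference, a minimal environment setup might look like the following (a sketch only; the environment name `yolov5_cpu` and the use of Python's built-in `venv` module are arbitrary choices, a conda env works equally well):

```bash
# create and activate an isolated environment (name is arbitrary)
python3 -m venv yolov5_cpu
source yolov5_cpu/bin/activate
# then install the inference requirements as listed above
pip install --upgrade pip
pip install -r inf_requirements.txt
```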
34 | 35 | ## 2. Export a Trained YOLOv5 Model as ONNX 36 | 37 |
38 | **Export** 39 | 40 | Export a pre-trained or custom-trained YOLOv5 model to generate the respective ONNX, TorchScript and CoreML formats of the model. The pre-trained `yolov5s.pt` is the lightest and fastest model for CPU inference. Other slower but more accurate models include `yolov5m.pt`, `yolov5l.pt` and `yolov5x.pt`. Details on all available models are in the Ultralytics YOLOv5 [README](https://github.com/ultralytics/yolov5#pretrained-checkpoints). 41 | 42 | A custom training checkpoint, i.e. `runs/exp/weights/best.pt`, can be used for conversion as well. 43 | 44 | - Export the pre-trained light `yolov5s.pt` model at 640x640 with batch size 1 45 | 46 | ```bash 47 | python export.py --weights yolov5s.pt --include onnx --img 640 --batch 1 48 | ``` 49 | 50 | - Export a custom checkpoint with a dynamic input shape {BATCH_SIZE, 3, HEIGHT, WIDTH}. Note: for CPU inference, BATCH_SIZE must be set to 1. Install `onnx-simplifier` to simplify the ONNX export 51 | 52 | ```bash 53 | pip install onnx-simplifier==0.3.10 54 | python export.py --weights runs/exp/weights/best.pt --include onnx --dynamic --simplify 55 | ``` 56 | 57 | - `cd` to the `yolov5_export_cpu` dir and move the ONNX model into the `yolov5_export_cpu/models` directory 58 | 59 | ```bash 60 | mv <path_to_exported_onnx_model> yolov5_export_cpu/models/ 61 | ``` 62 | A quick sanity check of the exported ONNX model is sketched right after this section. 63 |
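As an optional sanity check, the exported ONNX model can be opened with `onnxruntime` (the same library `detect_onnx.py` uses) to confirm its input/output shapes before converting it to OpenVINO. A minimal sketch, assuming the model was saved as `models/yolov5s.onnx`:

```python
import onnxruntime

# load the exported model and print its I/O signature
session = onnxruntime.InferenceSession("models/yolov5s.onnx")
print("Input :", session.get_inputs()[0].name, session.get_inputs()[0].shape)
print("Output:", session.get_outputs()[0].name, session.get_outputs()[0].shape)
# a fixed-shape export should report [1, 3, 640, 640];
# a --dynamic export shows symbolic dimensions instead of concrete numbers
```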
64 | 65 | ## 3. Test YOLOv5 ONNX model inference 66 | 67 |
68 | **ONNX inference** 69 | 70 | ```bash 71 | python detect_onnx.py -m image -i <path_to_image_file> 72 | python detect_onnx.py -m video -i <path_to_video_file> 73 | # python detect_onnx.py -h for more info 74 | ``` 75 | 76 | Optional: To convert all the frames in the `output` directory into an mp4 video with `ffmpeg`, use `ffmpeg -r 25 -start_number 00001 -i output/frame_onnx_%5d.jpg -vcodec libx264 -y -an onnx_result.mp4`. A concrete example using the bundled sample media follows this section. 77 | 78 |
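For example, using the sample media bundled in the `media` directory and the default ONNX path (`models/yolov5s.onnx`, configurable with `--ox`):

```bash
# single image
python detect_onnx.py -m image -i media/1.jpg
# video; annotated frames are written to the output directory
python detect_onnx.py -m video -i media/soccer1.mp4 -o output -t 0.6
```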
79 | 80 | ## 4. Export ONNX to OpenVINO 81 | 82 | **Option A below is the recommended approach.** 83 | 84 | ### Option A. Use OpenVINO's python dev library 85 | 86 |
87 | **A1. Install OpenVINO python dev library** 88 | 89 | Instructions for setting up OpenVINO are available [here](https://docs.openvino.ai/latest/openvino_docs_install_guides_install_dev_tools.html). 90 | 91 | ```bash 92 | # install the OpenVINO dev library required to convert ONNX to OpenVINO IR 93 | pip install openvino-dev[onnx] 94 | ``` 95 | 96 |
97 | 98 |
99 | **A2. Export ONNX to OpenVINO IR** 100 | 101 | This creates the OpenVINO Intermediate Representation (IR) model files (xml and bin) in the directory `models/yolov5_openvino`. 102 | 103 | **Important Note:** `--input_shape` must be provided and must match the image shape used when exporting the ONNX model. Batching might not be supported for CPU inference. 104 | 105 | ```bash 106 | # export onnx to OpenVINO IR 107 | mo \ 108 | --progress \ 109 | --input_shape [1,3,640,640] \ 110 | --input_model models/yolov5s.onnx \ 111 | --output_dir models/yolov5_openvino 112 | ``` 113 | 114 | [Full OpenVINO export options](https://docs.openvinotoolkit.org/latest/openvino_docs_MO_DG_prepare_model_convert_model_Converting_Model_General.html). A short script to confirm the generated IR loads correctly is sketched right after this section. 115 | 116 |
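Optionally, the generated IR can be loaded with the `openvino` runtime (the same API `detect_openvino.py` uses) to confirm the conversion succeeded. A minimal sketch, assuming the default output paths from the command above:

```python
import openvino as ov

core = ov.Core()
# read the IR files produced by the Model Optimizer
model = core.read_model(model="models/yolov5_openvino/yolov5s.xml",
                        weights="models/yolov5_openvino/yolov5s.bin")
print("Inputs :", model.inputs)   # expect an input of shape [1, 3, 640, 640]
print("Outputs:", model.outputs)
# compiling for CPU confirms the IR is runnable on this machine
compiled_model = core.compile_model(model=model, device_name="CPU")
```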
117 | 118 | ### Option B. Use OpenVINO Docker 119 | 120 |
121 | **B1. Download Docker and OpenVINO Docker Image** 122 | 123 | [Install docker](https://docs.docker.com/get-docker/) on your system if it is not already installed. 124 | 125 | Run the docker command below in a terminal; it automatically pulls the OpenVINO Docker image and starts a container. The `models` directory containing the ONNX model must be in the current working directory. A quick check that the model is visible inside the container is sketched right after this section. 126 | 127 | ```bash 128 | docker run -it --rm \ 129 | -v $PWD/models:/home/openvino/models \ 130 | openvino/ubuntu18_dev:latest \ 131 | /bin/bash -c "cd /home/openvino/; bash" 132 | ``` 133 | 134 |
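Once the container shell opens, it can be worth confirming that the bind mount worked and that the Model Optimizer entry point used in step B2 is available (a minimal sketch):

```bash
# inside the OpenVINO container
ls models/        # the exported yolov5s.onnx should be listed here
mo --help | head  # the Model Optimizer CLI used in the next step
```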
135 | 136 |
137 | **B2. Export ONNX model to an OpenVINO IR representation** 138 | 139 | This creates the OpenVINO Intermediate Representation (IR) model files (xml and bin) in the directory `models/yolov5_openvino`, which is also available on the host system outside the docker container thanks to the bind mount. 140 | 141 | **Important Note:** `--input_shape` must be provided and must match the image shape used when exporting the ONNX model. Batching might not be supported for CPU inference. 142 | 143 | ```bash 144 | # inside the OpenVINO docker container 145 | mo \ 146 | --progress \ 147 | --input_shape [1,3,640,640] \ 148 | --input_model models/yolov5s.onnx \ 149 | --output_dir models/yolov5_openvino 150 | # exit OpenVINO docker container 151 | exit 152 | ``` 153 | 154 | [Full OpenVINO export options](https://docs.openvinotoolkit.org/latest/openvino_docs_MO_DG_prepare_model_convert_model_Converting_Model_General.html) 155 | 156 |
157 | 158 | ## 5. Test YOLOv5 OpenVINO IR model CPU inference 159 | 160 |
161 | **OpenVINO model inference** 162 | 163 | ```bash 164 | python detect_openvino.py -m image -i <path_to_image_file> 165 | python detect_openvino.py -m video -i <path_to_video_file> 166 | # python detect_openvino.py -h for more info 167 | ``` 168 | 169 | Optional: To convert all the frames in the `output` directory into an mp4 video with `ffmpeg`, use `ffmpeg -r 25 -start_number 00001 -i output/frame_openvino_%5d.jpg -vcodec libx264 -y -an openvino_result.mp4`. A concrete example using the bundled sample media follows this section. 170 | 171 |
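For example, using the sample media in the `media` directory and the default IR paths (`models/yolov5_openvino/yolov5s.xml` / `.bin`, configurable with `--model_xml` / `--model_bin`):

```bash
# single image on the CPU target device
python detect_openvino.py -m image -i media/2.jpg -d CPU
# video; annotated frames are written to the output directory
python detect_openvino.py -m video -i media/soccer1.mp4 -o output -t 0.6
```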
172 | -------------------------------------------------------------------------------- /detect_onnx.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | from functools import partial 4 | 5 | import torch 6 | import numpy as np 7 | import onnxruntime 8 | 9 | from utils.general import parse_arguments, DataStreamer 10 | from utils.detector_utils import save_output, preprocess_image, non_max_suppression, w_non_max_suppression 11 | 12 | 13 | @torch.no_grad() 14 | def detect_onnx(src_path: str, 15 | media_type: str, 16 | threshold: float = 0.6, 17 | official: bool = True, 18 | onnx_path: str = "models/yolov5s.onnx", 19 | output_dir: str = "output", 20 | num_classes: int = 80) -> None: 21 | session = onnxruntime.InferenceSession(onnx_path) 22 | model_batch_size = session.get_inputs()[0].shape[0] 23 | model_h = session.get_inputs()[0].shape[2] 24 | model_w = session.get_inputs()[0].shape[3] 25 | in_w = 640 if (model_w is None or isinstance(model_w, str)) else model_w 26 | in_h = 640 if (model_h is None or isinstance(model_h, str)) else model_h 27 | print("Input Layer: ", session.get_inputs()[0].name) 28 | print("Output Layer: ", session.get_outputs()[0].name) 29 | print("Model Input Shape: ", session.get_inputs()[0].shape) 30 | print("Model Output Shape: ", session.get_outputs()[0].shape) 31 | 32 | start_time = time.time() 33 | preprocess_func = partial(preprocess_image, in_size=(in_w, in_h)) 34 | data_stream = DataStreamer(src_path, media_type, preprocess_func) 35 | if output_dir is not None: 36 | os.makedirs(output_dir, exist_ok=True) 37 | 38 | for i, (orig_input, model_input) in enumerate(data_stream, start=1): 39 | batch_size = model_input.shape[0] if isinstance( 40 | model_batch_size, str) else model_batch_size 41 | input_name = session.get_inputs()[0].name 42 | 43 | # inference 44 | start = time.time() 45 | outputs = session.run(None, {input_name: model_input}) 46 | end = time.time() 47 | 48 | inf_time = end - start 49 | print('Inference Time: {} Seconds Single Image'.format(inf_time)) 50 | fps = 1. 
/ (end - start) 51 | print('Estimated Inference FPS: {} FPS Single Image'.format(fps)) 52 | 53 | batch_detections = [] 54 | # model.model[-1].export = boolean ---> True:3 False:4 55 | if official: # recommended 56 | # model.model[-1].export = False ---> outputs[0] (1, xxxx, 85) 57 | # Use the official code directly 58 | batch_detections = torch.from_numpy(np.array(outputs[0])) 59 | batch_detections = non_max_suppression( 60 | batch_detections, conf_thres=0.4, iou_thres=0.5, agnostic=False) 61 | else: 62 | # model.model[-1].export = False ---> outputs[1]/outputs[2]/outputs[2] 63 | # model.model[-1].export = True ---> outputs 64 | # (1, 3, 20, 20, 85) 65 | # (1, 3, 40, 40, 85) 66 | # (1, 3, 80, 80, 85) 67 | # same anchors for 5s, 5l, 5x 68 | anchors = [[116, 90, 156, 198, 373, 326], [ 69 | 30, 61, 62, 45, 59, 119], [10, 13, 16, 30, 33, 23]] 70 | 71 | boxs = [] 72 | a = torch.tensor(anchors).float().view(3, -1, 2) 73 | anchor_grid = a.clone().view(3, 1, -1, 1, 1, 2) 74 | if len(outputs) == 4: 75 | outputs = [outputs[1], outputs[2], outputs[3]] 76 | for index, out in enumerate(outputs): 77 | out = torch.from_numpy(out) 78 | # batch = out.shape[1] 79 | feature_w = out.shape[2] 80 | feature_h = out.shape[3] 81 | 82 | # Feature map corresponds to the original image zoom factor 83 | stride_w = int(in_w / feature_w) 84 | stride_h = int(in_h / feature_h) 85 | 86 | grid_x, grid_y = np.meshgrid( 87 | np.arange(feature_w), np.arange(feature_h)) 88 | 89 | # cx, cy, w, h 90 | pred_boxes = torch.FloatTensor(out[..., :4].shape) 91 | pred_boxes[..., 0] = (torch.sigmoid( 92 | out[..., 0]) * 2.0 - 0.5 + grid_x) * stride_w # cx 93 | pred_boxes[..., 1] = (torch.sigmoid( 94 | out[..., 1]) * 2.0 - 0.5 + grid_y) * stride_h # cy 95 | pred_boxes[..., 2:4] = (torch.sigmoid( 96 | out[..., 2:4]) * 2) ** 2 * anchor_grid[index] # wh 97 | 98 | conf = torch.sigmoid(out[..., 4]) 99 | pred_cls = torch.sigmoid(out[..., 5:]) 100 | 101 | output = torch.cat((pred_boxes.view(batch_size, -1, 4), 102 | conf.view(batch_size, -1, 1), 103 | pred_cls.view(batch_size, -1, num_classes)), 104 | -1) 105 | boxs.append(output) 106 | 107 | outputx = torch.cat(boxs, 1) 108 | # NMS 109 | batch_detections = w_non_max_suppression( 110 | outputx, num_classes, conf_thres=0.4, nms_thres=0.3) 111 | if output_dir is not None: 112 | save_path = os.path.join( 113 | output_dir, f"frame_onnx_{str(i).zfill(5)}.jpg") 114 | save_output(batch_detections[0], orig_input, save_path, 115 | threshold=threshold, model_in_HW=(in_h, in_w), 116 | line_thickness=None, text_bg_alpha=0.0) 117 | 118 | elapse_time = time.time() - start_time 119 | print(f'Total Frames: {i}') 120 | print(f'Total Elapsed Time: {elapse_time:.3f} Seconds'.format()) 121 | print(f'Final Estimated FPS: {i / (elapse_time):.2f}') 122 | 123 | 124 | if __name__ == '__main__': 125 | args = parse_arguments("YoloV5 onnx demo") 126 | t1 = time.time() 127 | detect_onnx(src_path=args.input_path, 128 | media_type=args.media_type, 129 | threshold=args.threshold, 130 | official=True, # official yolov5 post-processing 131 | onnx_path=args.onnx_path, 132 | output_dir=args.output_dir, 133 | num_classes=args.num_classes) 134 | -------------------------------------------------------------------------------- /detect_openvino.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | import argparse 4 | from functools import partial 5 | 6 | import cv2 7 | import torch 8 | import openvino as ov 9 | 10 | from utils.general import DataStreamer 11 | from 
utils.detector_utils import save_output, non_max_suppression, preprocess_image 12 | 13 | 14 | def parse_arguments(desc: str) -> argparse.Namespace: 15 | parser = argparse.ArgumentParser(description=desc) 16 | parser.add_argument('-i', '--input_path', dest='input_path', required=True, type=str, 17 | help='Path to Input: Video File or Image file') 18 | parser.add_argument('--model_xml', dest='model_xml', default='models/yolov5_openvino/yolov5s.xml', 19 | help='OpenVINO XML File. (default: %(default)s)') 20 | parser.add_argument('--model_bin', dest='model_bin', default='models/yolov5_openvino/yolov5s.bin', 21 | help='OpenVINO BIN File. (default: %(default)s)') 22 | parser.add_argument('-d', '--target_device', dest='target_device', default='CPU', type=str, 23 | help='Target Plugin: CPU, GPU, FPGA, MYRIAD, MULTI:CPU,GPU, HETERO:FPGA,CPU. (default: %(default)s)') 24 | parser.add_argument('-m', '--media_type', dest='media_type', default='image', type=str, 25 | choices=('image', 'video'), 26 | help='Type of Input: image, video. (default: %(default)s)') 27 | parser.add_argument('-o', '--output_dir', dest='output_dir', default='output', type=str, 28 | help='Output directory. (default: %(default)s)') 29 | parser.add_argument('-t', '--threshold', dest='threshold', default=0.6, type=float, 30 | help='Object Detection Accuracy Threshold. (default: %(default)s)') 31 | 32 | return parser.parse_args() 33 | 34 | 35 | def get_openvino_core_net_exec(model_xml_path: str, model_bin_path: str, target_device: str = "CPU"): 36 | # load openvino Core object 37 | core = ov.Core() 38 | 39 | # load CPU extensions if availabel 40 | lib_ext_path = '/opt/intel/openvino/inference_engine/lib/intel64/libcpu_extension.so' 41 | if 'CPU' in target_device and os.path.exists(lib_ext_path): 42 | print(f"Loading CPU extensions from {lib_ext_path}") 43 | core.add_extension(lib_ext_path) 44 | 45 | # load openVINO network 46 | model = core.read_model( 47 | model=model_xml_path, weights=model_bin_path) 48 | 49 | # create executable network 50 | compiled_model = core.compile_model( 51 | model=model, device_name=target_device) 52 | 53 | return core, model, compiled_model 54 | 55 | 56 | def inference(args: argparse.Namespace) -> None: 57 | """Run Object Detection Application 58 | 59 | args: ArgumentParser Namespace 60 | """ 61 | print(f"Running Inference for {args.media_type}: {args.input_path}") 62 | # Load model and executable 63 | core, model, compiled_model = get_openvino_core_net_exec( 64 | args.model_xml, args.model_bin, args.target_device) 65 | 66 | # Get Input, Output Information 67 | print("Available Devices: ", core.available_devices) 68 | print("Input layer names ", model.inputs[0].names) 69 | print("Output layer names ", model.outputs[0].names) 70 | print("Input Layer: ", model.inputs) 71 | print("Output Layer: ", model.outputs) 72 | 73 | output_layer_ir = compiled_model.output(model.outputs[0].names.pop()) 74 | if args.output_dir is not None: 75 | os.makedirs(args.output_dir, exist_ok=True) 76 | 77 | start_time = time.time() 78 | _, C, H, W = model.inputs[0].shape 79 | preprocess_func = partial(preprocess_image, in_size=(W, H)) 80 | data_stream = DataStreamer( 81 | args.input_path, args.media_type, preprocess_func) 82 | 83 | for i, (orig_input, model_input) in enumerate(data_stream, start=1): 84 | # Inference 85 | start = time.time() 86 | # results = compiled_model.infer(inputs={InputLayer: model_input}) 87 | results = compiled_model([model_input]) 88 | end = time.time() 89 | 90 | inf_time = end - start 91 | print('Inference 
Time: {} Seconds Single Image'.format(inf_time)) 92 | fps = 1. / (end - start) 93 | print('Estimated Inference FPS: {} FPS Single Image'.format(fps)) 94 | 95 | # Write fos, inference info on Image 96 | text = 'FPS: {}, INF: {}'.format(round(fps, 2), round(inf_time, 2)) 97 | cv2.putText(orig_input, text, (0, 20), cv2.FONT_HERSHEY_COMPLEX, 98 | 0.6, (0, 125, 255), 1) 99 | 100 | # Print Bounding Boxes on Image 101 | detections = results[output_layer_ir] 102 | detections = torch.from_numpy(detections) 103 | detections = non_max_suppression( 104 | detections, conf_thres=0.4, iou_thres=0.5, agnostic=False) 105 | 106 | save_path = os.path.join( 107 | args.output_dir, f"frame_openvino_{str(i).zfill(5)}.jpg") 108 | save_output(detections[0], orig_input, save_path, 109 | threshold=args.threshold, model_in_HW=(H, W), 110 | line_thickness=None, text_bg_alpha=0.0) 111 | 112 | elapse_time = time.time() - start_time 113 | print(f'Total Frames: {i}') 114 | print(f'Total Elapsed Time: {elapse_time:.3f} Seconds'.format()) 115 | print(f'Final Estimated FPS: {i / (elapse_time):.2f}') 116 | 117 | 118 | if __name__ == '__main__': 119 | parsed_args = parse_arguments( 120 | desc="Basic OpenVINO Example for person/object detection") 121 | inference(parsed_args) 122 | -------------------------------------------------------------------------------- /inf_requirements.txt: -------------------------------------------------------------------------------- 1 | onnxruntime==1.18.1 2 | opencv-python==4.10.0.84 3 | openvino==2024.3.0 4 | torch==2.4.0 5 | torchvision==0.19.0 6 | -------------------------------------------------------------------------------- /media/1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SamSamhuns/yolov5_export_cpu/200f209ce6440a8851ea8598c0ae9e915890e3bb/media/1.jpg -------------------------------------------------------------------------------- /media/2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SamSamhuns/yolov5_export_cpu/200f209ce6440a8851ea8598c0ae9e915890e3bb/media/2.jpg -------------------------------------------------------------------------------- /media/3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SamSamhuns/yolov5_export_cpu/200f209ce6440a8851ea8598c0ae9e915890e3bb/media/3.jpg -------------------------------------------------------------------------------- /media/4.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SamSamhuns/yolov5_export_cpu/200f209ce6440a8851ea8598c0ae9e915890e3bb/media/4.jpg -------------------------------------------------------------------------------- /media/soccer1.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SamSamhuns/yolov5_export_cpu/200f209ce6440a8851ea8598c0ae9e915890e3bb/media/soccer1.mp4 -------------------------------------------------------------------------------- /models/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SamSamhuns/yolov5_export_cpu/200f209ce6440a8851ea8598c0ae9e915890e3bb/models/.gitkeep -------------------------------------------------------------------------------- /utils/detector_utils.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import 
time 3 | import torch 4 | import torchvision 5 | import numpy as np 6 | from typing import Tuple, Optional, Union 7 | 8 | from utils.general import CLASS_LABELS 9 | 10 | 11 | def preprocess_image( 12 | cv2_img: np.ndarray, 13 | in_size: Tuple[int, int] = (640, 640) 14 | ) -> np.ndarray: 15 | """preprocesses cv2 image and returns a norm np.ndarray 16 | 17 | cv2_img = cv2 image 18 | in_size: in_width, in_height 19 | """ 20 | resized = pad_resize_image(cv2_img, in_size) 21 | img_in = np.transpose(resized, (2, 0, 1)).astype(np.float32) # HWC -> CHW 22 | img_in /= 255.0 23 | return img_in 24 | 25 | 26 | def save_output( 27 | detections, 28 | image_src: np.ndarray, 29 | save_path: str, 30 | threshold: float, 31 | model_in_HW: Tuple[int, int], 32 | line_thickness: Optional[int] = None, 33 | text_bg_alpha: float = 0.0 34 | ) -> None: 35 | image_src = cv2.cvtColor(image_src, cv2.COLOR_RGB2BGR) 36 | labels = detections[..., -1].numpy() 37 | boxs = detections[..., :4].numpy() 38 | confs = detections[..., 4].numpy() 39 | 40 | if isinstance(image_src, str): 41 | image_src = cv2.imread(image_src) 42 | elif isinstance(image_src, np.ndarray): 43 | image_src = image_src 44 | 45 | mh, mw = model_in_HW 46 | h, w = image_src.shape[:2] 47 | boxs[:, :] = scale_coords((mh, mw), boxs[:, :], (h, w)).round() 48 | tl = line_thickness or round(0.002 * (w + h) / 2) + 1 49 | for i, box in enumerate(boxs): 50 | if confs[i] >= threshold: 51 | x1, y1, x2, y2 = map(int, box) 52 | np.random.seed(int(labels[i]) + 2020) 53 | color = [np.random.randint(0, 255), 0, np.random.randint(0, 255)] 54 | cv2.rectangle(image_src, (x1, y1), (x2, y2), color, thickness=max( 55 | int((w + h) / 600), 1), lineType=cv2.LINE_AA) 56 | label = '%s %.2f' % (CLASS_LABELS[int(labels[i])], confs[i]) 57 | t_size = cv2.getTextSize( 58 | label, 0, fontScale=tl / 3, thickness=1)[0] 59 | c2 = x1 + t_size[0] + 3, y1 - t_size[1] - 5 60 | if text_bg_alpha == 0.0: 61 | cv2.rectangle(image_src, (x1 - 1, y1), c2, 62 | color, cv2.FILLED, cv2.LINE_AA) 63 | else: 64 | # Transparent text background 65 | alphaReserve = text_bg_alpha # 0: opaque 1: transparent 66 | BChannel, GChannel, RChannel = color 67 | xMin, yMin = int(x1 - 1), int(y1 - t_size[1] - 3) 68 | xMax, yMax = int(x1 + t_size[0]), int(y1) 69 | image_src[yMin:yMax, xMin:xMax, 0] = image_src[yMin:yMax, 70 | xMin:xMax, 0] * alphaReserve + BChannel * (1 - alphaReserve) 71 | image_src[yMin:yMax, xMin:xMax, 1] = image_src[yMin:yMax, 72 | xMin:xMax, 1] * alphaReserve + GChannel * (1 - alphaReserve) 73 | image_src[yMin:yMax, xMin:xMax, 2] = image_src[yMin:yMax, 74 | xMin:xMax, 2] * alphaReserve + RChannel * (1 - alphaReserve) 75 | cv2.putText(image_src, label, (x1 + 3, y1 - 4), 0, tl / 3, [255, 255, 255], 76 | thickness=1, lineType=cv2.LINE_AA) 77 | print("bbox:", box, "conf:", confs[i], 78 | "class:", CLASS_LABELS[int(labels[i])]) 79 | cv2.imwrite(save_path, image_src) 80 | 81 | 82 | def w_bbox_iou( 83 | box1: torch.Tensor, 84 | box2: torch.Tensor, 85 | x1y1x2y2: bool = True 86 | ) -> torch.Tensor: 87 | """Calculate IOU 88 | 89 | """ 90 | if not x1y1x2y2: 91 | b1_x1, b1_x2 = box1[:, 0] - box1[:, 2] / 2, box1[:, 0] + box1[:, 2] / 2 92 | b1_y1, b1_y2 = box1[:, 1] - box1[:, 3] / 2, box1[:, 1] + box1[:, 3] / 2 93 | b2_x1, b2_x2 = box2[:, 0] - box2[:, 2] / 2, box2[:, 0] + box2[:, 2] / 2 94 | b2_y1, b2_y2 = box2[:, 1] - box2[:, 3] / 2, box2[:, 1] + box2[:, 3] / 2 95 | else: 96 | b1_x1, b1_y1, b1_x2, b1_y2 = box1[:, 97 | 0], box1[:, 1], box1[:, 2], box1[:, 3] 98 | b2_x1, b2_y1, b2_x2, b2_y2 = box2[:, 99 | 0], box2[:, 
1], box2[:, 2], box2[:, 3] 100 | 101 | inter_rect_x1 = torch.max(b1_x1, b2_x1) 102 | inter_rect_y1 = torch.max(b1_y1, b2_y1) 103 | inter_rect_x2 = torch.min(b1_x2, b2_x2) 104 | inter_rect_y2 = torch.min(b1_y2, b2_y2) 105 | 106 | inter_area = torch.clamp(inter_rect_x2 - inter_rect_x1 + 1, min=0) * \ 107 | torch.clamp(inter_rect_y2 - inter_rect_y1 + 1, min=0) 108 | 109 | b1_area = (b1_x2 - b1_x1 + 1) * (b1_y2 - b1_y1 + 1) 110 | b2_area = (b2_x2 - b2_x1 + 1) * (b2_y2 - b2_y1 + 1) 111 | 112 | iou = inter_area / (b1_area + b2_area - inter_area + 1e-16) 113 | 114 | return iou 115 | 116 | 117 | def w_non_max_suppression(prediction, num_classes, conf_thres=0.5, nms_thres=0.4): 118 | # Find the upper left and lower right corners 119 | # box_corner = prediction.new(prediction.shape) 120 | box_corner = torch.FloatTensor(prediction.shape) 121 | box_corner[:, :, 0] = prediction[:, :, 0] - prediction[:, :, 2] / 2 122 | box_corner[:, :, 1] = prediction[:, :, 1] - prediction[:, :, 3] / 2 123 | box_corner[:, :, 2] = prediction[:, :, 0] + prediction[:, :, 2] / 2 124 | box_corner[:, :, 3] = prediction[:, :, 1] + prediction[:, :, 3] / 2 125 | prediction[:, :, :4] = box_corner[:, :, :4] 126 | 127 | output = [None for _ in range(len(prediction))] 128 | for image_i, image_pred in enumerate(prediction): 129 | # Use confidence for the first round of screening 130 | conf_mask = (image_pred[:, 4] >= conf_thres).squeeze() 131 | image_pred = image_pred[conf_mask] 132 | 133 | if not image_pred.size(0): 134 | continue 135 | 136 | # Obtain type and its confidence 137 | class_conf, class_pred = torch.max( 138 | image_pred[:, 5:5 + num_classes], 1, keepdim=True) 139 | 140 | # content obtained is (x1, y1, x2, y2, obj_conf, class_conf, class_pred) 141 | detections = torch.cat( 142 | (image_pred[:, :5], class_conf.float(), class_pred.float()), 1) 143 | 144 | # Type of acquisition 145 | unique_labels = detections[:, -1].cpu().unique() 146 | 147 | if prediction.is_cuda: 148 | unique_labels = unique_labels.cuda() 149 | 150 | for c in unique_labels: 151 | # Obtain all prediction results after a certain type of preliminary screening 152 | detections_class = detections[detections[:, -1] == c] 153 | # Sort according to the confidence of the existence of the object 154 | _, conf_sort_index = torch.sort( 155 | detections_class[:, 4], descending=True) 156 | detections_class = detections_class[conf_sort_index] 157 | # Non-maximum suppression 158 | max_detections = [] 159 | while detections_class.size(0): 160 | # Take out this category with the highest confidence, judge step by step, and 161 | # judge whether the degree of coincidence is greater than nms_thres, and if so, remove it 162 | max_detections.append(detections_class[0].unsqueeze(0)) 163 | if len(detections_class) == 1: 164 | break 165 | ious = w_bbox_iou(max_detections[-1], detections_class[1:]) 166 | detections_class = detections_class[1:][ious < nms_thres] 167 | # Stacked 168 | max_detections = torch.cat(max_detections).data 169 | # Add max detections to outputs 170 | output[image_i] = max_detections if output[image_i] is None else torch.cat( 171 | (output[image_i], max_detections)) 172 | 173 | return output 174 | 175 | 176 | def box_iou( 177 | box1: torch.Tensor, 178 | box2: torch.Tensor 179 | ) -> torch.Tensor: 180 | """Return intersection-over-union (Jaccard index) of boxes. 181 | 182 | # https://github.com/pytorch/vision/blob/master/torchvision/ops/boxes.py 183 | Both sets of boxes are expected to be in (x1, y1, x2, y2) format. 
184 | Arguments: 185 | box1 (Tensor[N, 4]) 186 | box2 (Tensor[M, 4]) 187 | Returns: 188 | iou (Tensor[N, M]): the NxM matrix containing the pairwise 189 | IoU values for every element in boxes1 and boxes2 190 | """ 191 | 192 | def box_area(box): 193 | # box = 4xn 194 | return (box[2] - box[0]) * (box[3] - box[1]) 195 | 196 | area1 = box_area(box1.T) 197 | area2 = box_area(box2.T) 198 | 199 | # inter(N,M) = (rb(N,M,2) - lt(N,M,2)).clamp(0).prod(2) 200 | inter = (torch.min(box1[:, None, 2:], box2[:, 2:]) - 201 | torch.max(box1[:, None, :2], box2[:, :2])).clamp(0).prod(2) 202 | # iou = inter / (area1 + area2 - inter) 203 | return inter / (area1[:, None] + area2 - inter) 204 | 205 | 206 | def non_max_suppression( 207 | prediction: torch.Tensor, 208 | conf_thres: float = 0.25, 209 | iou_thres: float = 0.45, 210 | classes: Optional[torch.Tensor] = None, 211 | agnostic: bool = False, 212 | multi_label: bool = False, 213 | labels: Tuple[str] = () 214 | ) -> torch.Tensor: 215 | """Runs Non-Maximum Suppression (NMS) on inference results 216 | 217 | Returns: 218 | list of detections, on (n,6) tensor per image [xyxy, conf, cls] 219 | """ 220 | nc = prediction.shape[2] - 5 # number of classes 221 | xc = prediction[..., 4] > conf_thres # candidates 222 | 223 | # Checks 224 | assert 0 <= conf_thres <= 1, f'Invalid Confidence threshold {conf_thres}, valid values are between 0.0 and 1.0' 225 | assert 0 <= iou_thres <= 1, f'Invalid IoU {iou_thres}, valid values are between 0.0 and 1.0' 226 | 227 | # Settings 228 | # (pixels) maximum and minimum box width and height 229 | max_wh = 4096 # min_wh = 2 230 | max_det = 300 # maximum number of detections per image 231 | max_nms = 30000 # maximum number of boxes into torchvision.ops.nms() 232 | time_limit = 10.0 # seconds to quit after 233 | redundant = True # require redundant detections 234 | multi_label &= nc > 1 # multiple labels per box (adds 0.5ms/img) 235 | merge = False # use merge-NMS 236 | 237 | t = time.time() 238 | output = [torch.zeros((0, 6), device=prediction.device) 239 | ] * prediction.shape[0] 240 | for xi, x in enumerate(prediction): # image index, image inference 241 | # Apply constraints 242 | # x[((x[..., 2:4] < min_wh) | (x[..., 2:4] > max_wh)).any(1), 4] = 0 # width-height 243 | x = x[xc[xi]] # confidence 244 | 245 | # Cat apriori labels if autolabelling 246 | if labels and len(labels[xi]): 247 | lxi = labels[xi] 248 | v = torch.zeros((len(lxi), nc + 5), device=x.device) 249 | v[:, :4] = lxi[:, 1:5] # box 250 | v[:, 4] = 1.0 # conf 251 | v[range(len(lxi)), lxi[:, 0].long() + 5] = 1.0 # cls 252 | x = torch.cat((x, v), 0) 253 | 254 | # If none remain process next image 255 | if not x.shape[0]: 256 | continue 257 | 258 | # Compute conf 259 | x[:, 5:] *= x[:, 4:5] # conf = obj_conf * cls_conf 260 | 261 | # Box (center x, center y, width, height) to (x1, y1, x2, y2) 262 | box = xywh2xyxy(x[:, :4]) 263 | 264 | # Detections matrix nx6 (xyxy, conf, cls) 265 | if multi_label: 266 | i, j = (x[:, 5:] > conf_thres).nonzero(as_tuple=False).T 267 | x = torch.cat((box[i], x[i, j + 5, None], j[:, None].float()), 1) 268 | else: # best class only 269 | conf, j = x[:, 5:].max(1, keepdim=True) 270 | x = torch.cat((box, conf, j.float()), 1)[ 271 | conf.view(-1) > conf_thres] 272 | 273 | # Filter by class 274 | if classes is not None: 275 | x = x[(x[:, 5:6] == torch.tensor(classes, device=x.device)).any(1)] 276 | 277 | # Apply finite constraint 278 | # if not torch.isfinite(x).all(): 279 | # x = x[torch.isfinite(x).all(1)] 280 | 281 | # Check shape 282 | n = 
x.shape[0] # number of boxes 283 | if not n: # no boxes 284 | continue 285 | elif n > max_nms: # excess boxes 286 | # sort by confidence 287 | x = x[x[:, 4].argsort(descending=True)[:max_nms]] 288 | 289 | # Batched NMS 290 | c = x[:, 5:6] * (0 if agnostic else max_wh) # classes 291 | # boxes (offset by class), scores 292 | boxes, scores = x[:, :4] + c, x[:, 4] 293 | i = torchvision.ops.nms(boxes, scores, iou_thres) # NMS 294 | if i.shape[0] > max_det: # limit detections 295 | i = i[:max_det] 296 | if merge and (1 < n < 3E3): # Merge NMS (boxes merged using weighted mean) 297 | # update boxes as boxes(i,4) = weights(i,n) * boxes(n,4) 298 | iou = box_iou(boxes[i], boxes) > iou_thres # iou matrix 299 | weights = iou * scores[None] # box weights 300 | x[i, :4] = torch.mm(weights, x[:, :4]).float( 301 | ) / weights.sum(1, keepdim=True) # merged boxes 302 | if redundant: 303 | i = i[iou.sum(1) > 1] # require redundancy 304 | 305 | output[xi] = x[i] 306 | if (time.time() - t) > time_limit: 307 | print(f'WARNING: NMS time limit {time_limit}s exceeded') 308 | break # time limit exceeded 309 | 310 | return output 311 | 312 | 313 | def pad_resize_image( 314 | cv2_img: np.ndarray, 315 | new_size: Tuple[int, int] = (640, 480), 316 | color: Tuple[int, int, int] = (125, 125, 125) 317 | ) -> np.ndarray: 318 | """Resize and pad image with color if necessary, maintaining orig scale 319 | 320 | args: 321 | cv2_img: numpy.ndarray = cv2 image 322 | new_size: tuple(int, int) = (width, height) 323 | color: tuple(int, int, int) = (B, G, R) 324 | """ 325 | in_h, in_w = cv2_img.shape[:2] 326 | new_w, new_h = new_size 327 | # rescale down 328 | scale = min(new_w / in_w, new_h / in_h) 329 | # get new sacled widths and heights 330 | scale_new_w, scale_new_h = int(in_w * scale), int(in_h * scale) 331 | resized_img = cv2.resize(cv2_img, (scale_new_w, scale_new_h)) 332 | # calculate deltas for padding 333 | d_w = max(new_w - scale_new_w, 0) 334 | d_h = max(new_h - scale_new_h, 0) 335 | # center image with padding on top/bottom or left/right 336 | top, bottom = d_h // 2, d_h - (d_h // 2) 337 | left, right = d_w // 2, d_w - (d_w // 2) 338 | pad_resized_img = cv2.copyMakeBorder(resized_img, 339 | top, bottom, left, right, 340 | cv2.BORDER_CONSTANT, 341 | value=color) 342 | return pad_resized_img 343 | 344 | 345 | def clip_coords( 346 | boxes: Union[torch.Tensor, np.ndarray], 347 | img_shape: Tuple[int, int] 348 | ) -> None: 349 | # Clip bounding xyxy bounding boxes to image shape (height, width) 350 | if isinstance(boxes, torch.Tensor): 351 | boxes[:, 0].clamp_(0, img_shape[1]) # x1 352 | boxes[:, 1].clamp_(0, img_shape[0]) # y1 353 | boxes[:, 2].clamp_(0, img_shape[1]) # x2 354 | boxes[:, 3].clamp_(0, img_shape[0]) # y2 355 | else: # np.array 356 | boxes[:, 0].clip(0, img_shape[1], out=boxes[:, 0]) # x1 357 | boxes[:, 1].clip(0, img_shape[0], out=boxes[:, 1]) # y1 358 | boxes[:, 2].clip(0, img_shape[1], out=boxes[:, 2]) # x2 359 | boxes[:, 3].clip(0, img_shape[0], out=boxes[:, 3]) # y2 360 | 361 | 362 | def scale_coords(img1_shape: Tuple[int, int], coords: np.ndarray, img0_shape: Tuple[int, int], ratio_pad=None): 363 | # Rescale coords (xyxy) from img1_shape to img0_shape 364 | if ratio_pad is None: # calculate from img0_shape 365 | gain = min(img1_shape[0] / img0_shape[0], 366 | img1_shape[1] / img0_shape[1]) 367 | pad = (img1_shape[1] - img0_shape[1] * gain) / \ 368 | 2, (img1_shape[0] - img0_shape[0] * gain) / 2 # wh padding 369 | else: 370 | gain = ratio_pad[0][0] 371 | pad = ratio_pad[1] 372 | 373 | coords[:, [0, 2]] 
-= pad[0] # x padding 374 | coords[:, [1, 3]] -= pad[1] # y padding 375 | coords[:, :4] /= gain 376 | clip_coords(coords, img0_shape) 377 | return coords 378 | 379 | 380 | def xyxy2xywh(x: torch.Tensor) -> torch.Tensor: 381 | # Convert nx4 boxes from [x1, y1, x2, y2] to [x, y, w, h] where xy1=top-left, xy2=bottom-right 382 | y = torch.zeros_like(x) if isinstance( 383 | x, torch.Tensor) else np.zeros_like(x) 384 | y[:, 0] = (x[:, 0] + x[:, 2]) / 2 # x center 385 | y[:, 1] = (x[:, 1] + x[:, 3]) / 2 # y center 386 | y[:, 2] = x[:, 2] - x[:, 0] # width 387 | y[:, 3] = x[:, 3] - x[:, 1] # height 388 | return y 389 | 390 | 391 | def xywh2xyxy(x: torch.Tensor) -> torch.Tensor: 392 | # Convert nx4 boxes from [x, y, w, h] to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right 393 | y = torch.zeros_like(x) if isinstance( 394 | x, torch.Tensor) else np.zeros_like(x) 395 | y[:, 0] = x[:, 0] - x[:, 2] / 2 # top left x 396 | y[:, 1] = x[:, 1] - x[:, 3] / 2 # top left y 397 | y[:, 2] = x[:, 0] + x[:, 2] / 2 # bottom right x 398 | y[:, 3] = x[:, 1] + x[:, 3] / 2 # bottom right y 399 | return y 400 | -------------------------------------------------------------------------------- /utils/general.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import argparse 3 | import os.path as osp 4 | from typing import Callable 5 | 6 | import cv2 7 | import numpy as np 8 | 9 | 10 | # mscoco class names 11 | CLASS_LABELS = ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light', 12 | 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 13 | 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 14 | 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 15 | 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 16 | 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 17 | 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 18 | 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 19 | 'scissors', 'teddy bear', 'hair drier', 'toothbrush'] 20 | 21 | 22 | def parse_arguments(desc): 23 | parser = argparse.ArgumentParser(description=desc) 24 | parser.add_argument('-i', '--input_path', dest='input_path', required=True, type=str, 25 | help='Path to Input: Video File or Image file') 26 | parser.add_argument('-m', '--media_type', dest='media_type', default='image', type=str, 27 | choices=('image', 'video'), 28 | help='Type of Input: image, video. (default: %(default)s)') 29 | parser.add_argument('-t', '--threshold', dest='threshold', default=0.6, type=float, 30 | help='Detection Threshold. (default: %(default)s)') 31 | parser.add_argument('--ox', '--onnx_path', dest='onnx_path', default="models/yolov5s.onnx", type=str, 32 | help='Path to ONNX model. (default: %(default)s)') 33 | parser.add_argument('-o', '--output_dir', dest='output_dir', default='output', type=str, 34 | help='Output directory. (default: %(default)s)') 35 | parser.add_argument('-c', '--num_classes', dest='num_classes', default=80, type=int, 36 | help='Num of classes. 
(default: %(default)s)') 37 | 38 | return parser.parse_args() 39 | 40 | 41 | class DataStreamer(object): 42 | 43 | """Iterable DataStreamer class for generating numpy arr images 44 | Generates orig image and pre-processed image 45 | 46 | For loading data into detectors 47 | """ 48 | 49 | def __init__(self, src_path: str, media_type: str = "image", preprocess_func: Callable = None): 50 | """Init DataStreamer Obj 51 | 52 | src_path : str 53 | path to a single image/video or path to directory containing images 54 | media_type : str 55 | inference media_type "image" or "video" 56 | preprocess_func : Callable function 57 | preprocessesing function applied to PIL images 58 | """ 59 | if media_type not in {'video', 'image'}: 60 | raise NotImplementedError( 61 | f"{media_type} not supported in streamer. Use video or image") 62 | self.img_path_list = [] 63 | self.vid_path_list = [] 64 | self.idx = 0 65 | self.media_type = media_type 66 | self.preprocess_func = preprocess_func 67 | 68 | if media_type == "video": 69 | if osp.isfile(src_path): 70 | self.vid_path_list.append(src_path) 71 | self.vcap = cv2.VideoCapture(src_path) 72 | elif osp.isdir(src_path): 73 | raise NotImplementedError( 74 | f"dir iteration supported for video media_type. {src_path} must be a video file") 75 | elif media_type == "image": 76 | if osp.isfile(src_path): 77 | self.img_path_list.append(src_path) 78 | elif osp.isdir(src_path): 79 | img_exts = ['*.png', '*.PNG', '*.jpg', '*.jpeg'] 80 | for ext in img_exts: 81 | self.img_path_list.extend( 82 | glob.glob(osp.join(src_path, ext))) 83 | 84 | def __iter__(self): 85 | return self 86 | 87 | def __next__(self): 88 | """Get next image or frame as numpy array 89 | 90 | """ 91 | orig_img = None 92 | if self.media_type == 'image': 93 | if self.idx < len(self.img_path_list): 94 | orig_img = cv2.imread(self.img_path_list[self.idx]) 95 | orig_img = orig_img[..., ::-1] 96 | self.idx += 1 97 | elif self.media_type == 'video': 98 | if self.idx < len(self.vid_path_list): 99 | ret, frame = self.vcap.read() 100 | if ret: 101 | orig_img = frame[..., ::-1] 102 | else: 103 | self.idx += 1 104 | if orig_img is not None: 105 | proc_img = None 106 | if self.preprocess_func is not None: 107 | proc_img = self.preprocess_func(orig_img) 108 | proc_img = np.expand_dims(proc_img, axis=0) 109 | return np.array(orig_img), proc_img 110 | raise StopIteration 111 | --------------------------------------------------------------------------------