├── .gitattributes ├── .gitignore ├── LICENSE ├── README.md ├── intro.gif ├── intro.mp4 ├── plugins ├── Makefile ├── README.md ├── gpu_cc.py ├── yolo_layer.cu ├── yolo_layer.h └── yolo_layer.o ├── utils ├── __init__.py ├── background.py ├── camera.py ├── display.py ├── mjpeg.py ├── modnet.py ├── mtcnn.py ├── ssd.py ├── ssd_classes.py ├── ssd_tf.py ├── visualization.py ├── writer.py ├── yolo_classes.py └── yolo_with_plugins.py ├── zed.py └── zed_trt.py /.gitattributes: -------------------------------------------------------------------------------- 1 | # Auto detect text files and perform LF normalization 2 | * text=auto 3 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .nox/ 42 | .coverage 43 | .coverage.* 44 | .cache 45 | nosetests.xml 46 | coverage.xml 47 | *.cover 48 | .hypothesis/ 49 | .pytest_cache/ 50 | 51 | # Translations 52 | *.mo 53 | *.pot 54 | 55 | # Django stuff: 56 | *.log 57 | local_settings.py 58 | db.sqlite3 59 | 60 | # Flask stuff: 61 | instance/ 62 | .webassets-cache 63 | 64 | # Scrapy stuff: 65 | .scrapy 66 | 67 | # Sphinx documentation 68 | docs/_build/ 69 | 70 | # PyBuilder 71 | target/ 72 | 73 | # Jupyter Notebook 74 | .ipynb_checkpoints 75 | 76 | # IPython 77 | profile_default/ 78 | ipython_config.py 79 | 80 | # pyenv 81 | .python-version 82 | 83 | # celery beat schedule file 84 | celerybeat-schedule 85 | 86 | # SageMath parsed files 87 | *.sage.py 88 | 89 | # Environments 90 | .env 91 | .venv 92 | env/ 93 | venv/ 94 | ENV/ 95 | env.bak/ 96 | venv.bak/ 97 | 98 | # Spyder project settings 99 | .spyderproject 100 | .spyproject 101 | 102 | # Rope project settings 103 | .ropeproject 104 | 105 | # mkdocs documentation 106 | /site 107 | 108 | # mypy 109 | .mypy_cache/ 110 | .dmypy.json 111 | dmypy.json 112 | 113 | # Pyre type checker 114 | .pyre/ 115 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 Mehmet OKUYAR 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # YOLO Object Detection and Distance Measurement with a ZED Camera 2 | 3 | You can test object detection and distance measurement with a ZED stereo camera. 4 | 5 | 6 | ### How to use 7 | Run the script with `python zed.py` 8 | If you use the TensorRT YOLO version, run `python zed_trt.py` instead. 9 | Edit the relevant lines in `zed.py` to match your own setup. 10 | 11 | The default values for the config, weights and names files, the SVO path and the ZED camera ID are: 12 | ~~~~~~~~~~~~ 13 | config_path = "yolov4-tiny.cfg" 14 | weight_path = "yolov4-tiny.weights" 15 | meta_path = "coco.names" 16 | svo_path = None 17 | zed_id = 0 18 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 19 | 20 | ### Download the model file, for instance YOLOv4 21 | wget https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v3_optimal/yolov4.weights 22 | 23 | ### Making changes in the "zed.py" file 24 | You need to edit the following line in `zed.py`. 25 | 26 | The size should match the image width and height values in the .cfg file. 27 | Default values are width: 608, height: 608 (see the model-setup sketch at the end of this README). 28 | ~~~~~~ 29 | 98. model.setInputParams(size=(608, 608), scale=1/255, swapRB=True) 30 | ~~~~~~~~~~~~~~~~~~~~ 31 | ### Run the application 32 | To launch the ZED with YOLO, simply run the script: 33 | 34 | python3 zed.py 35 | 36 | The input parameters can be changed using the command line: 37 | 38 | python3 zed.py -c <config_file> -w <weight_file> -m <names_file> -s <svo_file> -z <zed_id> 39 | 40 | For instance: 41 | 42 | python3 zed.py -c yolov4-tiny.cfg -w yolov4-tiny.weights -m coco.names -z 1 43 | 44 | To run with custom weights: 45 | 46 | python3 zed.py -c yolov4-custom.cfg -w yolov4-custom.weights -m obj.names -z 1 47 | 48 | To display the help: 49 | 50 | python3 zed.py -h 51 | 52 | That's all. If you have a ZED camera you can easily obtain the distance of each detected object (a minimal distance-measurement sketch is given at the end of this README). 53 | ## You can see how the program works in the gif below. 54 | 55 | 56 |
![intro](intro.gif)
57 | 58 |
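### Model-setup sketch

The line quoted above (`model.setInputParams(...)`) belongs to OpenCV's `dnn_DetectionModel` API. The snippet below is only a minimal sketch of how such a setup typically looks, not the full `zed.py`; the file names reuse the defaults listed earlier, and the test image and thresholds are illustrative placeholders.

~~~~~~
import cv2

# Load the darknet model with the default config/weights listed above.
net = cv2.dnn.readNet("yolov4-tiny.weights", "yolov4-tiny.cfg")
model = cv2.dnn_DetectionModel(net)
# The size must match the width/height values in the .cfg file (608 x 608 by default).
model.setInputParams(size=(608, 608), scale=1/255, swapRB=True)

frame = cv2.imread("test.jpg")  # placeholder; zed.py feeds frames grabbed from the ZED here
classes, scores, boxes = model.detect(frame, confThreshold=0.4, nmsThreshold=0.4)
for class_id, score, box in zip(classes, scores, boxes):
    print(class_id, score, box)  # box is (x, y, w, h) in pixels
~~~~~~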

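### Distance-measurement sketch

The idea behind the distance measurement is to look up the 3D point of the ZED point cloud at the centre of each detection box and take its Euclidean norm. The snippet below is a minimal sketch using the ZED Python API (`pyzed`), not the exact code of `zed.py`; the box centre `(cx, cy)` is a hard-coded placeholder for where a YOLO detection would normally be used.

~~~~~~
import math
import pyzed.sl as sl

zed = sl.Camera()
init = sl.InitParameters()
init.depth_mode = sl.DEPTH_MODE.PERFORMANCE
init.coordinate_units = sl.UNIT.METER
if zed.open(init) != sl.ERROR_CODE.SUCCESS:
    raise SystemExit("Could not open the ZED camera")

image, point_cloud = sl.Mat(), sl.Mat()
if zed.grab() == sl.ERROR_CODE.SUCCESS:
    zed.retrieve_image(image, sl.VIEW.LEFT)                # left RGB frame fed to YOLO
    zed.retrieve_measure(point_cloud, sl.MEASURE.XYZRGBA)  # per-pixel 3D points
    cx, cy = 640, 360  # placeholder: centre of a detected bounding box
    err, point = point_cloud.get_value(cx, cy)
    if err == sl.ERROR_CODE.SUCCESS and math.isfinite(point[2]):
        distance = math.sqrt(point[0] ** 2 + point[1] ** 2 + point[2] ** 2)
        print("Distance to object: {:.2f} m".format(distance))
zed.close()
~~~~~~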
59 | -------------------------------------------------------------------------------- /intro.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MehmetOKUYAR/Yolo-Object-Detection-and-Distance-Measurement-with-Zed-camera/a112c65013eab48cf3210fb1e9973aad3ac25cf1/intro.gif -------------------------------------------------------------------------------- /intro.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MehmetOKUYAR/Yolo-Object-Detection-and-Distance-Measurement-with-Zed-camera/a112c65013eab48cf3210fb1e9973aad3ac25cf1/intro.mp4 -------------------------------------------------------------------------------- /plugins/Makefile: -------------------------------------------------------------------------------- 1 | CC=g++ 2 | LD=ld 3 | CXXFLAGS=-Wall -std=c++11 -g -O 4 | 5 | NVCC=nvcc 6 | 7 | # space separated compute values ex: computes=70 75. If not present will fetch device's CC 8 | computes= 9 | 10 | ifeq ($(computes), ) 11 | computes= $(shell python gpu_cc.py) 12 | $(info computes: $(computes)) 13 | endif 14 | 15 | NVCCFLAGS= $(foreach compute, $(computes),-gencode arch=compute_$(compute),code=[sm_$(compute),compute_$(compute)]) 16 | $(info NVCCFLAGS: $(NVCCFLAGS)) 17 | 18 | # These are the directories where I installed TensorRT on my x86_64 PC. 19 | TENSORRT_INCS=-I"/usr/local/TensorRT-7.1.3.4/include" 20 | TENSORRT_LIBS=-L"/usr/local/TensorRT-7.1.3.4/lib" 21 | 22 | # INCS and LIBS 23 | INCS=-I"/usr/local/cuda/include" $(TENSORRT_INCS) -I"/usr/local/include" -I"plugin" 24 | LIBS=-L"/usr/local/cuda/lib64" $(TENSORRT_LIBS) -L"/usr/local/lib" -Wl,--start-group -lnvinfer -lnvparsers -lnvinfer_plugin -lcudnn -lcublas -lcudart_static -lnvToolsExt -lcudart -lrt -ldl -lpthread -Wl,--end-group 25 | 26 | .PHONY: all clean 27 | 28 | all: libyolo_layer.so 29 | 30 | clean: 31 | rm -f *.so *.o 32 | 33 | libyolo_layer.so: yolo_layer.o 34 | $(CC) -shared -o $@ $< $(LIBS) 35 | 36 | yolo_layer.o: yolo_layer.cu yolo_layer.h 37 | $(NVCC) -ccbin $(CC) $(INCS) $(NVCCFLAGS) -Xcompiler -fPIC -c -o $@ $< 38 | -------------------------------------------------------------------------------- /plugins/README.md: -------------------------------------------------------------------------------- 1 | The "yolo_layer.h" and "yolo_layer.cu" were taken and modified from [wang-xinyu/tensorrtx/yolov4](https://github.com/wang-xinyu/tensorrtx/tree/master/yolov4). The original code is under [MIT License](https://github.com/wang-xinyu/tensorrtx/blob/master/LICENSE). 
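Once built with the Makefile above, the resulting `libyolo_layer.so` has to be loaded into the process before a TensorRT engine containing the `YoloLayer_TRT` plugin can be deserialized, because loading the shared library is what triggers `REGISTER_TENSORRT_PLUGIN` and registers the plugin creator. Below is a minimal Python sketch of that step; the engine file name is a placeholder, and this is not necessarily how `zed_trt.py` wires it up.

~~~~~~
import ctypes
import tensorrt as trt

# Loading the shared library registers the "YoloLayer_TRT" creator
# with TensorRT's plugin registry (via REGISTER_TENSORRT_PLUGIN).
ctypes.cdll.LoadLibrary('./plugins/libyolo_layer.so')

TRT_LOGGER = trt.Logger(trt.Logger.INFO)
trt.init_libnvinfer_plugins(TRT_LOGGER, '')

# "yolov4-tiny.trt" is a placeholder for an engine built with this plugin.
with open('yolov4-tiny.trt', 'rb') as f, trt.Runtime(TRT_LOGGER) as runtime:
    engine = runtime.deserialize_cuda_engine(f.read())
~~~~~~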
2 | -------------------------------------------------------------------------------- /plugins/gpu_cc.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | ''' 5 | # ported from https://gist.github.com/f0k/63a664160d016a491b2cbea15913d549 6 | ''' 7 | 8 | import ctypes 9 | 10 | CUDA_SUCCESS = 0 11 | 12 | def get_gpu_archs(): 13 | libnames = ('libcuda.so', 'libcuda.dylib', 'cuda.dll') 14 | for libname in libnames: 15 | try: 16 | cuda = ctypes.CDLL(libname) 17 | except OSError: 18 | continue 19 | else: 20 | break 21 | else: 22 | return 23 | 24 | gpu_archs = set() 25 | 26 | n_gpus = ctypes.c_int() 27 | cc_major = ctypes.c_int() 28 | cc_minor = ctypes.c_int() 29 | 30 | result = ctypes.c_int() 31 | device = ctypes.c_int() 32 | error_str = ctypes.c_char_p() 33 | 34 | result = cuda.cuInit(0) 35 | if result != CUDA_SUCCESS: 36 | cuda.cuGetErrorString(result, ctypes.byref(error_str)) 37 | # print('cuInit failed with error code %d: %s' % (result, error_str.value.decode())) 38 | return [] 39 | 40 | result = cuda.cuDeviceGetCount(ctypes.byref(n_gpus)) 41 | if result != CUDA_SUCCESS: 42 | cuda.cuGetErrorString(result, ctypes.byref(error_str)) 43 | # print('cuDeviceGetCount failed with error code %d: %s' % (result, error_str.value.decode())) 44 | return [] 45 | 46 | for i in range(n_gpus.value): 47 | if cuda.cuDeviceComputeCapability(ctypes.byref(cc_major), ctypes.byref(cc_minor), device) == CUDA_SUCCESS: 48 | gpu_archs.add(str(cc_major.value) + str(cc_minor.value)) 49 | 50 | return list(gpu_archs) 51 | 52 | if __name__ == '__main__': 53 | print(' '.join(get_gpu_archs())) 54 | -------------------------------------------------------------------------------- /plugins/yolo_layer.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * yolo_layer.cu 3 | * 4 | * This code was originally written by wang-xinyu under MIT license. 5 | * I took it from: 6 | * 7 | * https://github.com/wang-xinyu/tensorrtx/tree/master/yolov4 8 | * 9 | * and made necessary modifications. 
10 | * 11 | * - JK Jung 12 | */ 13 | 14 | #include "yolo_layer.h" 15 | 16 | using namespace Yolo; 17 | 18 | namespace 19 | { 20 | // Write values into buffer 21 | template 22 | void write(char*& buffer, const T& val) 23 | { 24 | *reinterpret_cast(buffer) = val; 25 | buffer += sizeof(T); 26 | } 27 | 28 | // Read values from buffer 29 | template 30 | void read(const char*& buffer, T& val) 31 | { 32 | val = *reinterpret_cast(buffer); 33 | buffer += sizeof(T); 34 | } 35 | } // namespace 36 | 37 | namespace nvinfer1 38 | { 39 | YoloLayerPlugin::YoloLayerPlugin(int yolo_width, int yolo_height, int num_anchors, float* anchors, int num_classes, int input_width, int input_height, float scale_x_y, int new_coords) 40 | { 41 | mYoloWidth = yolo_width; 42 | mYoloHeight = yolo_height; 43 | mNumAnchors = num_anchors; 44 | memcpy(mAnchorsHost, anchors, num_anchors * 2 * sizeof(float)); 45 | mNumClasses = num_classes; 46 | mInputWidth = input_width; 47 | mInputHeight = input_height; 48 | mScaleXY = scale_x_y; 49 | mNewCoords = new_coords; 50 | 51 | CHECK(cudaMalloc(&mAnchors, MAX_ANCHORS * 2 * sizeof(float))); 52 | CHECK(cudaMemcpy(mAnchors, mAnchorsHost, mNumAnchors * 2 * sizeof(float), cudaMemcpyHostToDevice)); 53 | } 54 | 55 | YoloLayerPlugin::YoloLayerPlugin(const void* data, size_t length) 56 | { 57 | const char *d = reinterpret_cast(data), *a = d; 58 | read(d, mThreadCount); 59 | read(d, mYoloWidth); 60 | read(d, mYoloHeight); 61 | read(d, mNumAnchors); 62 | memcpy(mAnchorsHost, d, MAX_ANCHORS * 2 * sizeof(float)); 63 | d += MAX_ANCHORS * 2 * sizeof(float); 64 | read(d, mNumClasses); 65 | read(d, mInputWidth); 66 | read(d, mInputHeight); 67 | read(d, mScaleXY); 68 | read(d, mNewCoords); 69 | 70 | CHECK(cudaMalloc(&mAnchors, MAX_ANCHORS * 2 * sizeof(float))); 71 | CHECK(cudaMemcpy(mAnchors, mAnchorsHost, mNumAnchors * 2 * sizeof(float), cudaMemcpyHostToDevice)); 72 | 73 | assert(d == a + length); 74 | } 75 | 76 | void YoloLayerPlugin::serialize(void* buffer) const 77 | { 78 | char* d = static_cast(buffer), *a = d; 79 | write(d, mThreadCount); 80 | write(d, mYoloWidth); 81 | write(d, mYoloHeight); 82 | write(d, mNumAnchors); 83 | memcpy(d, mAnchorsHost, MAX_ANCHORS * 2 * sizeof(float)); 84 | d += MAX_ANCHORS * 2 * sizeof(float); 85 | write(d, mNumClasses); 86 | write(d, mInputWidth); 87 | write(d, mInputHeight); 88 | write(d, mScaleXY); 89 | write(d, mNewCoords); 90 | 91 | assert(d == a + getSerializationSize()); 92 | } 93 | 94 | size_t YoloLayerPlugin::getSerializationSize() const 95 | { 96 | return sizeof(mThreadCount) + \ 97 | sizeof(mYoloWidth) + sizeof(mYoloHeight) + \ 98 | sizeof(mNumAnchors) + MAX_ANCHORS * 2 * sizeof(float) + \ 99 | sizeof(mNumClasses) + \ 100 | sizeof(mInputWidth) + sizeof(mInputHeight) + \ 101 | sizeof(mScaleXY) + sizeof(mNewCoords); 102 | } 103 | 104 | int YoloLayerPlugin::initialize() 105 | { 106 | return 0; 107 | } 108 | 109 | void YoloLayerPlugin::terminate() 110 | { 111 | CHECK(cudaFree(mAnchors)); 112 | } 113 | 114 | Dims YoloLayerPlugin::getOutputDimensions(int index, const Dims* inputs, int nbInputDims) 115 | { 116 | assert(index == 0); 117 | assert(nbInputDims == 1); 118 | assert(inputs[0].d[0] == (mNumClasses + 5) * mNumAnchors); 119 | assert(inputs[0].d[1] == mYoloHeight); 120 | assert(inputs[0].d[2] == mYoloWidth); 121 | // output detection results to the channel dimension 122 | int totalsize = mYoloWidth * mYoloHeight * mNumAnchors * sizeof(Detection) / sizeof(float); 123 | return Dims3(totalsize, 1, 1); 124 | } 125 | 126 | void 
YoloLayerPlugin::setPluginNamespace(const char* pluginNamespace) 127 | { 128 | mPluginNamespace = pluginNamespace; 129 | } 130 | 131 | const char* YoloLayerPlugin::getPluginNamespace() const 132 | { 133 | return mPluginNamespace; 134 | } 135 | 136 | // Return the DataType of the plugin output at the requested index 137 | DataType YoloLayerPlugin::getOutputDataType(int index, const DataType* inputTypes, int nbInputs) const 138 | { 139 | return DataType::kFLOAT; 140 | } 141 | 142 | // Return true if output tensor is broadcast across a batch. 143 | bool YoloLayerPlugin::isOutputBroadcastAcrossBatch(int outputIndex, const bool* inputIsBroadcasted, int nbInputs) const 144 | { 145 | return false; 146 | } 147 | 148 | // Return true if plugin can use input that is broadcast across batch without replication. 149 | bool YoloLayerPlugin::canBroadcastInputAcrossBatch(int inputIndex) const 150 | { 151 | return false; 152 | } 153 | 154 | void YoloLayerPlugin::configurePlugin(const PluginTensorDesc* in, int nbInput, const PluginTensorDesc* out, int nbOutput) 155 | { 156 | } 157 | 158 | // Attach the plugin object to an execution context and grant the plugin the access to some context resource. 159 | void YoloLayerPlugin::attachToContext(cudnnContext* cudnnContext, cublasContext* cublasContext, IGpuAllocator* gpuAllocator) 160 | { 161 | } 162 | 163 | // Detach the plugin object from its execution context. 164 | void YoloLayerPlugin::detachFromContext() 165 | { 166 | } 167 | 168 | const char* YoloLayerPlugin::getPluginType() const 169 | { 170 | return "YoloLayer_TRT"; 171 | } 172 | 173 | const char* YoloLayerPlugin::getPluginVersion() const 174 | { 175 | return "1"; 176 | } 177 | 178 | void YoloLayerPlugin::destroy() 179 | { 180 | delete this; 181 | } 182 | 183 | // Clone the plugin 184 | IPluginV2IOExt* YoloLayerPlugin::clone() const 185 | { 186 | YoloLayerPlugin *p = new YoloLayerPlugin(mYoloWidth, mYoloHeight, mNumAnchors, (float*) mAnchorsHost, mNumClasses, mInputWidth, mInputHeight, mScaleXY, mNewCoords); 187 | p->setPluginNamespace(mPluginNamespace); 188 | return p; 189 | } 190 | 191 | inline __device__ float sigmoidGPU(float x) { return 1.0f / (1.0f + __expf(-x)); } 192 | 193 | inline __device__ float scale_sigmoidGPU(float x, float s) 194 | { 195 | return s * sigmoidGPU(x) - (s - 1.0f) * 0.5f; 196 | } 197 | 198 | // CalDetection(): This kernel processes 1 yolo layer calculation. It 199 | // distributes calculations so that 1 GPU thread would be responsible 200 | // for each grid/anchor combination. 201 | // NOTE: The output (x, y, w, h) are between 0.0 and 1.0 202 | // (relative to orginal image width and height). 
203 | __global__ void CalDetection(const float *input, float *output, 204 | int batch_size, 205 | int yolo_width, int yolo_height, 206 | int num_anchors, const float *anchors, 207 | int num_classes, int input_w, int input_h, 208 | float scale_x_y) 209 | { 210 | int idx = threadIdx.x + blockDim.x * blockIdx.x; 211 | Detection* det = ((Detection*) output) + idx; 212 | int total_grids = yolo_width * yolo_height; 213 | if (idx >= batch_size * total_grids * num_anchors) return; 214 | 215 | int info_len = 5 + num_classes; 216 | //int batch_idx = idx / (total_grids * num_anchors); 217 | int group_idx = idx / total_grids; 218 | int anchor_idx = group_idx % num_anchors; 219 | const float* cur_input = input + group_idx * (info_len * total_grids) + (idx % total_grids); 220 | 221 | int class_id; 222 | float max_cls_logit = -CUDART_INF_F; // minus infinity 223 | for (int i = 5; i < info_len; ++i) { 224 | float l = *(cur_input + i * total_grids); 225 | if (l > max_cls_logit) { 226 | max_cls_logit = l; 227 | class_id = i - 5; 228 | } 229 | } 230 | float max_cls_prob = sigmoidGPU(max_cls_logit); 231 | float box_prob = sigmoidGPU(*(cur_input + 4 * total_grids)); 232 | //if (max_cls_prob < IGNORE_THRESH || box_prob < IGNORE_THRESH) 233 | // return; 234 | 235 | int row = (idx % total_grids) / yolo_width; 236 | int col = (idx % total_grids) % yolo_width; 237 | 238 | det->bbox[0] = (col + scale_sigmoidGPU(*(cur_input + 0 * total_grids), scale_x_y)) / yolo_width; // [0, 1] 239 | det->bbox[1] = (row + scale_sigmoidGPU(*(cur_input + 1 * total_grids), scale_x_y)) / yolo_height; // [0, 1] 240 | det->bbox[2] = __expf(*(cur_input + 2 * total_grids)) * *(anchors + 2 * anchor_idx + 0) / input_w; // [0, 1] 241 | det->bbox[3] = __expf(*(cur_input + 3 * total_grids)) * *(anchors + 2 * anchor_idx + 1) / input_h; // [0, 1] 242 | 243 | det->bbox[0] -= det->bbox[2] / 2; // shift from center to top-left 244 | det->bbox[1] -= det->bbox[3] / 2; 245 | 246 | det->det_confidence = box_prob; 247 | det->class_id = class_id; 248 | det->class_confidence = max_cls_prob; 249 | } 250 | 251 | inline __device__ float scale(float x, float s) 252 | { 253 | return s * x - (s - 1.0f) * 0.5f; 254 | } 255 | 256 | inline __device__ float square(float x) 257 | { 258 | return x * x; 259 | } 260 | 261 | __global__ void CalDetection_NewCoords(const float *input, float *output, 262 | int batch_size, 263 | int yolo_width, int yolo_height, 264 | int num_anchors, const float *anchors, 265 | int num_classes, int input_w, int input_h, 266 | float scale_x_y) 267 | { 268 | int idx = threadIdx.x + blockDim.x * blockIdx.x; 269 | Detection* det = ((Detection*) output) + idx; 270 | int total_grids = yolo_width * yolo_height; 271 | if (idx >= batch_size * total_grids * num_anchors) return; 272 | 273 | int info_len = 5 + num_classes; 274 | //int batch_idx = idx / (total_grids * num_anchors); 275 | int group_idx = idx / total_grids; 276 | int anchor_idx = group_idx % num_anchors; 277 | const float* cur_input = input + group_idx * (info_len * total_grids) + (idx % total_grids); 278 | 279 | int class_id; 280 | float max_cls_prob = -CUDART_INF_F; // minus infinity 281 | for (int i = 5; i < info_len; ++i) { 282 | float l = *(cur_input + i * total_grids); 283 | if (l > max_cls_prob) { 284 | max_cls_prob = l; 285 | class_id = i - 5; 286 | } 287 | } 288 | float box_prob = *(cur_input + 4 * total_grids); 289 | //if (max_cls_prob < IGNORE_THRESH || box_prob < IGNORE_THRESH) 290 | // return; 291 | 292 | int row = (idx % total_grids) / yolo_width; 293 | int col = (idx % 
total_grids) % yolo_width; 294 | 295 | det->bbox[0] = (col + scale(*(cur_input + 0 * total_grids), scale_x_y)) / yolo_width; // [0, 1] 296 | det->bbox[1] = (row + scale(*(cur_input + 1 * total_grids), scale_x_y)) / yolo_height; // [0, 1] 297 | det->bbox[2] = square(*(cur_input + 2 * total_grids)) * 4 * *(anchors + 2 * anchor_idx + 0) / input_w; // [0, 1] 298 | det->bbox[3] = square(*(cur_input + 3 * total_grids)) * 4 * *(anchors + 2 * anchor_idx + 1) / input_h; // [0, 1] 299 | 300 | det->bbox[0] -= det->bbox[2] / 2; // shift from center to top-left 301 | det->bbox[1] -= det->bbox[3] / 2; 302 | 303 | det->det_confidence = box_prob; 304 | det->class_id = class_id; 305 | det->class_confidence = max_cls_prob; 306 | } 307 | 308 | void YoloLayerPlugin::forwardGpu(const float* const* inputs, float* output, cudaStream_t stream, int batchSize) 309 | { 310 | int num_elements = batchSize * mNumAnchors * mYoloWidth * mYoloHeight; 311 | 312 | //CHECK(cudaMemset(output, 0, num_elements * sizeof(Detection))); 313 | 314 | if (mNewCoords) { 315 | CalDetection_NewCoords<<<(num_elements + mThreadCount - 1) / mThreadCount, mThreadCount, 0, stream>>> 316 | (inputs[0], output, batchSize, mYoloWidth, mYoloHeight, mNumAnchors, (const float*) mAnchors, mNumClasses, mInputWidth, mInputHeight, mScaleXY); 317 | } else { 318 | CalDetection<<<(num_elements + mThreadCount - 1) / mThreadCount, mThreadCount, 0, stream>>> 319 | (inputs[0], output, batchSize, mYoloWidth, mYoloHeight, mNumAnchors, (const float*) mAnchors, mNumClasses, mInputWidth, mInputHeight, mScaleXY); 320 | } 321 | } 322 | 323 | int YoloLayerPlugin::enqueue(int batchSize, const void* const* inputs, void** outputs, void* workspace, cudaStream_t stream) 324 | { 325 | forwardGpu((const float* const*)inputs, (float*)outputs[0], stream, batchSize); 326 | return 0; 327 | } 328 | 329 | YoloPluginCreator::YoloPluginCreator() 330 | { 331 | mPluginAttributes.clear(); 332 | 333 | mFC.nbFields = mPluginAttributes.size(); 334 | mFC.fields = mPluginAttributes.data(); 335 | } 336 | 337 | const char* YoloPluginCreator::getPluginName() const 338 | { 339 | return "YoloLayer_TRT"; 340 | } 341 | 342 | const char* YoloPluginCreator::getPluginVersion() const 343 | { 344 | return "1"; 345 | } 346 | 347 | const PluginFieldCollection* YoloPluginCreator::getFieldNames() 348 | { 349 | return &mFC; 350 | } 351 | 352 | IPluginV2IOExt* YoloPluginCreator::createPlugin(const char* name, const PluginFieldCollection* fc) 353 | { 354 | assert(!strcmp(name, getPluginName())); 355 | const PluginField* fields = fc->fields; 356 | int yolo_width, yolo_height, num_anchors = 0; 357 | float anchors[MAX_ANCHORS * 2]; 358 | int num_classes, input_multiplier, new_coords = 0; 359 | float scale_x_y = 1.0; 360 | 361 | for (int i = 0; i < fc->nbFields; ++i) 362 | { 363 | const char* attrName = fields[i].name; 364 | if (!strcmp(attrName, "yoloWidth")) 365 | { 366 | assert(fields[i].type == PluginFieldType::kINT32); 367 | yolo_width = *(static_cast(fields[i].data)); 368 | } 369 | else if (!strcmp(attrName, "yoloHeight")) 370 | { 371 | assert(fields[i].type == PluginFieldType::kINT32); 372 | yolo_height = *(static_cast(fields[i].data)); 373 | } 374 | else if (!strcmp(attrName, "numAnchors")) 375 | { 376 | assert(fields[i].type == PluginFieldType::kINT32); 377 | num_anchors = *(static_cast(fields[i].data)); 378 | } 379 | else if (!strcmp(attrName, "numClasses")) 380 | { 381 | assert(fields[i].type == PluginFieldType::kINT32); 382 | num_classes = *(static_cast(fields[i].data)); 383 | } 384 | else if 
(!strcmp(attrName, "inputMultiplier")) 385 | { 386 | assert(fields[i].type == PluginFieldType::kINT32); 387 | input_multiplier = *(static_cast(fields[i].data)); 388 | } 389 | else if (!strcmp(attrName, "anchors")){ 390 | assert(num_anchors > 0 && num_anchors <= MAX_ANCHORS); 391 | assert(fields[i].type == PluginFieldType::kFLOAT32); 392 | memcpy(anchors, static_cast(fields[i].data), num_anchors * 2 * sizeof(float)); 393 | } 394 | else if (!strcmp(attrName, "scaleXY")) 395 | { 396 | assert(fields[i].type == PluginFieldType::kFLOAT32); 397 | scale_x_y = *(static_cast(fields[i].data)); 398 | } 399 | else if (!strcmp(attrName, "newCoords")) 400 | { 401 | assert(fields[i].type == PluginFieldType::kINT32); 402 | new_coords = *(static_cast(fields[i].data)); 403 | } 404 | else 405 | { 406 | std::cerr << "Unknown attribute: " << attrName << std::endl; 407 | assert(0); 408 | } 409 | } 410 | assert(yolo_width > 0 && yolo_height > 0); 411 | assert(anchors[0] > 0.0f && anchors[1] > 0.0f); 412 | assert(num_classes > 0); 413 | assert(input_multiplier == 8 || input_multiplier == 16 || input_multiplier == 32); 414 | assert(scale_x_y >= 1.0); 415 | 416 | YoloLayerPlugin* obj = new YoloLayerPlugin(yolo_width, yolo_height, num_anchors, anchors, num_classes, yolo_width * input_multiplier, yolo_height * input_multiplier, scale_x_y, new_coords); 417 | obj->setPluginNamespace(mNamespace.c_str()); 418 | return obj; 419 | } 420 | 421 | IPluginV2IOExt* YoloPluginCreator::deserializePlugin(const char* name, const void* serialData, size_t serialLength) 422 | { 423 | YoloLayerPlugin* obj = new YoloLayerPlugin(serialData, serialLength); 424 | obj->setPluginNamespace(mNamespace.c_str()); 425 | return obj; 426 | } 427 | 428 | PluginFieldCollection YoloPluginCreator::mFC{}; 429 | std::vector YoloPluginCreator::mPluginAttributes; 430 | } // namespace nvinfer1 431 | -------------------------------------------------------------------------------- /plugins/yolo_layer.h: -------------------------------------------------------------------------------- 1 | #ifndef _YOLO_LAYER_H 2 | #define _YOLO_LAYER_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include "math_constants.h" 9 | #include "NvInfer.h" 10 | 11 | #define MAX_ANCHORS 6 12 | 13 | #define CHECK(status) \ 14 | do { \ 15 | auto ret = status; \ 16 | if (ret != 0) { \ 17 | std::cerr << "Cuda failure in file '" << __FILE__ \ 18 | << "' line " << __LINE__ \ 19 | << ": " << ret << std::endl; \ 20 | abort(); \ 21 | } \ 22 | } while (0) 23 | 24 | namespace Yolo 25 | { 26 | static constexpr float IGNORE_THRESH = 0.01f; 27 | 28 | struct alignas(float) Detection { 29 | float bbox[4]; // x, y, w, h 30 | float det_confidence; 31 | float class_id; 32 | float class_confidence; 33 | }; 34 | } 35 | 36 | namespace nvinfer1 37 | { 38 | class YoloLayerPlugin: public IPluginV2IOExt 39 | { 40 | public: 41 | YoloLayerPlugin(int yolo_width, int yolo_height, int num_anchors, float* anchors, int num_classes, int input_width, int input_height, float scale_x_y, int new_coords); 42 | YoloLayerPlugin(const void* data, size_t length); 43 | 44 | ~YoloLayerPlugin() override = default; 45 | 46 | int getNbOutputs() const override 47 | { 48 | return 1; 49 | } 50 | 51 | Dims getOutputDimensions(int index, const Dims* inputs, int nbInputDims) override; 52 | 53 | int initialize() override; 54 | 55 | void terminate() override; 56 | 57 | virtual size_t getWorkspaceSize(int maxBatchSize) const override { return 0;} 58 | 59 | virtual int enqueue(int batchSize, const void*const * inputs, void** 
outputs, void* workspace, cudaStream_t stream) override; 60 | 61 | virtual size_t getSerializationSize() const override; 62 | 63 | virtual void serialize(void* buffer) const override; 64 | 65 | bool supportsFormatCombination(int pos, const PluginTensorDesc* inOut, int nbInputs, int nbOutputs) const override { 66 | return inOut[pos].format == TensorFormat::kLINEAR && inOut[pos].type == DataType::kFLOAT; 67 | } 68 | 69 | const char* getPluginType() const override; 70 | 71 | const char* getPluginVersion() const override; 72 | 73 | void destroy() override; 74 | 75 | IPluginV2IOExt* clone() const override; 76 | 77 | void setPluginNamespace(const char* pluginNamespace) override; 78 | 79 | const char* getPluginNamespace() const override; 80 | 81 | DataType getOutputDataType(int index, const DataType* inputTypes, int nbInputs) const override; 82 | 83 | bool isOutputBroadcastAcrossBatch(int outputIndex, const bool* inputIsBroadcasted, int nbInputs) const override; 84 | 85 | bool canBroadcastInputAcrossBatch(int inputIndex) const override; 86 | 87 | void attachToContext(cudnnContext* cudnnContext, cublasContext* cublasContext, IGpuAllocator* gpuAllocator) override; 88 | 89 | void configurePlugin(const PluginTensorDesc* in, int nbInput, const PluginTensorDesc* out, int nbOutput) override TRTNOEXCEPT; 90 | 91 | void detachFromContext() override; 92 | 93 | private: 94 | void forwardGpu(const float* const* inputs, float* output, cudaStream_t stream, int batchSize = 1); 95 | 96 | int mThreadCount = 64; 97 | int mYoloWidth, mYoloHeight, mNumAnchors; 98 | float mAnchorsHost[MAX_ANCHORS * 2]; 99 | float *mAnchors; // allocated on GPU 100 | int mNumClasses; 101 | int mInputWidth, mInputHeight; 102 | float mScaleXY; 103 | int mNewCoords = 0; 104 | 105 | const char* mPluginNamespace; 106 | 107 | protected: 108 | using IPluginV2IOExt::configurePlugin; 109 | }; 110 | 111 | class YoloPluginCreator : public IPluginCreator 112 | { 113 | public: 114 | YoloPluginCreator(); 115 | 116 | ~YoloPluginCreator() override = default; 117 | 118 | const char* getPluginName() const override; 119 | 120 | const char* getPluginVersion() const override; 121 | 122 | const PluginFieldCollection* getFieldNames() override; 123 | 124 | IPluginV2IOExt* createPlugin(const char* name, const PluginFieldCollection* fc) override; 125 | 126 | IPluginV2IOExt* deserializePlugin(const char* name, const void* serialData, size_t serialLength) override; 127 | 128 | void setPluginNamespace(const char* libNamespace) override 129 | { 130 | mNamespace = libNamespace; 131 | } 132 | 133 | const char* getPluginNamespace() const override 134 | { 135 | return mNamespace.c_str(); 136 | } 137 | 138 | private: 139 | static PluginFieldCollection mFC; 140 | static std::vector mPluginAttributes; 141 | std::string mNamespace; 142 | }; 143 | 144 | REGISTER_TENSORRT_PLUGIN(YoloPluginCreator); 145 | }; 146 | 147 | #endif 148 | -------------------------------------------------------------------------------- /plugins/yolo_layer.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MehmetOKUYAR/Yolo-Object-Detection-and-Distance-Measurement-with-Zed-camera/a112c65013eab48cf3210fb1e9973aad3ac25cf1/plugins/yolo_layer.o -------------------------------------------------------------------------------- /utils/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/MehmetOKUYAR/Yolo-Object-Detection-and-Distance-Measurement-with-Zed-camera/a112c65013eab48cf3210fb1e9973aad3ac25cf1/utils/__init__.py -------------------------------------------------------------------------------- /utils/background.py: -------------------------------------------------------------------------------- 1 | """background.py 2 | 3 | This code implements the Background class for the TensorRT MODNet 4 | demo. The Background class could generate background images from 5 | either a still image, a video file or nothing (pure black bg). 6 | """ 7 | 8 | 9 | import numpy as np 10 | import cv2 11 | 12 | 13 | class Background(): 14 | """Backgrounf class which supports one of the following sources: 15 | 16 | 1. Image (jpg, png, etc.) file, repeating indefinitely 17 | 2. Video file, looping forever 18 | 3. None -> black background 19 | 20 | # Arguments 21 | src: if not spcified, use black background; else, src should be 22 | a filename of an image (jpg/png) or video (mp4/ts) 23 | width & height: width & height of the output background image 24 | """ 25 | 26 | def __init__(self, src, width, height, demo_mode=False): 27 | self.src = src 28 | self.width = width 29 | self.height = height 30 | self.demo_mode = demo_mode 31 | if not src: # empty source: black background 32 | self.is_video = False 33 | self.bg_frame = np.zeros((height, width, 3), dtype=np.uint8) 34 | elif not isinstance(src, str): 35 | raise ValueError('bad src') 36 | elif src.endswith('.jpg') or src.endswith('.png'): 37 | self.is_video = False 38 | self.bg_frame = cv2.resize(cv2.imread(src), (width, height)) 39 | assert self.bg_frame is not None and self.bg_frame.ndim == 3 40 | elif src.endswith('.mp4') or src.endswith('.ts'): 41 | self.is_video = True 42 | self.cap = cv2.VideoCapture(src) 43 | assert self.cap.isOpened() 44 | else: 45 | raise ValueError('unknown src') 46 | 47 | def read(self): 48 | """Read a frame from the Background object.""" 49 | if self.is_video: 50 | _, frame = self.cap.read() 51 | if frame is None: 52 | # assume end of video file has been reached, so loop around 53 | self.cap.release() 54 | self.cap = cv2.VideoCapture(self.src) 55 | _, frame = self.cap.read() 56 | return cv2.resize(frame, (self.width, self.height)) 57 | else: 58 | return self.bg_frame.copy() 59 | 60 | def __del__(self): 61 | if self.is_video: 62 | try: 63 | self.cap.release() 64 | except: 65 | pass 66 | -------------------------------------------------------------------------------- /utils/camera.py: -------------------------------------------------------------------------------- 1 | """camera.py 2 | 3 | This code implements the Camera class, which encapsulates code to 4 | handle IP CAM, USB webcam or the Jetson onboard camera. In 5 | addition, this Camera class is further extended to take a video 6 | file or an image file as input. 7 | """ 8 | 9 | 10 | import logging 11 | import threading 12 | import subprocess 13 | 14 | import numpy as np 15 | import cv2 16 | 17 | 18 | # The following flag ise used to control whether to use a GStreamer 19 | # pipeline to open USB webcam source. If set to False, we just open 20 | # the webcam using cv2.VideoCapture(index) machinery. i.e. relying 21 | # on cv2's built-in function to capture images from the webcam. 22 | USB_GSTREAMER = True 23 | 24 | 25 | def add_camera_args(parser): 26 | """Add parser augument for camera options.""" 27 | parser.add_argument('--image', type=str, default=None, 28 | help='image file name, e.g. 
dog.jpg') 29 | parser.add_argument('--video', type=str, default=None, 30 | help='video file name, e.g. traffic.mp4') 31 | parser.add_argument('--video_looping', action='store_true', 32 | help='loop around the video file [False]') 33 | parser.add_argument('--rtsp', type=str, default=None, 34 | help=('RTSP H.264 stream, e.g. ' 35 | 'rtsp://admin:123456@192.168.1.64:554')) 36 | parser.add_argument('--rtsp_latency', type=int, default=200, 37 | help='RTSP latency in ms [200]') 38 | parser.add_argument('--usb', type=int, default=None, 39 | help='USB webcam device id (/dev/video?) [None]') 40 | parser.add_argument('--gstr', type=str, default=None, 41 | help='GStreamer string [None]') 42 | parser.add_argument('--onboard', type=int, default=None, 43 | help='Jetson onboard camera [None]') 44 | parser.add_argument('--copy_frame', action='store_true', 45 | help=('copy video frame internally [False]')) 46 | parser.add_argument('--do_resize', action='store_true', 47 | help=('resize image/video [False]')) 48 | parser.add_argument('--width', type=int, default=640, 49 | help='image width [640]') 50 | parser.add_argument('--height', type=int, default=480, 51 | help='image height [480]') 52 | return parser 53 | 54 | 55 | def open_cam_rtsp(uri, width, height, latency): 56 | """Open an RTSP URI (IP CAM).""" 57 | gst_elements = str(subprocess.check_output('gst-inspect-1.0')) 58 | if 'omxh264dec' in gst_elements: 59 | # Use hardware H.264 decoder on Jetson platforms 60 | gst_str = ('rtspsrc location={} latency={} ! ' 61 | 'rtph264depay ! h264parse ! omxh264dec ! ' 62 | 'nvvidconv ! ' 63 | 'video/x-raw, width=(int){}, height=(int){}, ' 64 | 'format=(string)BGRx ! videoconvert ! ' 65 | 'appsink').format(uri, latency, width, height) 66 | elif 'avdec_h264' in gst_elements: 67 | # Otherwise try to use the software decoder 'avdec_h264' 68 | # NOTE: in case resizing images is necessary, try adding 69 | # a 'videoscale' into the pipeline 70 | gst_str = ('rtspsrc location={} latency={} ! ' 71 | 'rtph264depay ! h264parse ! avdec_h264 ! ' 72 | 'videoconvert ! appsink').format(uri, latency) 73 | else: 74 | raise RuntimeError('H.264 decoder not found!') 75 | return cv2.VideoCapture(gst_str, cv2.CAP_GSTREAMER) 76 | 77 | 78 | def open_cam_usb(dev, width, height): 79 | """Open a USB webcam.""" 80 | if USB_GSTREAMER: 81 | return cv2.VideoCapture(dev) 82 | else: 83 | return cv2.VideoCapture(dev) 84 | 85 | 86 | def open_cam_gstr(gstr, width, height): 87 | """Open camera using a GStreamer string. 88 | 89 | Example: 90 | gstr = 'v4l2src device=/dev/video0 ! video/x-raw, width=(int){width}, height=(int){height} ! videoconvert ! appsink' 91 | """ 92 | gst_str = gstr.format(width=width, height=height) 93 | return cv2.VideoCapture(gst_str, cv2.CAP_GSTREAMER) 94 | 95 | 96 | def open_cam_onboard(width, height): 97 | """Open the Jetson onboard camera.""" 98 | gst_elements = str(subprocess.check_output('gst-inspect-1.0')) 99 | if 'nvcamerasrc' in gst_elements: 100 | # On versions of L4T prior to 28.1, you might need to add 101 | # 'flip-method=2' into gst_str below. 102 | gst_str = ('nvcamerasrc ! ' 103 | 'video/x-raw(memory:NVMM), ' 104 | 'width=(int)2592, height=(int)1458, ' 105 | 'format=(string)I420, framerate=(fraction)30/1 ! ' 106 | 'nvvidconv ! ' 107 | 'video/x-raw, width=(int){}, height=(int){}, ' 108 | 'format=(string)BGRx ! ' 109 | 'videoconvert ! appsink').format(width, height) 110 | elif 'nvarguscamerasrc' in gst_elements: 111 | gst_str = ('nvarguscamerasrc ! 
' 112 | 'video/x-raw(memory:NVMM), ' 113 | 'width=(int)1920, height=(int)1080, ' 114 | 'format=(string)NV12, framerate=(fraction)30/1 ! ' 115 | 'nvvidconv flip-method=2 ! ' 116 | 'video/x-raw, width=(int){}, height=(int){}, ' 117 | 'format=(string)BGRx ! ' 118 | 'videoconvert ! appsink').format(width, height) 119 | else: 120 | raise RuntimeError('onboard camera source not found!') 121 | return cv2.VideoCapture(gst_str, cv2.CAP_GSTREAMER) 122 | 123 | 124 | def grab_img(cam): 125 | """This 'grab_img' function is designed to be run in the sub-thread. 126 | Once started, this thread continues to grab a new image and put it 127 | into the global 'img_handle', until 'thread_running' is set to False. 128 | """ 129 | while cam.thread_running: 130 | _, cam.img_handle = cam.cap.read() 131 | if cam.img_handle is None: 132 | #logging.warning('Camera: cap.read() returns None...') 133 | break 134 | cam.thread_running = False 135 | 136 | 137 | class Camera(): 138 | """Camera class which supports reading images from theses video sources: 139 | 140 | 1. Image (jpg, png, etc.) file, repeating indefinitely 141 | 2. Video file 142 | 3. RTSP (IP CAM) 143 | 4. USB webcam 144 | 5. Jetson onboard camera 145 | """ 146 | 147 | def __init__(self, args): 148 | self.args = args 149 | self.is_opened = False 150 | self.video_file = '' 151 | self.video_looping = args.video_looping 152 | self.thread_running = False 153 | self.img_handle = None 154 | self.copy_frame = args.copy_frame 155 | self.do_resize = args.do_resize 156 | self.img_width = args.width 157 | self.img_height = args.height 158 | self.cap = None 159 | self.thread = None 160 | self._open() # try to open the camera 161 | 162 | def _open(self): 163 | """Open camera based on command line arguments.""" 164 | if self.cap is not None: 165 | raise RuntimeError('camera is already opened!') 166 | a = self.args 167 | if a.image: 168 | logging.info('Camera: using a image file %s' % a.image) 169 | self.cap = 'image' 170 | self.img_handle = cv2.imread(a.image) 171 | if self.img_handle is not None: 172 | if self.do_resize: 173 | self.img_handle = cv2.resize( 174 | self.img_handle, (a.width, a.height)) 175 | self.is_opened = True 176 | self.img_height, self.img_width, _ = self.img_handle.shape 177 | elif a.video: 178 | logging.info('Camera: using a video file %s' % a.video) 179 | self.video_file = a.video 180 | self.cap = cv2.VideoCapture(a.video) 181 | self._start() 182 | elif a.rtsp: 183 | logging.info('Camera: using RTSP stream %s' % a.rtsp) 184 | self.cap = open_cam_rtsp(a.rtsp, a.width, a.height, a.rtsp_latency) 185 | self._start() 186 | elif a.usb is not None: 187 | logging.info('Camera: using USB webcam /dev/video%d' % a.usb) 188 | self.cap = open_cam_usb(a.usb, a.width, a.height) 189 | self._start() 190 | elif a.gstr is not None: 191 | logging.info('Camera: using GStreamer string "%s"' % a.gstr) 192 | self.cap = open_cam_gstr(a.gstr, a.width, a.height) 193 | self._start() 194 | elif a.onboard is not None: 195 | logging.info('Camera: using Jetson onboard camera') 196 | self.cap = open_cam_onboard(a.width, a.height) 197 | self._start() 198 | else: 199 | raise RuntimeError('no camera type specified!') 200 | 201 | def isOpened(self): 202 | return self.is_opened 203 | 204 | def _start(self): 205 | if not self.cap.isOpened(): 206 | logging.warning('Camera: starting while cap is not opened!') 207 | return 208 | 209 | # Try to grab the 1st image and determine width and height 210 | _, self.img_handle = self.cap.read() 211 | if self.img_handle is None: 212 | 
logging.warning('Camera: cap.read() returns no image!') 213 | self.is_opened = False 214 | return 215 | 216 | self.is_opened = True 217 | if self.video_file: 218 | if not self.do_resize: 219 | self.img_height, self.img_width, _ = self.img_handle.shape 220 | else: 221 | self.img_height, self.img_width, _ = self.img_handle.shape 222 | # start the child thread if not using a video file source 223 | # i.e. rtsp, usb or onboard 224 | assert not self.thread_running 225 | self.thread_running = True 226 | self.thread = threading.Thread(target=grab_img, args=(self,)) 227 | self.thread.start() 228 | 229 | def _stop(self): 230 | if self.thread_running: 231 | self.thread_running = False 232 | #self.thread.join() 233 | 234 | def read(self): 235 | """Read a frame from the camera object. 236 | 237 | Returns None if the camera runs out of image or error. 238 | """ 239 | if not self.is_opened: 240 | return None 241 | 242 | if self.video_file: 243 | _, img = self.cap.read() 244 | if img is None: 245 | logging.info('Camera: reaching end of video file') 246 | if self.video_looping: 247 | self.cap.release() 248 | self.cap = cv2.VideoCapture(self.video_file) 249 | _, img = self.cap.read() 250 | if img is not None and self.do_resize: 251 | img = cv2.resize(img, (self.img_width, self.img_height)) 252 | return img 253 | elif self.cap == 'image': 254 | return np.copy(self.img_handle) 255 | else: 256 | if self.copy_frame: 257 | return self.img_handle.copy() 258 | else: 259 | return self.img_handle 260 | 261 | def release(self): 262 | self._stop() 263 | try: 264 | self.cap.release() 265 | except: 266 | pass 267 | self.is_opened = False 268 | 269 | def __del__(self): 270 | self.release() 271 | -------------------------------------------------------------------------------- /utils/display.py: -------------------------------------------------------------------------------- 1 | """display.py 2 | """ 3 | 4 | 5 | import time 6 | 7 | import cv2 8 | 9 | 10 | def open_window(window_name, title, width=None, height=None): 11 | """Open the display window.""" 12 | cv2.namedWindow(window_name, cv2.WINDOW_NORMAL) 13 | cv2.setWindowTitle(window_name, title) 14 | if width and height: 15 | cv2.resizeWindow(window_name, width, height) 16 | 17 | 18 | def show_help_text(img, help_text): 19 | """Draw help text on image.""" 20 | cv2.putText(img, help_text, (11, 20), cv2.FONT_HERSHEY_PLAIN, 1.0, 21 | (32, 32, 32), 4, cv2.LINE_AA) 22 | cv2.putText(img, help_text, (10, 20), cv2.FONT_HERSHEY_PLAIN, 1.0, 23 | (240, 240, 240), 1, cv2.LINE_AA) 24 | return img 25 | 26 | 27 | def show_fps(img, fps): 28 | """Draw fps number at top-left corner of the image.""" 29 | font = cv2.FONT_HERSHEY_PLAIN 30 | line = cv2.LINE_AA 31 | fps_text = 'FPS: {:.2f}'.format(fps) 32 | cv2.putText(img, fps_text, (11, 20), font, 1.0, (32, 32, 32), 4, line) 33 | cv2.putText(img, fps_text, (10, 20), font, 1.0, (240, 240, 240), 1, line) 34 | return img 35 | 36 | 37 | def set_display(window_name, full_scrn): 38 | """Set disply window to either full screen or normal.""" 39 | if full_scrn: 40 | cv2.setWindowProperty(window_name, cv2.WND_PROP_FULLSCREEN, 41 | cv2.WINDOW_FULLSCREEN) 42 | else: 43 | cv2.setWindowProperty(window_name, cv2.WND_PROP_FULLSCREEN, 44 | cv2.WINDOW_NORMAL) 45 | 46 | 47 | class FpsCalculator(): 48 | """Helper class for calculating frames-per-second (FPS).""" 49 | 50 | def __init__(self, decay_factor=0.95): 51 | self.fps = 0.0 52 | self.tic = time.time() 53 | self.decay_factor = decay_factor 54 | 55 | def update(self): 56 | toc = time.time() 57 | curr_fps = 
1.0 / (toc - self.tic) 58 | self.fps = curr_fps if self.fps == 0.0 else self.fps 59 | self.fps = self.fps * self.decay_factor + \ 60 | curr_fps * (1 - self.decay_factor) 61 | self.tic = toc 62 | return self.fps 63 | 64 | def reset(self): 65 | self.fps = 0.0 66 | 67 | 68 | class ScreenToggler(): 69 | """Helper class for toggling between non-fullscreen and fullscreen.""" 70 | 71 | def __init__(self): 72 | self.full_scrn = False 73 | 74 | def toggle(self): 75 | self.full_scrn = not self.full_scrn 76 | set_display(WINDOW_NAME, self.full_scrn) 77 | -------------------------------------------------------------------------------- /utils/mjpeg.py: -------------------------------------------------------------------------------- 1 | """mjpeg.py 2 | 3 | This module implements a simple MJPEG server which handles HTTP 4 | requests from remote clients. 5 | """ 6 | 7 | 8 | import time 9 | import queue 10 | import threading 11 | import socket 12 | from http.server import BaseHTTPRequestHandler, HTTPServer 13 | from socketserver import ThreadingMixIn 14 | 15 | import numpy as np 16 | import cv2 17 | 18 | 19 | # globals 20 | _MJPEG_QUEUE = queue.Queue(maxsize=2) 21 | _SLEEP_INTERVAL = 0.1 # update JPG roughly every 0.1 second 22 | 23 | 24 | class MjpegHandler(BaseHTTPRequestHandler): 25 | """A simple MJPEG handler which publishes images.""" 26 | 27 | def _handle_mjpeg(self): 28 | global _MJPEG_QUEUE 29 | img = _MJPEG_QUEUE.get() 30 | 31 | self.send_response(200) 32 | self.send_header( 33 | 'Content-type', 34 | 'multipart/x-mixed-replace; boundary=--jpgboundary' 35 | ) 36 | self.end_headers() 37 | 38 | while True: 39 | if not _MJPEG_QUEUE.empty(): 40 | img = _MJPEG_QUEUE.get() 41 | ret, jpg = cv2.imencode('.jpg', img) 42 | assert jpg is not None 43 | self.wfile.write("--jpgboundary".encode("utf-8")) 44 | self.send_header('Content-type', 'image/jpeg') 45 | self.send_header('Content-length', str(jpg.size)) 46 | self.end_headers() 47 | self.wfile.write(jpg.tostring()) 48 | time.sleep(_SLEEP_INTERVAL) 49 | 50 | def _handle_error(self): 51 | self.send_response(404) 52 | self.send_header('Content-type', 'text/html') 53 | self.end_headers() 54 | self.wfile.write('') 55 | self.wfile.write('
<h1>{0!s} not found</h1>
'.format(self.path)) 56 | self.wfile.write('') 57 | 58 | def do_GET(self): 59 | if self.path == '/mjpg' or self.path == '/': 60 | self._handle_mjpeg() 61 | else: 62 | #print('ERROR: ', self.path) 63 | self._handle_error() 64 | 65 | def handle(self): 66 | try: 67 | super().handle() 68 | except socket.error: 69 | # ignore BrokenPipeError, which is caused by the client 70 | # terminating the HTTP connection 71 | pass 72 | 73 | 74 | class ThreadedHTTPServer(ThreadingMixIn, HTTPServer): 75 | """Handle HTTP requests in a separate thread.""" 76 | # not used... 77 | 78 | 79 | def run_server(server): 80 | server.serve_forever() # this exits when server.shutdown() is called 81 | server.socket.shutdown(socket.SHUT_RDWR) 82 | server.socket.close() 83 | 84 | 85 | class MjpegServer(object): 86 | def __init__(self, init_img=None, ip='', port=8080): 87 | # initialize the queue with a dummy image 88 | global _MJPEG_QUEUE 89 | init_img = init_img if init_img else \ 90 | np.ones((480, 640, 3), np.uint8) * 255 # all white 91 | _MJPEG_QUEUE.put(init_img) 92 | # create the HTTP server and run it from the child thread 93 | self.server = HTTPServer((ip, port), MjpegHandler) 94 | self.run_thread = threading.Thread( 95 | target=run_server, args=(self.server,)) 96 | self.run_thread.start() 97 | 98 | def send_img(self, img): 99 | global _MJPEG_QUEUE 100 | try: 101 | _MJPEG_QUEUE.put(img, block=False) 102 | except queue.Full: 103 | pass 104 | 105 | def shutdown(self): 106 | self.server.shutdown() 107 | del self.server 108 | -------------------------------------------------------------------------------- /utils/modnet.py: -------------------------------------------------------------------------------- 1 | """modnet.py 2 | 3 | Implementation of TrtMODNet class. 4 | """ 5 | 6 | 7 | import numpy as np 8 | import cv2 9 | import tensorrt as trt 10 | import pycuda.driver as cuda 11 | 12 | 13 | # Code in this module is only for TensorRT 7+ 14 | if trt.__version__[0] < '7': 15 | raise SystemExit('TensorRT version < 7') 16 | 17 | 18 | def _preprocess_modnet(img, input_shape): 19 | """Preprocess an image before TRT MODNet inferencing. 20 | 21 | # Args 22 | img: int8 numpy array of shape (img_h, img_w, 3) 23 | input_shape: a tuple of (H, W) 24 | 25 | # Returns 26 | preprocessed img: float32 numpy array of shape (3, H, W) 27 | """ 28 | img = cv2.resize(img, (input_shape[1], input_shape[0]), cv2.INTER_AREA) 29 | img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) 30 | img = img.transpose((2, 0, 1)).astype(np.float32) 31 | img = (img - 127.5) / 127.5 32 | return img 33 | 34 | 35 | def _postprocess_modnet(output, output_shape): 36 | """Postprocess TRT MODNet output. 37 | 38 | # Args 39 | output: inferenced output by the TensorRT engine 40 | output_shape: (H, W), e.g. 
(480, 640) 41 | """ 42 | matte = cv2.resize( 43 | output, (output_shape[1], output_shape[0]), 44 | interpolation=cv2.INTER_AREA) 45 | return matte 46 | 47 | 48 | class HostDeviceMem(object): 49 | """Simple helper data class that's a little nicer to use than a 2-tuple.""" 50 | def __init__(self, host_mem, device_mem): 51 | self.host = host_mem 52 | self.device = device_mem 53 | 54 | def __str__(self): 55 | return 'Host:\n' + str(self.host) + '\nDevice:\n' + str(self.device) 56 | 57 | def __repr__(self): 58 | return self.__str__() 59 | 60 | 61 | def allocate_buffers(engine, context): 62 | """Allocates all host/device in/out buffers required for an engine.""" 63 | assert len(engine) == 2 and engine[0] == 'input' and engine[1] == 'output' 64 | dtype = trt.nptype(engine.get_binding_dtype('input')) 65 | assert trt.nptype(engine.get_binding_dtype('output')) == dtype 66 | bindings = [] 67 | 68 | dims_in = context.get_binding_shape(0) 69 | assert len(dims_in) == 4 and dims_in[0] == 1 and dims_in[1] == 3 70 | hmem_in = cuda.pagelocked_empty(trt.volume(dims_in), dtype) 71 | dmem_in = cuda.mem_alloc(hmem_in.nbytes) 72 | bindings.append(int(dmem_in)) 73 | inputs = [HostDeviceMem(hmem_in, dmem_in)] 74 | 75 | dims_out = context.get_binding_shape(1) 76 | assert len(dims_out) == 4 and dims_out[0] == 1 and dims_out[1] == 1 77 | assert dims_out[2] == dims_in[2] and dims_out[3] == dims_in[3] 78 | hmem_out = cuda.pagelocked_empty(trt.volume(dims_out), dtype) 79 | dmem_out = cuda.mem_alloc(hmem_out.nbytes) 80 | bindings.append(int(dmem_out)) 81 | outputs = [HostDeviceMem(hmem_out, dmem_out)] 82 | 83 | return bindings, inputs, outputs 84 | 85 | 86 | def do_inference_v2(context, bindings, inputs, outputs, stream): 87 | """do_inference_v2 (for TensorRT 7.0+) 88 | 89 | This function is generalized for multiple inputs/outputs for full 90 | dimension networks. Inputs and outputs are expected to be lists 91 | of HostDeviceMem objects. 92 | """ 93 | # Transfer input data to the GPU. 94 | [cuda.memcpy_htod_async(inp.device, inp.host, stream) for inp in inputs] 95 | # Run inference. 96 | context.execute_async_v2(bindings=bindings, stream_handle=stream.handle) 97 | # Transfer predictions back from the GPU. 98 | [cuda.memcpy_dtoh_async(out.host, out.device, stream) for out in outputs] 99 | # Synchronize the stream 100 | stream.synchronize() 101 | # Return only the host outputs. 102 | return [out.host for out in outputs] 103 | 104 | 105 | class TrtMODNet(object): 106 | """TrtMODNet class encapsulates things needed to run TRT MODNet.""" 107 | 108 | def __init__(self, cuda_ctx=None): 109 | """Initialize TensorRT plugins, engine and conetxt. 
110 | 111 | # Arguments 112 | cuda_ctx: PyCUDA context for inferencing (usually only needed 113 | in multi-threaded cases 114 | """ 115 | self.cuda_ctx = cuda_ctx 116 | if self.cuda_ctx: 117 | self.cuda_ctx.push() 118 | self.trt_logger = trt.Logger(trt.Logger.INFO) 119 | self.engine = self._load_engine() 120 | assert self.engine.get_binding_dtype('input') == trt.tensorrt.DataType.FLOAT 121 | 122 | try: 123 | self.context = self.engine.create_execution_context() 124 | self.output_shape = self.context.get_binding_shape(1) # (1, 1, 480, 640) 125 | self.stream = cuda.Stream() 126 | self.bindings, self.inputs, self.outputs = allocate_buffers( 127 | self.engine, self.context) 128 | except Exception as e: 129 | raise RuntimeError('fail to allocate CUDA resources') from e 130 | finally: 131 | if self.cuda_ctx: 132 | self.cuda_ctx.pop() 133 | dims = self.context.get_binding_shape(0) # 'input' 134 | self.input_shape = (dims[2], dims[3]) 135 | 136 | def _load_engine(self): 137 | if not trt.init_libnvinfer_plugins(self.trt_logger, ''): 138 | raise RuntimeError('fail to init built-in plugins') 139 | engine_path = 'modnet/modnet.engine' 140 | with open(engine_path, 'rb') as f, trt.Runtime(self.trt_logger) as runtime: 141 | return runtime.deserialize_cuda_engine(f.read()) 142 | 143 | def infer(self, img): 144 | """Infer an image. 145 | 146 | The output is a matte (matting mask), which is a grayscale image 147 | with either 0 or 255 pixels. 148 | """ 149 | img_resized = _preprocess_modnet(img, self.input_shape) 150 | 151 | self.inputs[0].host = np.ascontiguousarray(img_resized) 152 | if self.cuda_ctx: 153 | self.cuda_ctx.push() 154 | trt_outputs = do_inference_v2( 155 | context=self.context, 156 | bindings=self.bindings, 157 | inputs=self.inputs, 158 | outputs=self.outputs, 159 | stream=self.stream) 160 | if self.cuda_ctx: 161 | self.cuda_ctx.pop() 162 | 163 | output = trt_outputs[0].reshape(self.output_shape[-2:]) 164 | return _postprocess_modnet(output, img.shape[:2]) 165 | -------------------------------------------------------------------------------- /utils/mtcnn.py: -------------------------------------------------------------------------------- 1 | """mtcnn_trt.py 2 | """ 3 | 4 | import numpy as np 5 | import cv2 6 | import pytrt 7 | 8 | 9 | PIXEL_MEAN = 127.5 10 | PIXEL_SCALE = 0.0078125 11 | 12 | 13 | def convert_to_1x1(boxes): 14 | """Convert detection boxes to 1:1 sizes 15 | 16 | # Arguments 17 | boxes: numpy array, shape (n,5), dtype=float32 18 | 19 | # Returns 20 | boxes_1x1 21 | """ 22 | boxes_1x1 = boxes.copy() 23 | hh = boxes[:, 3] - boxes[:, 1] + 1. 24 | ww = boxes[:, 2] - boxes[:, 0] + 1. 25 | mm = np.maximum(hh, ww) 26 | boxes_1x1[:, 0] = boxes[:, 0] + ww * 0.5 - mm * 0.5 27 | boxes_1x1[:, 1] = boxes[:, 1] + hh * 0.5 - mm * 0.5 28 | boxes_1x1[:, 2] = boxes_1x1[:, 0] + mm - 1. 29 | boxes_1x1[:, 3] = boxes_1x1[:, 1] + mm - 1. 
30 | boxes_1x1[:, 0:4] = np.fix(boxes_1x1[:, 0:4]) 31 | return boxes_1x1 32 | 33 | 34 | def crop_img_with_padding(img, box, padding=0): 35 | """Crop a box from image, with out-of-boundary pixels padded 36 | 37 | # Arguments 38 | img: img as a numpy array, shape (H, W, 3) 39 | box: numpy array, shape (5,) or (4,) 40 | padding: integer value for padded pixels 41 | 42 | # Returns 43 | cropped_im: cropped image as a numpy array, shape (H, W, 3) 44 | """ 45 | img_h, img_w, _ = img.shape 46 | if box.shape[0] == 5: 47 | cx1, cy1, cx2, cy2, _ = box.astype(int) 48 | elif box.shape[0] == 4: 49 | cx1, cy1, cx2, cy2 = box.astype(int) 50 | else: 51 | raise ValueError 52 | cw = cx2 - cx1 + 1 53 | ch = cy2 - cy1 + 1 54 | cropped_im = np.zeros((ch, cw, 3), dtype=np.uint8) + padding 55 | ex1 = max(0, -cx1) # ex/ey's are the destination coordinates 56 | ey1 = max(0, -cy1) 57 | ex2 = min(cw, img_w - cx1) 58 | ey2 = min(ch, img_h - cy1) 59 | fx1 = max(cx1, 0) # fx/fy's are the source coordinates 60 | fy1 = max(cy1, 0) 61 | fx2 = min(cx2+1, img_w) 62 | fy2 = min(cy2+1, img_h) 63 | cropped_im[ey1:ey2, ex1:ex2, :] = img[fy1:fy2, fx1:fx2, :] 64 | return cropped_im 65 | 66 | 67 | def nms(boxes, threshold, type='Union'): 68 | """Non-Maximum Supression 69 | 70 | # Arguments 71 | boxes: numpy array [:, 0:5] of [x1, y1, x2, y2, score]'s 72 | threshold: confidence/score threshold, e.g. 0.5 73 | type: 'Union' or 'Min' 74 | 75 | # Returns 76 | A list of indices indicating the result of NMS 77 | """ 78 | if boxes.shape[0] == 0: 79 | return [] 80 | xx1, yy1, xx2, yy2 = boxes[:, 0], boxes[:, 1], boxes[:, 2], boxes[:, 3] 81 | areas = np.multiply(xx2-xx1+1, yy2-yy1+1) 82 | sorted_idx = boxes[:, 4].argsort() 83 | 84 | pick = [] 85 | while len(sorted_idx) > 0: 86 | # In each loop, pick the last box (highest score) and remove 87 | # all other boxes with IoU over threshold 88 | tx1 = np.maximum(xx1[sorted_idx[-1]], xx1[sorted_idx[0:-1]]) 89 | ty1 = np.maximum(yy1[sorted_idx[-1]], yy1[sorted_idx[0:-1]]) 90 | tx2 = np.minimum(xx2[sorted_idx[-1]], xx2[sorted_idx[0:-1]]) 91 | ty2 = np.minimum(yy2[sorted_idx[-1]], yy2[sorted_idx[0:-1]]) 92 | tw = np.maximum(0.0, tx2 - tx1 + 1) 93 | th = np.maximum(0.0, ty2 - ty1 + 1) 94 | inter = tw * th 95 | if type == 'Min': 96 | iou = inter / \ 97 | np.minimum(areas[sorted_idx[-1]], areas[sorted_idx[0:-1]]) 98 | else: 99 | iou = inter / \ 100 | (areas[sorted_idx[-1]] + areas[sorted_idx[0:-1]] - inter) 101 | pick.append(sorted_idx[-1]) 102 | sorted_idx = sorted_idx[np.where(iou <= threshold)[0]] 103 | return pick 104 | 105 | 106 | def generate_pnet_bboxes(conf, reg, scale, t): 107 | """ 108 | # Arguments 109 | conf: softmax score (face or not) of each grid 110 | reg: regression values of x1, y1, x2, y2 coordinates. 111 | The values are normalized to grid width (12) and 112 | height (12). 113 | scale: scale-down factor with respect to original image 114 | t: confidence threshold 115 | 116 | # Returns 117 | A numpy array of bounding box coordinates and the 118 | cooresponding scores: [[x1, y1, x2, y2, score], ...] 119 | 120 | # Notes 121 | Top left corner coordinates of each grid is (x*2, y*2), 122 | or (x*2/scale, y*2/scale) in the original image. 123 | Bottom right corner coordinates is (x*2+12-1, y*2+12-1), 124 | or ((x*2+12-1)/scale, (y*2+12-1)/scale) in the original 125 | image. 
126 | """ 127 | conf = conf.T # swap H and W dimensions 128 | dx1 = reg[0, :, :].T 129 | dy1 = reg[1, :, :].T 130 | dx2 = reg[2, :, :].T 131 | dy2 = reg[3, :, :].T 132 | (x, y) = np.where(conf >= t) 133 | if len(x) == 0: 134 | return np.zeros((0, 5), np.float32) 135 | 136 | score = np.array(conf[x, y]).reshape(-1, 1) # Nx1 137 | reg = np.array([dx1[x, y], dy1[x, y], 138 | dx2[x, y], dy2[x, y]]).T * 12. # Nx4 139 | topleft = np.array([x, y], dtype=np.float32).T * 2. # Nx2 140 | bottomright = topleft + np.array([11., 11.], dtype=np.float32) # Nx2 141 | boxes = (np.concatenate((topleft, bottomright), axis=1) + reg) / scale 142 | boxes = np.concatenate((boxes, score), axis=1) # Nx5 143 | # filter bboxes which are too small 144 | #boxes = boxes[boxes[:, 2]-boxes[:, 0] >= 12., :] 145 | #boxes = boxes[boxes[:, 3]-boxes[:, 1] >= 12., :] 146 | return boxes 147 | 148 | 149 | def generate_rnet_bboxes(conf, reg, pboxes, t): 150 | """ 151 | # Arguments 152 | conf: softmax score (face or not) of each box 153 | reg: regression values of x1, y1, x2, y2 coordinates. 154 | The values are normalized to box width and height. 155 | pboxes: input boxes to RNet 156 | t: confidence threshold 157 | 158 | # Returns 159 | boxes: a numpy array of box coordinates and cooresponding 160 | scores: [[x1, y1, x2, y2, score], ...] 161 | """ 162 | boxes = pboxes.copy() # make a copy 163 | assert boxes.shape[0] == conf.shape[0] 164 | boxes[:, 4] = conf # update 'score' of all boxes 165 | boxes = boxes[conf >= t, :] 166 | reg = reg[conf >= t, :] 167 | ww = (boxes[:, 2]-boxes[:, 0]+1).reshape(-1, 1) # x2 - x1 + 1 168 | hh = (boxes[:, 3]-boxes[:, 1]+1).reshape(-1, 1) # y2 - y1 + 1 169 | boxes[:, 0:4] += np.concatenate((ww, hh, ww, hh), axis=1) * reg 170 | return boxes 171 | 172 | 173 | def generate_onet_outputs(conf, reg_boxes, reg_marks, rboxes, t): 174 | """ 175 | # Arguments 176 | conf: softmax score (face or not) of each box 177 | reg_boxes: regression values of x1, y1, x2, y2 178 | The values are normalized to box width and height. 179 | reg_marks: regression values of the 5 facial landmark points 180 | rboxes: input boxes to ONet (already converted to 2x1) 181 | t: confidence threshold 182 | 183 | # Returns 184 | boxes: a numpy array of box coordinates and cooresponding 185 | scores: [[x1, y1, x2, y2,... , score], ...] 186 | landmarks: a numpy array of facial landmark coordinates: 187 | [[x1, x2, ..., x5, y1, y2, ..., y5], ...] 188 | """ 189 | boxes = rboxes.copy() # make a copy 190 | assert boxes.shape[0] == conf.shape[0] 191 | boxes[:, 4] = conf 192 | boxes = boxes[conf >= t, :] 193 | reg_boxes = reg_boxes[conf >= t, :] 194 | reg_marks = reg_marks[conf >= t, :] 195 | xx = boxes[:, 0].reshape(-1, 1) 196 | yy = boxes[:, 1].reshape(-1, 1) 197 | ww = (boxes[:, 2]-boxes[:, 0]).reshape(-1, 1) 198 | hh = (boxes[:, 3]-boxes[:, 1]).reshape(-1, 1) 199 | marks = np.concatenate((xx, xx, xx, xx, xx, yy, yy, yy, yy, yy), axis=1) 200 | marks += np.concatenate((ww, ww, ww, ww, ww, hh, hh, hh, hh, hh), axis=1) * reg_marks 201 | ww = ww + 1 202 | hh = hh + 1 203 | boxes[:, 0:4] += np.concatenate((ww, hh, ww, hh), axis=1) * reg_boxes 204 | return boxes, marks 205 | 206 | 207 | def clip_dets(dets, img_w, img_h): 208 | """Round and clip detection (x1, y1, ...) values. 209 | 210 | Note we exclude the last value of 'dets' in computation since 211 | it is 'conf'. 
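    For a 5-column 'dets' ([x1, y1, x2, y2, conf]), 'evens' below is
    [0, 2] and 'odds' is [1, 3], so x's are clipped to [0, img_w-1] and
    y's to [0, img_h-1].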
212 | """ 213 | dets[:, 0:-1] = np.fix(dets[:, 0:-1]) 214 | evens = np.arange(0, dets.shape[1]-1, 2) 215 | odds = np.arange(1, dets.shape[1]-1, 2) 216 | dets[:, evens] = np.clip(dets[:, evens], 0., float(img_w-1)) 217 | dets[:, odds] = np.clip(dets[:, odds], 0., float(img_h-1)) 218 | return dets 219 | 220 | 221 | class TrtPNet(object): 222 | """TrtPNet 223 | 224 | Refer to mtcnn/det1_relu.prototxt for calculation of input/output 225 | dimmensions of TrtPNet, as well as input H offsets (for all scales). 226 | The output H offsets are merely input offsets divided by stride (2). 227 | """ 228 | input_h_offsets = (0, 216, 370, 478, 556, 610, 648, 676, 696) 229 | output_h_offsets = (0, 108, 185, 239, 278, 305, 324, 338, 348) 230 | max_n_scales = 9 231 | 232 | def __init__(self, engine): 233 | """__init__ 234 | 235 | # Arguments 236 | engine: path to the TensorRT engine file 237 | """ 238 | self.trtnet = pytrt.PyTrtMtcnn(engine, 239 | (3, 710, 384), 240 | (2, 350, 187), 241 | (4, 350, 187)) 242 | self.trtnet.set_batchsize(1) 243 | 244 | def detect(self, img, minsize=40, factor=0.709, threshold=0.7): 245 | """Detect faces using PNet 246 | 247 | # Arguments 248 | img: input image as a RGB numpy array 249 | threshold: confidence threshold 250 | 251 | # Returns 252 | A numpy array of bounding box coordinates and the 253 | cooresponding scores: [[x1, y1, x2, y2, score], ...] 254 | """ 255 | if minsize < 40: 256 | raise ValueError("TrtPNet is currently designed with " 257 | "'minsize' >= 40") 258 | if factor > 0.709: 259 | raise ValueError("TrtPNet is currently designed with " 260 | "'factor' <= 0.709") 261 | m = 12.0 / minsize 262 | img_h, img_w, _ = img.shape 263 | minl = min(img_h, img_w) * m 264 | 265 | # create scale pyramid 266 | scales = [] 267 | while minl >= 12: 268 | scales.append(m) 269 | m *= factor 270 | minl *= factor 271 | if len(scales) > self.max_n_scales: # probably won't happen... 
272 | raise ValueError('Too many scales, try increasing minsize ' 273 | 'or decreasing factor.') 274 | 275 | total_boxes = np.zeros((0, 5), dtype=np.float32) 276 | img = (img.astype(np.float32) - PIXEL_MEAN) * PIXEL_SCALE 277 | 278 | # stack all scales of the input image vertically into 1 big 279 | # image, and only do inferencing once 280 | im_data = np.zeros((1, 3, 710, 384), dtype=np.float32) 281 | for i, scale in enumerate(scales): 282 | h_offset = self.input_h_offsets[i] 283 | h = int(img_h * scale) 284 | w = int(img_w * scale) 285 | im_data[0, :, h_offset:(h_offset+h), :w] = \ 286 | cv2.resize(img, (w, h)).transpose((2, 0, 1)) 287 | 288 | out = self.trtnet.forward(im_data) 289 | 290 | # extract outputs of each scale from the big output blob 291 | for i, scale in enumerate(scales): 292 | h_offset = self.output_h_offsets[i] 293 | h = (int(img_h * scale) - 12) // 2 + 1 294 | w = (int(img_w * scale) - 12) // 2 + 1 295 | pp = out['prob1'][0, 1, h_offset:(h_offset+h), :w] 296 | cc = out['boxes'][0, :, h_offset:(h_offset+h), :w] 297 | boxes = generate_pnet_bboxes(pp, cc, scale, threshold) 298 | if boxes.shape[0] > 0: 299 | pick = nms(boxes, 0.5, 'Union') 300 | if len(pick) > 0: 301 | boxes = boxes[pick, :] 302 | if boxes.shape[0] > 0: 303 | total_boxes = np.concatenate((total_boxes, boxes), axis=0) 304 | 305 | if total_boxes.shape[0] == 0: 306 | return total_boxes 307 | pick = nms(total_boxes, 0.7, 'Union') 308 | dets = clip_dets(total_boxes[pick, :], img_w, img_h) 309 | return dets 310 | 311 | def destroy(self): 312 | self.trtnet.destroy() 313 | self.trtnet = None 314 | 315 | 316 | class TrtRNet(object): 317 | """TrtRNet 318 | 319 | # Arguments 320 | engine: path to the TensorRT engine (det2) file 321 | """ 322 | 323 | def __init__(self, engine): 324 | self.trtnet = pytrt.PyTrtMtcnn(engine, 325 | (3, 24, 24), 326 | (2, 1, 1), 327 | (4, 1, 1)) 328 | 329 | def detect(self, img, boxes, max_batch=256, threshold=0.6): 330 | """Detect faces using RNet 331 | 332 | # Arguments 333 | img: input image as a RGB numpy array 334 | boxes: detection results by PNet, a numpy array [:, 0:5] 335 | of [x1, y1, x2, y2, score]'s 336 | max_batch: only process these many top boxes from PNet 337 | threshold: confidence threshold 338 | 339 | # Returns 340 | A numpy array of bounding box coordinates and the 341 | cooresponding scores: [[x1, y1, x2, y2, score], ...] 342 | """ 343 | if max_batch > 256: 344 | raise ValueError('Bad max_batch: %d' % max_batch) 345 | boxes = boxes[:max_batch] # assuming boxes are sorted by score 346 | if boxes.shape[0] == 0: 347 | return boxes 348 | img_h, img_w, _ = img.shape 349 | boxes = convert_to_1x1(boxes) 350 | crops = np.zeros((boxes.shape[0], 24, 24, 3), dtype=np.uint8) 351 | for i, det in enumerate(boxes): 352 | cropped_im = crop_img_with_padding(img, det) 353 | # NOTE: H and W dimensions need to be transposed for RNet! 354 | crops[i, ...] 
= cv2.transpose(cv2.resize(cropped_im, (24, 24))) 355 | crops = crops.transpose((0, 3, 1, 2)) # NHWC -> NCHW 356 | crops = (crops.astype(np.float32) - PIXEL_MEAN) * PIXEL_SCALE 357 | 358 | self.trtnet.set_batchsize(crops.shape[0]) 359 | out = self.trtnet.forward(crops) 360 | 361 | pp = out['prob1'][:, 1, 0, 0] 362 | cc = out['boxes'][:, :, 0, 0] 363 | boxes = generate_rnet_bboxes(pp, cc, boxes, threshold) 364 | if boxes.shape[0] == 0: 365 | return boxes 366 | pick = nms(boxes, 0.7, 'Union') 367 | dets = clip_dets(boxes[pick, :], img_w, img_h) 368 | return dets 369 | 370 | def destroy(self): 371 | self.trtnet.destroy() 372 | self.trtnet = None 373 | 374 | 375 | class TrtONet(object): 376 | """TrtONet 377 | 378 | # Arguments 379 | engine: path to the TensorRT engine (det3) file 380 | """ 381 | 382 | def __init__(self, engine): 383 | self.trtnet = pytrt.PyTrtMtcnn(engine, 384 | (3, 48, 48), 385 | (2, 1, 1), 386 | (4, 1, 1), 387 | (10, 1, 1)) 388 | 389 | def detect(self, img, boxes, max_batch=64, threshold=0.7): 390 | """Detect faces using ONet 391 | 392 | # Arguments 393 | img: input image as a RGB numpy array 394 | boxes: detection results by RNet, a numpy array [:, 0:5] 395 | of [x1, y1, x2, y2, score]'s 396 | max_batch: only process these many top boxes from RNet 397 | threshold: confidence threshold 398 | 399 | # Returns 400 | dets: boxes and conf scores 401 | landmarks 402 | """ 403 | if max_batch > 64: 404 | raise ValueError('Bad max_batch: %d' % max_batch) 405 | if boxes.shape[0] == 0: 406 | return (np.zeros((0, 5), dtype=np.float32), 407 | np.zeros((0, 10), dtype=np.float32)) 408 | boxes = boxes[:max_batch] # assuming boxes are sorted by score 409 | img_h, img_w, _ = img.shape 410 | boxes = convert_to_1x1(boxes) 411 | crops = np.zeros((boxes.shape[0], 48, 48, 3), dtype=np.uint8) 412 | for i, det in enumerate(boxes): 413 | cropped_im = crop_img_with_padding(img, det) 414 | # NOTE: H and W dimensions need to be transposed for RNet! 415 | crops[i, ...] = cv2.transpose(cv2.resize(cropped_im, (48, 48))) 416 | crops = crops.transpose((0, 3, 1, 2)) # NHWC -> NCHW 417 | crops = (crops.astype(np.float32) - PIXEL_MEAN) * PIXEL_SCALE 418 | 419 | self.trtnet.set_batchsize(crops.shape[0]) 420 | out = self.trtnet.forward(crops) 421 | 422 | pp = out['prob1'][:, 1, 0, 0] 423 | cc = out['boxes'][:, :, 0, 0] 424 | mm = out['landmarks'][:, :, 0, 0] 425 | boxes, landmarks = generate_onet_outputs(pp, cc, mm, boxes, threshold) 426 | pick = nms(boxes, 0.7, 'Min') 427 | return (clip_dets(boxes[pick, :], img_w, img_h), 428 | np.fix(landmarks[pick, :])) 429 | 430 | def destroy(self): 431 | self.trtnet.destroy() 432 | self.trtnet = None 433 | 434 | 435 | class TrtMtcnn(object): 436 | """TrtMtcnn""" 437 | 438 | def __init__(self): 439 | self.pnet = TrtPNet('mtcnn/det1.engine') 440 | self.rnet = TrtRNet('mtcnn/det2.engine') 441 | self.onet = TrtONet('mtcnn/det3.engine') 442 | 443 | def __del__(self): 444 | self.onet.destroy() 445 | self.rnet.destroy() 446 | self.pnet.destroy() 447 | 448 | def _detect_1280x720(self, img, minsize): 449 | """_detec_1280x720() 450 | 451 | Assuming 'img' has been resized to less than 1280x720. 452 | """ 453 | # MTCNN model was trained with 'MATLAB' image so its channel 454 | # order is RGB instead of BGR. 
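        # The three nets below form the usual MTCNN cascade: PNet proposes
        # candidate windows over an image pyramid, RNet rejects and refines
        # them, and ONet outputs the final boxes plus 5 facial landmarks.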
455 | img = img[:, :, ::-1] # BGR -> RGB 456 | dets = self.pnet.detect(img, minsize=minsize) 457 | dets = self.rnet.detect(img, dets) 458 | dets, landmarks = self.onet.detect(img, dets) 459 | return dets, landmarks 460 | 461 | def detect(self, img, minsize=40): 462 | """detect() 463 | 464 | This function handles rescaling of the input image if it's 465 | larger than 1280x720. 466 | """ 467 | if img is None: 468 | raise ValueError 469 | img_h, img_w, _ = img.shape 470 | scale = min(720. / img_h, 1280. / img_w) 471 | if scale < 1.0: 472 | new_h = int(np.ceil(img_h * scale)) 473 | new_w = int(np.ceil(img_w * scale)) 474 | img = cv2.resize(img, (new_w, new_h)) 475 | minsize = max(int(np.ceil(minsize * scale)), 40) 476 | dets, landmarks = self._detect_1280x720(img, minsize) 477 | if scale < 1.0: 478 | dets[:, :-1] = np.fix(dets[:, :-1] / scale) 479 | landmarks = np.fix(landmarks / scale) 480 | return dets, landmarks 481 | -------------------------------------------------------------------------------- /utils/ssd.py: -------------------------------------------------------------------------------- 1 | """ssd.py 2 | 3 | This module implements the TrtSSD class. 4 | """ 5 | 6 | 7 | import ctypes 8 | 9 | import numpy as np 10 | import cv2 11 | import tensorrt as trt 12 | import pycuda.driver as cuda 13 | 14 | 15 | def _preprocess_trt(img, shape=(300, 300)): 16 | """Preprocess an image before TRT SSD inferencing.""" 17 | img = cv2.resize(img, shape) 18 | img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) 19 | img = img.transpose((2, 0, 1)).astype(np.float32) 20 | img *= (2.0/255.0) 21 | img -= 1.0 22 | return img 23 | 24 | 25 | def _postprocess_trt(img, output, conf_th, output_layout=7): 26 | """Postprocess TRT SSD output.""" 27 | img_h, img_w, _ = img.shape 28 | boxes, confs, clss = [], [], [] 29 | for prefix in range(0, len(output), output_layout): 30 | #index = int(output[prefix+0]) 31 | conf = float(output[prefix+2]) 32 | if conf < conf_th: 33 | continue 34 | x1 = int(output[prefix+3] * img_w) 35 | y1 = int(output[prefix+4] * img_h) 36 | x2 = int(output[prefix+5] * img_w) 37 | y2 = int(output[prefix+6] * img_h) 38 | cls = int(output[prefix+1]) 39 | boxes.append((x1, y1, x2, y2)) 40 | confs.append(conf) 41 | clss.append(cls) 42 | return boxes, confs, clss 43 | 44 | 45 | class TrtSSD(object): 46 | """TrtSSD class encapsulates things needed to run TRT SSD.""" 47 | 48 | def _load_plugins(self): 49 | if trt.__version__[0] < '7': 50 | ctypes.CDLL("ssd/libflattenconcat.so") 51 | trt.init_libnvinfer_plugins(self.trt_logger, '') 52 | 53 | def _load_engine(self): 54 | TRTbin = 'ssd/TRT_%s.bin' % self.model 55 | with open(TRTbin, 'rb') as f, trt.Runtime(self.trt_logger) as runtime: 56 | return runtime.deserialize_cuda_engine(f.read()) 57 | 58 | def _allocate_buffers(self): 59 | host_inputs, host_outputs, cuda_inputs, cuda_outputs, bindings = \ 60 | [], [], [], [], [] 61 | for binding in self.engine: 62 | size = trt.volume(self.engine.get_binding_shape(binding)) * \ 63 | self.engine.max_batch_size 64 | host_mem = cuda.pagelocked_empty(size, np.float32) 65 | cuda_mem = cuda.mem_alloc(host_mem.nbytes) 66 | bindings.append(int(cuda_mem)) 67 | if self.engine.binding_is_input(binding): 68 | host_inputs.append(host_mem) 69 | cuda_inputs.append(cuda_mem) 70 | else: 71 | host_outputs.append(host_mem) 72 | cuda_outputs.append(cuda_mem) 73 | return host_inputs, host_outputs, cuda_inputs, cuda_outputs, bindings 74 | 75 | def __init__(self, model, input_shape, cuda_ctx=None): 76 | """Initialize TensorRT plugins, engine and 
conetxt.""" 77 | self.model = model 78 | self.input_shape = input_shape 79 | self.cuda_ctx = cuda_ctx 80 | if self.cuda_ctx: 81 | self.cuda_ctx.push() 82 | 83 | self.trt_logger = trt.Logger(trt.Logger.INFO) 84 | self._load_plugins() 85 | self.engine = self._load_engine() 86 | 87 | try: 88 | self.context = self.engine.create_execution_context() 89 | self.stream = cuda.Stream() 90 | self.host_inputs, self.host_outputs, self.cuda_inputs, self.cuda_outputs, self.bindings = self._allocate_buffers() 91 | except Exception as e: 92 | raise RuntimeError('fail to allocate CUDA resources') from e 93 | finally: 94 | if self.cuda_ctx: 95 | self.cuda_ctx.pop() 96 | 97 | def __del__(self): 98 | """Free CUDA memories and context.""" 99 | del self.cuda_outputs 100 | del self.cuda_inputs 101 | del self.stream 102 | 103 | def detect(self, img, conf_th=0.3): 104 | """Detect objects in the input image.""" 105 | img_resized = _preprocess_trt(img, self.input_shape) 106 | np.copyto(self.host_inputs[0], img_resized.ravel()) 107 | 108 | if self.cuda_ctx: 109 | self.cuda_ctx.push() 110 | cuda.memcpy_htod_async( 111 | self.cuda_inputs[0], self.host_inputs[0], self.stream) 112 | self.context.execute_async( 113 | batch_size=1, 114 | bindings=self.bindings, 115 | stream_handle=self.stream.handle) 116 | cuda.memcpy_dtoh_async( 117 | self.host_outputs[1], self.cuda_outputs[1], self.stream) 118 | cuda.memcpy_dtoh_async( 119 | self.host_outputs[0], self.cuda_outputs[0], self.stream) 120 | self.stream.synchronize() 121 | if self.cuda_ctx: 122 | self.cuda_ctx.pop() 123 | 124 | output = self.host_outputs[0] 125 | return _postprocess_trt(img, output, conf_th) 126 | -------------------------------------------------------------------------------- /utils/ssd_classes.py: -------------------------------------------------------------------------------- 1 | """ssd_classes.py 2 | 3 | This file was modified from: 4 | http://github.com/AastaNV/TRT_object_detection/blob/master/coco.py 5 | """ 6 | 7 | COCO_CLASSES_LIST = [ 8 | 'background', # was 'unlabeled' 9 | 'person', 10 | 'bicycle', 11 | 'car', 12 | 'motorcycle', 13 | 'airplane', 14 | 'bus', 15 | 'train', 16 | 'truck', 17 | 'boat', 18 | 'traffic light', 19 | 'fire hydrant', 20 | 'street sign', 21 | 'stop sign', 22 | 'parking meter', 23 | 'bench', 24 | 'bird', 25 | 'cat', 26 | 'dog', 27 | 'horse', 28 | 'sheep', 29 | 'cow', 30 | 'elephant', 31 | 'bear', 32 | 'zebra', 33 | 'giraffe', 34 | 'hat', 35 | 'backpack', 36 | 'umbrella', 37 | 'shoe', 38 | 'eye glasses', 39 | 'handbag', 40 | 'tie', 41 | 'suitcase', 42 | 'frisbee', 43 | 'skis', 44 | 'snowboard', 45 | 'sports ball', 46 | 'kite', 47 | 'baseball bat', 48 | 'baseball glove', 49 | 'skateboard', 50 | 'surfboard', 51 | 'tennis racket', 52 | 'bottle', 53 | 'plate', 54 | 'wine glass', 55 | 'cup', 56 | 'fork', 57 | 'knife', 58 | 'spoon', 59 | 'bowl', 60 | 'banana', 61 | 'apple', 62 | 'sandwich', 63 | 'orange', 64 | 'broccoli', 65 | 'carrot', 66 | 'hot dog', 67 | 'pizza', 68 | 'donut', 69 | 'cake', 70 | 'chair', 71 | 'couch', 72 | 'potted plant', 73 | 'bed', 74 | 'mirror', 75 | 'dining table', 76 | 'window', 77 | 'desk', 78 | 'toilet', 79 | 'door', 80 | 'tv', 81 | 'laptop', 82 | 'mouse', 83 | 'remote', 84 | 'keyboard', 85 | 'cell phone', 86 | 'microwave', 87 | 'oven', 88 | 'toaster', 89 | 'sink', 90 | 'refrigerator', 91 | 'blender', 92 | 'book', 93 | 'clock', 94 | 'vase', 95 | 'scissors', 96 | 'teddy bear', 97 | 'hair drier', 98 | 'toothbrush', 99 | ] 100 | 101 | EGOHANDS_CLASSES_LIST = [ 102 | 'background', 103 | 'hand', 104 | ] 105 | 106 | 
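# Illustrative usage of get_cls_dict() defined below:
#   cls_dict = get_cls_dict('coco')    # {0: 'background', 1: 'person', ...}
#   name = cls_dict.get(3, 'CLS3')     # -> 'car'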
107 | def get_cls_dict(model): 108 | """Get the class ID to name translation dictionary.""" 109 | if model == 'coco': 110 | cls_list = COCO_CLASSES_LIST 111 | elif model == 'egohands': 112 | cls_list = EGOHANDS_CLASSES_LIST 113 | else: 114 | raise ValueError('Bad model name') 115 | return {i: n for i, n in enumerate(cls_list)} 116 | -------------------------------------------------------------------------------- /utils/ssd_tf.py: -------------------------------------------------------------------------------- 1 | """ssd_tf.py 2 | 3 | This module implements the TfSSD class. 4 | """ 5 | 6 | 7 | import numpy as np 8 | import cv2 9 | import tensorflow as tf 10 | 11 | 12 | def _postprocess_tf(img, boxes, scores, classes, conf_th): 13 | """Postprocess TensorFlow SSD output.""" 14 | h, w, _ = img.shape 15 | out_boxes = boxes[0] * np.array([h, w, h, w]) 16 | out_boxes = out_boxes.astype(np.int32) 17 | out_boxes = out_boxes[:, [1, 0, 3, 2]] # swap x's and y's 18 | out_confs = scores[0] 19 | out_clss = classes[0].astype(np.int32) 20 | 21 | # only return bboxes with confidence score above threshold 22 | mask = np.where(out_confs >= conf_th) 23 | return out_boxes[mask], out_confs[mask], out_clss[mask] 24 | 25 | 26 | class TfSSD(object): 27 | """TfSSD class encapsulates things needed to run TensorFlow SSD.""" 28 | 29 | def __init__(self, model, input_shape): 30 | self.model = model 31 | self.input_shape = input_shape 32 | 33 | # load detection graph 34 | ssd_graph = tf.Graph() 35 | with ssd_graph.as_default(): 36 | graph_def = tf.GraphDef() 37 | with tf.gfile.GFile('ssd/%s.pb' % model, 'rb') as fid: 38 | serialized_graph = fid.read() 39 | graph_def.ParseFromString(serialized_graph) 40 | tf.import_graph_def(graph_def, name='') 41 | 42 | # define input/output tensors 43 | self.image_tensor = ssd_graph.get_tensor_by_name('image_tensor:0') 44 | self.det_boxes = ssd_graph.get_tensor_by_name('detection_boxes:0') 45 | self.det_scores = ssd_graph.get_tensor_by_name('detection_scores:0') 46 | self.det_classes = ssd_graph.get_tensor_by_name('detection_classes:0') 47 | 48 | # create the session for inferencing 49 | self.sess = tf.Session(graph=ssd_graph) 50 | 51 | def __del__(self): 52 | self.sess.close() 53 | 54 | def detect(self, img, conf_th): 55 | img_resized = _preprocess_tf(img, self.input_shape) 56 | boxes, scores, classes = self.sess.run( 57 | [self.det_boxes, self.det_scores, self.det_classes], 58 | feed_dict={self.image_tensor: np.expand_dims(img_resized, 0)}) 59 | return _postprocess_tf(img, boxes, scores, classes, conf_th) 60 | -------------------------------------------------------------------------------- /utils/visualization.py: -------------------------------------------------------------------------------- 1 | """visualization.py 2 | 3 | The BBoxVisualization class implements drawing of nice looking 4 | bounding boxes based on object detection results. 5 | """ 6 | 7 | 8 | import numpy as np 9 | import cv2 10 | 11 | 12 | # Constants 13 | ALPHA = 0.5 14 | FONT = cv2.FONT_HERSHEY_PLAIN 15 | TEXT_SCALE = 1.0 16 | TEXT_THICKNESS = 1 17 | BLACK = (0, 0, 0) 18 | WHITE = (255, 255, 255) 19 | 20 | 21 | def gen_colors(num_colors): 22 | """Generate different colors. 23 | 24 | # Arguments 25 | num_colors: total number of colors/classes. 26 | 27 | # Output 28 | bgrs: a list of (B, G, R) tuples which correspond to each of 29 | the colors/classes. 
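    For example, gen_colors(80) returns 80 visually distinct BGR tuples,
    one per COCO class; the fixed random seed below makes the palette
    deterministic across runs.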
30 | """ 31 | import random 32 | import colorsys 33 | 34 | hsvs = [[float(x) / num_colors, 1., 0.7] for x in range(num_colors)] 35 | random.seed(1234) 36 | random.shuffle(hsvs) 37 | rgbs = list(map(lambda x: list(colorsys.hsv_to_rgb(*x)), hsvs)) 38 | bgrs = [(int(rgb[2] * 255), int(rgb[1] * 255), int(rgb[0] * 255)) 39 | for rgb in rgbs] 40 | return bgrs 41 | 42 | 43 | def draw_boxed_text(img, text, topleft, color): 44 | """Draw a transluent boxed text in white, overlayed on top of a 45 | colored patch surrounded by a black border. FONT, TEXT_SCALE, 46 | TEXT_THICKNESS and ALPHA values are constants (fixed) as defined 47 | on top. 48 | 49 | # Arguments 50 | img: the input image as a numpy array. 51 | text: the text to be drawn. 52 | topleft: XY coordinate of the topleft corner of the boxed text. 53 | color: color of the patch, i.e. background of the text. 54 | 55 | # Output 56 | img: note the original image is modified inplace. 57 | """ 58 | assert img.dtype == np.uint8 59 | img_h, img_w, _ = img.shape 60 | if topleft[0] >= img_w or topleft[1] >= img_h: 61 | return img 62 | margin = 3 63 | size = cv2.getTextSize(text, FONT, TEXT_SCALE, TEXT_THICKNESS) 64 | w = size[0][0] + margin * 2 65 | h = size[0][1] + margin * 2 66 | # the patch is used to draw boxed text 67 | patch = np.zeros((h, w, 3), dtype=np.uint8) 68 | patch[...] = color 69 | cv2.putText(patch, text, (margin+1, h-margin-2), FONT, TEXT_SCALE, 70 | WHITE, thickness=TEXT_THICKNESS, lineType=cv2.LINE_8) 71 | cv2.rectangle(patch, (0, 0), (w-1, h-1), BLACK, thickness=1) 72 | w = min(w, img_w - topleft[0]) # clip overlay at image boundary 73 | h = min(h, img_h - topleft[1]) 74 | # Overlay the boxed text onto region of interest (roi) in img 75 | roi = img[topleft[1]:topleft[1]+h, topleft[0]:topleft[0]+w, :] 76 | cv2.addWeighted(patch[0:h, 0:w, :], ALPHA, roi, 1 - ALPHA, 0, roi) 77 | return img 78 | 79 | 80 | class BBoxVisualization(): 81 | """BBoxVisualization class implements nice drawing of boudning boxes. 82 | 83 | # Arguments 84 | cls_dict: a dictionary used to translate class id to its name. 85 | """ 86 | 87 | def __init__(self, cls_dict): 88 | self.cls_dict = cls_dict 89 | self.colors = gen_colors(len(cls_dict)) 90 | 91 | def draw_bboxes(self, img, boxes, confs, clss): 92 | """Draw detected bounding boxes on the original image.""" 93 | for bb, cf, cl in zip(boxes, confs, clss): 94 | cl = int(cl) 95 | x_min, y_min, x_max, y_max = bb[0], bb[1], bb[2], bb[3] 96 | color = self.colors[cl] 97 | cv2.rectangle(img, (x_min, y_min), (x_max, y_max), color, 2) 98 | txt_loc = (max(x_min+2, 0), max(y_min+2, 0)) 99 | cls_name = self.cls_dict.get(cl, 'CLS{}'.format(cl)) 100 | txt = '{} {:.2f}'.format(cls_name, cf) 101 | img = draw_boxed_text(img, txt, txt_loc, color) 102 | return img 103 | -------------------------------------------------------------------------------- /utils/writer.py: -------------------------------------------------------------------------------- 1 | """writer.py 2 | """ 3 | 4 | 5 | import subprocess 6 | 7 | import cv2 8 | 9 | 10 | def get_video_writer(name, width, height, fps=30): 11 | """Get a VideoWriter object for saving output video. 12 | 13 | This function tries to use Jetson's hardware H.264 encoder (omxh264enc) 14 | if available, in which case the output video would be a MPEG-2 TS file. 15 | Otherwise, it uses cv2's built-in encoding mechanism and saves a MP4 16 | file. 
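    Illustrative usage:
        writer = get_video_writer('output', 1280, 720, fps=30)
        writer.write(frame)   # frame: BGR numpy array of shape (720, 1280, 3)
        writer.release()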
17 | """ 18 | gst_elements = str(subprocess.check_output('gst-inspect-1.0')) 19 | if 'omxh264dec' in gst_elements: 20 | filename = name + '.ts' # Transport Stream 21 | gst_str = ('appsrc ! videoconvert ! omxh264enc ! mpegtsmux ! ' 22 | 'filesink location=%s') % filename 23 | return cv2.VideoWriter( 24 | gst_str, cv2.CAP_GSTREAMER, 0, fps, (width, height)) 25 | else: 26 | filename = name + '.mp4' # MP4 27 | return cv2.VideoWriter( 28 | filename, cv2.VideoWriter_fourcc(*'mp4v'), fps, (width, height)) 29 | 30 | 31 | -------------------------------------------------------------------------------- /utils/yolo_classes.py: -------------------------------------------------------------------------------- 1 | """yolo_classes.py 2 | 3 | NOTE: Number of YOLO COCO output classes differs from SSD COCO models. 4 | """ 5 | 6 | COCO_CLASSES_LIST = [ 7 | 'person', 8 | 'bicycle', 9 | 'car', 10 | 'motorbike', 11 | 'aeroplane', 12 | 'bus', 13 | 'train', 14 | 'truck', 15 | 'boat', 16 | 'traffic light', 17 | 'fire hydrant', 18 | 'stop sign', 19 | 'parking meter', 20 | 'bench', 21 | 'bird', 22 | 'cat', 23 | 'dog', 24 | 'horse', 25 | 'sheep', 26 | 'cow', 27 | 'elephant', 28 | 'bear', 29 | 'zebra', 30 | 'giraffe', 31 | 'backpack', 32 | 'umbrella', 33 | 'handbag', 34 | 'tie', 35 | 'suitcase', 36 | 'frisbee', 37 | 'skis', 38 | 'snowboard', 39 | 'sports ball', 40 | 'kite', 41 | 'baseball bat', 42 | 'baseball glove', 43 | 'skateboard', 44 | 'surfboard', 45 | 'tennis racket', 46 | 'bottle', 47 | 'wine glass', 48 | 'cup', 49 | 'fork', 50 | 'knife', 51 | 'spoon', 52 | 'bowl', 53 | 'banana', 54 | 'apple', 55 | 'sandwich', 56 | 'orange', 57 | 'broccoli', 58 | 'carrot', 59 | 'hot dog', 60 | 'pizza', 61 | 'donut', 62 | 'cake', 63 | 'chair', 64 | 'sofa', 65 | 'pottedplant', 66 | 'bed', 67 | 'diningtable', 68 | 'toilet', 69 | 'tvmonitor', 70 | 'laptop', 71 | 'mouse', 72 | 'remote', 73 | 'keyboard', 74 | 'cell phone', 75 | 'microwave', 76 | 'oven', 77 | 'toaster', 78 | 'sink', 79 | 'refrigerator', 80 | 'book', 81 | 'clock', 82 | 'vase', 83 | 'scissors', 84 | 'teddy bear', 85 | 'hair drier', 86 | 'toothbrush', 87 | ] 88 | 89 | # For translating YOLO class ids (0~79) to SSD class ids (0~90) 90 | yolo_cls_to_ssd = [ 91 | 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20, 92 | 21, 22, 23, 24, 25, 27, 28, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 93 | 41, 42, 43, 44, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 94 | 59, 60, 61, 62, 63, 64, 65, 67, 70, 72, 73, 74, 75, 76, 77, 78, 79, 95 | 80, 81, 82, 84, 85, 86, 87, 88, 89, 90, 96 | ] 97 | 98 | 99 | def get_cls_dict(category_num): 100 | """Get the class ID to name translation dictionary.""" 101 | if category_num == 80: 102 | return {i: n for i, n in enumerate(COCO_CLASSES_LIST)} 103 | else: 104 | return {i: 'CLS%d' % i for i in range(category_num)} 105 | -------------------------------------------------------------------------------- /utils/yolo_with_plugins.py: -------------------------------------------------------------------------------- 1 | """yolo_with_plugins.py 2 | 3 | Implementation of TrtYOLO class with the yolo_layer plugins. 4 | """ 5 | 6 | 7 | from __future__ import print_function 8 | 9 | import ctypes 10 | 11 | import numpy as np 12 | import cv2 13 | import tensorrt as trt 14 | import pycuda.driver as cuda 15 | 16 | 17 | try: 18 | ctypes.cdll.LoadLibrary('./plugins/libyolo_layer.so') 19 | except OSError as e: 20 | raise SystemExit('ERROR: failed to load ./plugins/libyolo_layer.so. 
' 21 | 'Did you forget to do a "make" in the "./plugins/" ' 22 | 'subdirectory?') from e 23 | 24 | 25 | def _preprocess_yolo(img, input_shape, letter_box=False): 26 | """Preprocess an image before TRT YOLO inferencing. 27 | 28 | # Args 29 | img: int8 numpy array of shape (img_h, img_w, 3) 30 | input_shape: a tuple of (H, W) 31 | letter_box: boolean, specifies whether to keep aspect ratio and 32 | create a "letterboxed" image for inference 33 | 34 | # Returns 35 | preprocessed img: float32 numpy array of shape (3, H, W) 36 | """ 37 | if letter_box: 38 | img_h, img_w, _ = img.shape 39 | new_h, new_w = input_shape[0], input_shape[1] 40 | offset_h, offset_w = 0, 0 41 | if (new_w / img_w) <= (new_h / img_h): 42 | new_h = int(img_h * new_w / img_w) 43 | offset_h = (input_shape[0] - new_h) // 2 44 | else: 45 | new_w = int(img_w * new_h / img_h) 46 | offset_w = (input_shape[1] - new_w) // 2 47 | resized = cv2.resize(img, (new_w, new_h)) 48 | img = np.full((input_shape[0], input_shape[1], 3), 127, dtype=np.uint8) 49 | img[offset_h:(offset_h + new_h), offset_w:(offset_w + new_w), :] = resized 50 | else: 51 | img = cv2.resize(img, (input_shape[1], input_shape[0])) 52 | 53 | img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) 54 | img = img.transpose((2, 0, 1)).astype(np.float32) 55 | img /= 255.0 56 | return img 57 | 58 | 59 | def _nms_boxes(detections, nms_threshold): 60 | """Apply the Non-Maximum Suppression (NMS) algorithm on the bounding 61 | boxes with their confidence scores and return an array with the 62 | indexes of the bounding boxes we want to keep. 63 | 64 | # Args 65 | detections: Nx7 numpy arrays of 66 | [[x, y, w, h, box_confidence, class_id, class_prob], 67 | ......] 68 | """ 69 | x_coord = detections[:, 0] 70 | y_coord = detections[:, 1] 71 | width = detections[:, 2] 72 | height = detections[:, 3] 73 | box_confidences = detections[:, 4] * detections[:, 6] 74 | 75 | areas = width * height 76 | ordered = box_confidences.argsort()[::-1] 77 | 78 | keep = list() 79 | while ordered.size > 0: 80 | # Index of the current element: 81 | i = ordered[0] 82 | keep.append(i) 83 | xx1 = np.maximum(x_coord[i], x_coord[ordered[1:]]) 84 | yy1 = np.maximum(y_coord[i], y_coord[ordered[1:]]) 85 | xx2 = np.minimum(x_coord[i] + width[i], x_coord[ordered[1:]] + width[ordered[1:]]) 86 | yy2 = np.minimum(y_coord[i] + height[i], y_coord[ordered[1:]] + height[ordered[1:]]) 87 | 88 | width1 = np.maximum(0.0, xx2 - xx1 + 1) 89 | height1 = np.maximum(0.0, yy2 - yy1 + 1) 90 | intersection = width1 * height1 91 | union = (areas[i] + areas[ordered[1:]] - intersection) 92 | iou = intersection / union 93 | indexes = np.where(iou <= nms_threshold)[0] 94 | ordered = ordered[indexes + 1] 95 | 96 | keep = np.array(keep) 97 | return keep 98 | 99 | 100 | def _postprocess_yolo(trt_outputs, img_w, img_h, conf_th, nms_threshold, 101 | input_shape, letter_box=False): 102 | """Postprocess TensorRT outputs. 
103 | 104 | # Args 105 | trt_outputs: a list of 2 or 3 tensors, where each tensor 106 | contains a multiple of 7 float32 numbers in 107 | the order of [x, y, w, h, box_confidence, class_id, class_prob] 108 | conf_th: confidence threshold 109 | letter_box: boolean, referring to _preprocess_yolo() 110 | 111 | # Returns 112 | boxes, scores, classes (after NMS) 113 | """ 114 | # filter low-conf detections and concatenate results of all yolo layers 115 | detections = [] 116 | for o in trt_outputs: 117 | dets = o.reshape((-1, 7)) 118 | dets = dets[dets[:, 4] * dets[:, 6] >= conf_th] 119 | detections.append(dets) 120 | detections = np.concatenate(detections, axis=0) 121 | 122 | if len(detections) == 0: 123 | boxes = np.zeros((0, 4), dtype=np.int) 124 | scores = np.zeros((0,), dtype=np.float32) 125 | classes = np.zeros((0,), dtype=np.float32) 126 | else: 127 | box_scores = detections[:, 4] * detections[:, 6] 128 | 129 | # scale x, y, w, h from [0, 1] to pixel values 130 | old_h, old_w = img_h, img_w 131 | offset_h, offset_w = 0, 0 132 | if letter_box: 133 | if (img_w / input_shape[1]) >= (img_h / input_shape[0]): 134 | old_h = int(input_shape[0] * img_w / input_shape[1]) 135 | offset_h = (old_h - img_h) // 2 136 | else: 137 | old_w = int(input_shape[1] * img_h / input_shape[0]) 138 | offset_w = (old_w - img_w) // 2 139 | detections[:, 0:4] *= np.array( 140 | [old_w, old_h, old_w, old_h], dtype=np.float32) 141 | 142 | # NMS 143 | nms_detections = np.zeros((0, 7), dtype=detections.dtype) 144 | for class_id in set(detections[:, 5]): 145 | idxs = np.where(detections[:, 5] == class_id) 146 | cls_detections = detections[idxs] 147 | keep = _nms_boxes(cls_detections, nms_threshold) 148 | nms_detections = np.concatenate( 149 | [nms_detections, cls_detections[keep]], axis=0) 150 | 151 | xx = nms_detections[:, 0].reshape(-1, 1) 152 | yy = nms_detections[:, 1].reshape(-1, 1) 153 | if letter_box: 154 | xx = xx - offset_w 155 | yy = yy - offset_h 156 | ww = nms_detections[:, 2].reshape(-1, 1) 157 | hh = nms_detections[:, 3].reshape(-1, 1) 158 | boxes = np.concatenate([xx, yy, xx+ww, yy+hh], axis=1) + 0.5 159 | boxes = boxes.astype(np.int) 160 | scores = nms_detections[:, 4] * nms_detections[:, 6] 161 | classes = nms_detections[:, 5] 162 | return boxes, scores, classes 163 | 164 | 165 | class HostDeviceMem(object): 166 | """Simple helper data class that's a little nicer to use than a 2-tuple.""" 167 | def __init__(self, host_mem, device_mem): 168 | self.host = host_mem 169 | self.device = device_mem 170 | 171 | def __str__(self): 172 | return "Host:\n" + str(self.host) + "\nDevice:\n" + str(self.device) 173 | 174 | def __repr__(self): 175 | return self.__str__() 176 | 177 | 178 | def get_input_shape(engine): 179 | """Get input shape of the TensorRT YOLO engine.""" 180 | binding = engine[0] 181 | assert engine.binding_is_input(binding) 182 | binding_dims = engine.get_binding_shape(binding) 183 | if len(binding_dims) == 4: 184 | return tuple(binding_dims[2:]) 185 | elif len(binding_dims) == 3: 186 | return tuple(binding_dims[1:]) 187 | else: 188 | raise ValueError('bad dims of binding %s: %s' % (binding, str(binding_dims))) 189 | 190 | 191 | def allocate_buffers(engine): 192 | """Allocates all host/device in/out buffers required for an engine.""" 193 | inputs = [] 194 | outputs = [] 195 | bindings = [] 196 | output_idx = 0 197 | stream = cuda.Stream() 198 | assert 3 <= len(engine) <= 5 # expect 1 input, plus 2~4 outpus 199 | for binding in engine: 200 | binding_dims = engine.get_binding_shape(binding) 201 | if 
len(binding_dims) == 4: 202 | # explicit batch case (TensorRT 7+) 203 | size = trt.volume(binding_dims) 204 | elif len(binding_dims) == 3: 205 | # implicit batch case (TensorRT 6 or older) 206 | size = trt.volume(binding_dims) * engine.max_batch_size 207 | else: 208 | raise ValueError('bad dims of binding %s: %s' % (binding, str(binding_dims))) 209 | dtype = trt.nptype(engine.get_binding_dtype(binding)) 210 | # Allocate host and device buffers 211 | host_mem = cuda.pagelocked_empty(size, dtype) 212 | device_mem = cuda.mem_alloc(host_mem.nbytes) 213 | # Append the device buffer to device bindings. 214 | bindings.append(int(device_mem)) 215 | # Append to the appropriate list. 216 | if engine.binding_is_input(binding): 217 | inputs.append(HostDeviceMem(host_mem, device_mem)) 218 | else: 219 | # each grid has 3 anchors, each anchor generates a detection 220 | # output of 7 float32 values 221 | assert size % 7 == 0 222 | outputs.append(HostDeviceMem(host_mem, device_mem)) 223 | output_idx += 1 224 | return inputs, outputs, bindings, stream 225 | 226 | 227 | def do_inference(context, bindings, inputs, outputs, stream, batch_size=1): 228 | """do_inference (for TensorRT 6.x or lower) 229 | 230 | This function is generalized for multiple inputs/outputs. 231 | Inputs and outputs are expected to be lists of HostDeviceMem objects. 232 | """ 233 | # Transfer input data to the GPU. 234 | [cuda.memcpy_htod_async(inp.device, inp.host, stream) for inp in inputs] 235 | # Run inference. 236 | context.execute_async(batch_size=batch_size, 237 | bindings=bindings, 238 | stream_handle=stream.handle) 239 | # Transfer predictions back from the GPU. 240 | [cuda.memcpy_dtoh_async(out.host, out.device, stream) for out in outputs] 241 | # Synchronize the stream 242 | stream.synchronize() 243 | # Return only the host outputs. 244 | return [out.host for out in outputs] 245 | 246 | 247 | def do_inference_v2(context, bindings, inputs, outputs, stream): 248 | """do_inference_v2 (for TensorRT 7.0+) 249 | 250 | This function is generalized for multiple inputs/outputs for full 251 | dimension networks. 252 | Inputs and outputs are expected to be lists of HostDeviceMem objects. 253 | """ 254 | # Transfer input data to the GPU. 255 | [cuda.memcpy_htod_async(inp.device, inp.host, stream) for inp in inputs] 256 | # Run inference. 257 | context.execute_async_v2(bindings=bindings, stream_handle=stream.handle) 258 | # Transfer predictions back from the GPU. 259 | [cuda.memcpy_dtoh_async(out.host, out.device, stream) for out in outputs] 260 | # Synchronize the stream 261 | stream.synchronize() 262 | # Return only the host outputs. 
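    # Each 'out.host' is a flat numpy array; callers reshape it as needed
    # (e.g. TrtYOLO reshapes each YOLO output to (-1, 7) detection rows).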
263 | return [out.host for out in outputs] 264 | 265 | 266 | class TrtYOLO(object): 267 | """TrtYOLO class encapsulates things needed to run TRT YOLO.""" 268 | 269 | def _load_engine(self): 270 | TRTbin = 'yolo/%s.trt' % self.model 271 | with open(TRTbin, 'rb') as f, trt.Runtime(self.trt_logger) as runtime: 272 | return runtime.deserialize_cuda_engine(f.read()) 273 | 274 | def __init__(self, model, category_num=80, letter_box=False, cuda_ctx=None): 275 | """Initialize TensorRT plugins, engine and conetxt.""" 276 | self.model = model 277 | self.category_num = category_num 278 | self.letter_box = letter_box 279 | self.cuda_ctx = cuda_ctx 280 | if self.cuda_ctx: 281 | self.cuda_ctx.push() 282 | 283 | self.inference_fn = do_inference if trt.__version__[0] < '7' \ 284 | else do_inference_v2 285 | self.trt_logger = trt.Logger(trt.Logger.INFO) 286 | self.engine = self._load_engine() 287 | 288 | self.input_shape = get_input_shape(self.engine) 289 | 290 | try: 291 | self.context = self.engine.create_execution_context() 292 | self.inputs, self.outputs, self.bindings, self.stream = \ 293 | allocate_buffers(self.engine) 294 | except Exception as e: 295 | raise RuntimeError('fail to allocate CUDA resources') from e 296 | finally: 297 | if self.cuda_ctx: 298 | self.cuda_ctx.pop() 299 | 300 | def __del__(self): 301 | """Free CUDA memories.""" 302 | del self.outputs 303 | del self.inputs 304 | del self.stream 305 | 306 | def detect(self, img, conf_th=0.3, letter_box=None): 307 | """Detect objects in the input image.""" 308 | letter_box = self.letter_box if letter_box is None else letter_box 309 | img_resized = _preprocess_yolo(img, self.input_shape, letter_box) 310 | 311 | # Set host input to the image. The do_inference() function 312 | # will copy the input to the GPU before executing. 
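        # Note: the assignment below rebinds inputs[0].host to the
        # preprocessed CHW float32 array; do_inference()/do_inference_v2()
        # then copies that array into the device buffer.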
313 | self.inputs[0].host = np.ascontiguousarray(img_resized) 314 | if self.cuda_ctx: 315 | self.cuda_ctx.push() 316 | trt_outputs = self.inference_fn( 317 | context=self.context, 318 | bindings=self.bindings, 319 | inputs=self.inputs, 320 | outputs=self.outputs, 321 | stream=self.stream) 322 | if self.cuda_ctx: 323 | self.cuda_ctx.pop() 324 | 325 | boxes, scores, classes = _postprocess_yolo( 326 | trt_outputs, img.shape[1], img.shape[0], conf_th, 327 | nms_threshold=0.5, input_shape=self.input_shape, 328 | letter_box=letter_box) 329 | 330 | # clip x1, y1, x2, y2 within original image 331 | boxes[:, [0, 2]] = np.clip(boxes[:, [0, 2]], 0, img.shape[1]-1) 332 | boxes[:, [1, 3]] = np.clip(boxes[:, [1, 3]], 0, img.shape[0]-1) 333 | return boxes, scores, classes 334 | -------------------------------------------------------------------------------- /zed.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import numpy as np 3 | import pyzed.sl as sl 4 | import cv2 5 | import math 6 | import logging 7 | import getopt 8 | 9 | log = logging.getLogger(__name__) 10 | logging.basicConfig(level=logging.INFO) 11 | 12 | def main(argv) : 13 | config_path = "yolov4-tiny.cfg" 14 | weight_path = "yolov4-tiny.weights" 15 | meta_path = "coco.names" 16 | svo_path = None 17 | zed_id = 0 18 | 19 | help_str = 'zed_yolo.py -c -w -m -s -z ' 20 | 21 | try: 22 | opts, args = getopt.getopt( 23 | argv, "hc:w:m:s:z:", ["config=", "weight=", "meta=", "svo_file=", "zed_id="]) 24 | except getopt.GetoptError: 25 | log.exception(help_str) 26 | sys.exit(2) 27 | 28 | for opt, arg in opts: 29 | if opt == '-h': 30 | log.info(help_str) 31 | sys.exit() 32 | elif opt in ("-c", "--config"): 33 | config_path = arg 34 | elif opt in ("-w", "--weight"): 35 | weight_path = arg 36 | elif opt in ("-m", "--meta"): 37 | meta_path = arg 38 | elif opt in ("-s", "--svo_file"): 39 | svo_path = arg 40 | elif opt in ("-z", "--zed_id"): 41 | zed_id = int(arg) 42 | 43 | # Set configuration parameters 44 | input_type = sl.InputType() 45 | 46 | if svo_path is not None: 47 | log.info("SVO file : " + svo_path) 48 | input_type.set_from_svo_file(svo_path) 49 | else: 50 | # Launch camera by id 51 | input_type.set_from_camera_id(zed_id) 52 | 53 | # Create a ZED camera object 54 | zed = sl.Camera() 55 | 56 | # Set configuration parameters 57 | input_type = sl.InputType() 58 | 59 | init = sl.InitParameters(input_t=input_type) 60 | init.camera_resolution = sl.RESOLUTION.HD1080 61 | init.depth_mode = sl.DEPTH_MODE.PERFORMANCE 62 | init.coordinate_units = sl.UNIT.MILLIMETER 63 | 64 | # Open the camera 65 | err = zed.open(init) 66 | if err != sl.ERROR_CODE.SUCCESS : 67 | print(repr(err)) 68 | zed.close() 69 | exit(1) 70 | 71 | 72 | # Set runtime parameters after opening the camera 73 | runtime = sl.RuntimeParameters() 74 | runtime.sensing_mode = sl.SENSING_MODE.STANDARD 75 | 76 | # Prepare new image size to retrieve half-resolution images 77 | image_size = zed.get_camera_information().camera_resolution 78 | image_size.width = image_size.width 79 | image_size.height = image_size.height 80 | 81 | # Declare your sl.Mat matrices 82 | image_zed = sl.Mat(image_size.width, image_size.height, sl.MAT_TYPE.U8_C4) 83 | depth_image_zed = sl.Mat(image_size.width, image_size.height, sl.MAT_TYPE.U8_C4) 84 | point_cloud = sl.Mat() 85 | #======================================= yolov4 video test et ============================================ 86 | #======== Yolov4 Tiny ağırlıklarını yüklemektedir =================== 87 | 
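    # (i.e. "test YOLOv4 on video" / "loads the YOLOv4-Tiny weights")
    # The .cfg and .weights files are loaded into OpenCV's DNN module below,
    # using the CUDA backend with an FP16 target.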
weightsPath_tiny = weight_path 88 | configPath_tiny = config_path 89 | 90 | net = cv2.dnn.readNet(weightsPath_tiny, configPath_tiny) 91 | net.setPreferableBackend(cv2.dnn.DNN_BACKEND_CUDA) 92 | net.setPreferableTarget(cv2.dnn.DNN_TARGET_CUDA_FP16) 93 | model = cv2.dnn_DetectionModel(net) 94 | 95 | 96 | 97 | def YOLOv4_video(pred_image): 98 | model.setInputParams(size=(416, 416), scale=1/255, swapRB=True) 99 | image_test = cv2.cvtColor(pred_image, cv2.COLOR_RGBA2RGB) 100 | image = image_test.copy() 101 | print('image',image.shape) 102 | confThreshold= 0.5 103 | nmsThreshold = 0.4 104 | classes, confidences, boxes = model.detect(image, confThreshold, nmsThreshold) 105 | 106 | return classes,confidences,boxes 107 | 108 | LABELS = [] 109 | with open(meta_path, 'r') as f: 110 | LABELS = [cname.strip() for cname in f.readlines()] 111 | 112 | COLORS = [[0, 0, 255], [30, 255, 255], [0,255,0]] 113 | 114 | frame_count = 0 115 | 116 | exit_flag = True 117 | 118 | while(exit_flag == True): 119 | err = zed.grab(runtime) 120 | if err == sl.ERROR_CODE.SUCCESS : 121 | # Retrieve the left image, depth image in the half-resolution 122 | zed.retrieve_image(image_zed, sl.VIEW.LEFT, sl.MEM.CPU, image_size) 123 | zed.retrieve_image(depth_image_zed, sl.VIEW.DEPTH, sl.MEM.CPU, image_size) 124 | # Retrieve the RGBA point cloud in half resolution 125 | zed.retrieve_measure(point_cloud, sl.MEASURE.XYZRGBA, sl.MEM.CPU, image_size) 126 | 127 | # Get and print distance value in mm at the center of the image 128 | # We measure the distance camera - object using Euclidean distance 129 | 130 | # To recover data from sl.Mat to use it with opencv, use the get_data() method 131 | # It returns a numpy array that can be used as a matrix with opencv 132 | image_ocv = image_zed.get_data() 133 | #depth_image_ocv = depth_image_zed.get_data() 134 | classes,confidences,boxes = YOLOv4_video(image_ocv) 135 | 136 | for cl,score,(left,top,width,height) in zip(classes,confidences,boxes): 137 | start_pooint = (int(left),int(top)) 138 | end_point = (int(left+width),int(top+height)) 139 | 140 | x = int(left + width/2) 141 | y = int(top + height/2) 142 | 143 | color = COLORS[0] 144 | 145 | img =cv2.rectangle(image_ocv,start_pooint,end_point,color,3) 146 | img = cv2.circle(img,(x,y),5,[0,0,255],5) 147 | text = f'{LABELS[cl]}: {score:0.2f}' 148 | cv2.putText(img, text, (int(left), int(top-7)), cv2.FONT_ITALIC, 1, COLORS[0], 2 ) 149 | 150 | x = round(x) 151 | y = round(y) 152 | 153 | err, point_cloud_value = point_cloud.get_value(x, y) 154 | distance = math.sqrt(point_cloud_value[0] * point_cloud_value[0] + point_cloud_value[1] * point_cloud_value[1] + point_cloud_value[2] * point_cloud_value[2]) 155 | 156 | print("Distance to Camera at (class : {0}, score : {1:0.2f}): distance : {2:0.2f} mm".format(LABELS[cl], score, distance), end="\r") 157 | 158 | cv2.putText(img,"Distance: "+str(round(distance/1000,2))+'m', (int(left), int(top + 25)) , cv2.FONT_HERSHEY_COMPLEX, 1, COLORS[1], 2) 159 | 160 | cv2.imshow("Image", img) 161 | 162 | frame_count = frame_count + 1 163 | 164 | if cv2.waitKey(1) & 0xFF == ord('q'): 165 | exit_flag = False 166 | 167 | 168 | cv2.destroyAllWindows() 169 | zed.close() 170 | 171 | print("\nFINISH") 172 | 173 | if __name__ == "__main__": 174 | main(sys.argv[1:]) 175 | -------------------------------------------------------------------------------- /zed_trt.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import numpy as np 3 | import pyzed.sl as sl 4 | import cv2 5 | 
import math 6 | from utils.yolo_classes import get_cls_dict 7 | from utils.visualization import BBoxVisualization 8 | from utils.yolo_with_plugins import TrtYOLO 9 | import os 10 | import pycuda.autoinit 11 | import time 12 | def main() : 13 | 14 | # Create a ZED camera object 15 | zed = sl.Camera() 16 | 17 | # Set configuration parameters 18 | input_type = sl.InputType() 19 | if len(sys.argv) >= 2 : 20 | input_type.set_from_svo_file(sys.argv[1]) 21 | init = sl.InitParameters(input_t=input_type) 22 | init.camera_resolution = sl.RESOLUTION.HD1080 23 | init.depth_mode = sl.DEPTH_MODE.PERFORMANCE 24 | init.coordinate_units = sl.UNIT.MILLIMETER 25 | 26 | # Open the camera 27 | err = zed.open(init) 28 | if err != sl.ERROR_CODE.SUCCESS : 29 | print(repr(err)) 30 | zed.close() 31 | exit(1) 32 | 33 | 34 | # Set runtime parameters after opening the camera 35 | runtime = sl.RuntimeParameters() 36 | runtime.sensing_mode = sl.SENSING_MODE.STANDARD 37 | 38 | # Prepare new image size to retrieve half-resolution images 39 | image_size = zed.get_camera_information().camera_resolution 40 | #image_size.width = image_size.width /2 41 | #image_size.height = image_size.height /2 42 | 43 | # Declare your sl.Mat matrices 44 | image_zed = sl.Mat(image_size.width, image_size.height, sl.MAT_TYPE.U8_C4) 45 | depth_image_zed = sl.Mat(image_size.width, image_size.height, sl.MAT_TYPE.U8_C4) 46 | point_cloud = sl.Mat() 47 | 48 | 49 | #=========== Yolov4 TensorRt ağırlıkları yüklenmektedir ======================= 50 | 51 | category_num = 17 52 | model_trt = 'yolov4' 53 | letter_box = False 54 | if category_num <= 0: 55 | raise SystemExit('ERROR: bad category_num (%d)!' % category_num) 56 | if not os.path.isfile('yolo/{}.trt'.format(model_trt)): 57 | raise SystemExit('ERROR: file (yolo/{}.trt) not found!'.format(model_trt)) 58 | 59 | cls_dict = get_cls_dict(category_num) 60 | vis = BBoxVisualization(cls_dict) 61 | trt_yolov4 = TrtYOLO(model_trt, category_num, letter_box) 62 | 63 | def YOLOv4_video(pred_image): 64 | image_test = cv2.cvtColor(pred_image, cv2.COLOR_RGBA2RGB) 65 | image = image_test.copy() 66 | boxes, confs, clss = trt_yolov4.detect(image, conf_th=0.3) 67 | return clss,confs,boxes 68 | 69 | 70 | key = ' ' 71 | LABELS = [ 'girilmez', 72 | 'tasit_trafigine_kapali', 73 | 'duz_veya_sola', 74 | 'duz_veya_saga', 75 | 'yalnizca_sola', 76 | '20_hiz_limiti_sonu', 77 | '30_limit', 78 | '20_limit', 79 | 'yalnizca_saga', 80 | 'saga_donulmez', 81 | 'sola_donulmez', 82 | 'dur', 83 | 'park_yapilmaz', 84 | 'park', 85 | 'durak', 86 | 'kirmizi_isk', 87 | 'sari_isik', 88 | 'yesil_isik'] 89 | 90 | COLORS = [[0, 0, 255]] 91 | prev_frame_time=0 92 | new_frame_time=0 93 | while key != 113 : 94 | err = zed.grab(runtime) 95 | if err == sl.ERROR_CODE.SUCCESS : 96 | # Retrieve the left image, depth image in the half-resolution 97 | zed.retrieve_image(image_zed, sl.VIEW.LEFT, sl.MEM.CPU, image_size) 98 | zed.retrieve_image(depth_image_zed, sl.VIEW.DEPTH, sl.MEM.CPU, image_size) 99 | # Retrieve the RGBA point cloud in half resolution 100 | zed.retrieve_measure(point_cloud, sl.MEASURE.XYZRGBA, sl.MEM.CPU, image_size) 101 | 102 | # Get and print distance value in mm at the center of the image 103 | # We measure the distance camera - object using Euclidean distance 104 | 105 | # To recover data from sl.Mat to use it with opencv, use the get_data() method 106 | # It returns a numpy array that can be used as a matrix with opencv 107 | image_ocv = image_zed.get_data() 108 | #depth_image_ocv = depth_image_zed.get_data() 109 | 
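            # For each detection below, the 3-D point at the box centre (x, y)
            # is read from the XYZRGBA point cloud and the distance to the
            # camera is computed as sqrt(X^2 + Y^2 + Z^2), in millimetres
            # (init.coordinate_units = sl.UNIT.MILLIMETER).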
classes,confidences,boxes = YOLOv4_video(image_ocv) 110 | 111 | for cl,score,(x_min,y_min,x_max,y_max) in zip(classes,confidences,boxes): 112 | start_pooint = (int(x_min),int(y_min)) 113 | end_point = (int(x_max),int(y_max)) 114 | 115 | x = int(x_min +( x_max-x_min)/2) 116 | y = int(y_min + (y_max-y_min)/2) 117 | color = COLORS[0] 118 | img =cv2.rectangle(image_ocv,start_pooint,end_point,color,3) 119 | img = cv2.circle(img,(x,y),5,[0,0,255],5) 120 | text = f'{LABELS[int(cl)]}: {score:0.2f}' 121 | cv2.putText(img,text,(int(x_min),int(y_min-7)),cv2.FONT_ITALIC,1,COLORS[0],2 ) 122 | 123 | x = round(x) 124 | y = round(y) 125 | err, point_cloud_value = point_cloud.get_value(x, y) 126 | distance = math.sqrt(point_cloud_value[0] * point_cloud_value[0] + 127 | point_cloud_value[1] * point_cloud_value[1] + 128 | point_cloud_value[2] * point_cloud_value[2]) 129 | print("Distance to Camera at (class : {0}, score : {1:0.2f}): distance : {2:0.2f} mm".format(LABELS[int(cl)], score, distance), end="\r") 130 | cv2.putText(img,"Distance: "+str(round(distance/1000,2))+'m',(int(x_max-180),int(y_max+30)),cv2.FONT_HERSHEY_COMPLEX,1,(0,255,0),1) 131 | 132 | new_frame_time=time.time() 133 | fps = 1/(new_frame_time-prev_frame_time) 134 | prev_frame_time = new_frame_time 135 | 136 | print('FPS : %.2f ' % fps) 137 | cv2.imshow("Image", img) 138 | 139 | 140 | #cv2.imshow("Image", image_ocv) 141 | #cv2.imshow("Depth", depth_image_ocv) 142 | 143 | key = cv2.waitKey(1) 144 | 145 | 146 | cv2.destroyAllWindows() 147 | zed.close() 148 | 149 | print("\nFINISH") 150 | 151 | if __name__ == "__main__": 152 | main() --------------------------------------------------------------------------------