├── .gitattributes ├── .gitignore ├── LICENSE ├── README.md ├── intro.gif ├── intro.mp4 ├── plugins ├── Makefile ├── README.md ├── gpu_cc.py ├── yolo_layer.cu ├── yolo_layer.h └── yolo_layer.o ├── utils ├── __init__.py ├── background.py ├── camera.py ├── display.py ├── mjpeg.py ├── modnet.py ├── mtcnn.py ├── ssd.py ├── ssd_classes.py ├── ssd_tf.py ├── visualization.py ├── writer.py ├── yolo_classes.py └── yolo_with_plugins.py ├── zed.py └── zed_trt.py /.gitattributes: -------------------------------------------------------------------------------- 1 | # Auto detect text files and perform LF normalization 2 | * text=auto 3 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .nox/ 42 | .coverage 43 | .coverage.* 44 | .cache 45 | nosetests.xml 46 | coverage.xml 47 | *.cover 48 | .hypothesis/ 49 | .pytest_cache/ 50 | 51 | # Translations 52 | *.mo 53 | *.pot 54 | 55 | # Django stuff: 56 | *.log 57 | local_settings.py 58 | db.sqlite3 59 | 60 | # Flask stuff: 61 | instance/ 62 | .webassets-cache 63 | 64 | # Scrapy stuff: 65 | .scrapy 66 | 67 | # Sphinx documentation 68 | docs/_build/ 69 | 70 | # PyBuilder 71 | target/ 72 | 73 | # Jupyter Notebook 74 | .ipynb_checkpoints 75 | 76 | # IPython 77 | profile_default/ 78 | ipython_config.py 79 | 80 | # pyenv 81 | .python-version 82 | 83 | # celery beat schedule file 84 | celerybeat-schedule 85 | 86 | # SageMath parsed files 87 | *.sage.py 88 | 89 | # Environments 90 | .env 91 | .venv 92 | env/ 93 | venv/ 94 | ENV/ 95 | env.bak/ 96 | venv.bak/ 97 | 98 | # Spyder project settings 99 | .spyderproject 100 | .spyproject 101 | 102 | # Rope project settings 103 | .ropeproject 104 | 105 | # mkdocs documentation 106 | /site 107 | 108 | # mypy 109 | .mypy_cache/ 110 | .dmypy.json 111 | dmypy.json 112 | 113 | # Pyre type checker 114 | .pyre/ 115 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 Mehmet OKUYAR 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # YOLO Object Detection and Distance Measurement with a ZED Camera 2 | 3 | You can test object detection and distance measurement with a ZED stereo camera. 4 | 5 | 6 | ### How to use 7 | Run the script with `python zed.py` 8 | If you use the TensorRT YOLO version, run `python zed_trt.py` instead. 9 | Edit the relevant lines in `zed.py` to match your own setup. 10 | 11 | The default values for the config, weights and names files, the SVO path and the ZED camera ID are: 12 | ~~~~~~~~~~~~ 13 | config_path = "yolov4-tiny.cfg" 14 | weight_path = "yolov4-tiny.weights" 15 | meta_path = "coco.names" 16 | svo_path = None 17 | zed_id = 0 18 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 19 | 20 | ### Download the model file, for instance YOLOv4 21 | wget https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v3_optimal/yolov4.weights 22 | 23 | ### Making changes in the "zed.py" file 24 | You need to edit the following line in `zed.py`. 25 | 26 | The size should match the image width and height values in the .cfg file. 27 | Default values are width: 608, height: 608 (see the model-setup sketch at the end of this README). 28 | ~~~~~~ 29 | 98. model.setInputParams(size=(608, 608), scale=1/255, swapRB=True) 30 | ~~~~~~~~~~~~~~~~~~~~ 31 | ### Run the application 32 | To launch the ZED with YOLO, simply run the script: 33 | 34 | python3 zed.py 35 | 36 | The input parameters can be changed using the command line: 37 | 38 | python3 zed.py -c <config_file> -w <weight_file> -m <names_file> -s <svo_file> -z <zed_id> 39 | 40 | For instance: 41 | 42 | python3 zed.py -c yolov4-tiny.cfg -w yolov4-tiny.weights -m coco.names -z 1 43 | 44 | To run with custom weights: 45 | 46 | python3 zed.py -c yolov4-custom.cfg -w yolov4-custom.weights -m obj.names -z 1 47 | 48 | To display the help: 49 | 50 | python3 zed.py -h 51 | 52 | That's all. If you have a ZED camera you can easily obtain the distance of each detected object (a minimal distance-measurement sketch is given at the end of this README). 53 | ## You can see how the program works in the gif below. 54 | 55 | 56 |
![intro](intro.gif)
57 | 58 |
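### Model-setup sketch

The line quoted above (`model.setInputParams(...)`) belongs to OpenCV's `dnn_DetectionModel` API. The snippet below is only a minimal sketch of how such a setup typically looks, not the full `zed.py`; the file names reuse the defaults listed earlier, and the test image and thresholds are illustrative placeholders.

~~~~~~
import cv2

# Load the darknet model with the default config/weights listed above.
net = cv2.dnn.readNet("yolov4-tiny.weights", "yolov4-tiny.cfg")
model = cv2.dnn_DetectionModel(net)
# The size must match the width/height values in the .cfg file (608 x 608 by default).
model.setInputParams(size=(608, 608), scale=1/255, swapRB=True)

frame = cv2.imread("test.jpg")  # placeholder; zed.py feeds frames grabbed from the ZED here
classes, scores, boxes = model.detect(frame, confThreshold=0.4, nmsThreshold=0.4)
for class_id, score, box in zip(classes, scores, boxes):
    print(class_id, score, box)  # box is (x, y, w, h) in pixels
~~~~~~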

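### Distance-measurement sketch

The idea behind the distance measurement is to look up the 3D point of the ZED point cloud at the centre of each detection box and take its Euclidean norm. The snippet below is a minimal sketch using the ZED Python API (`pyzed`), not the exact code of `zed.py`; the box centre `(cx, cy)` is a hard-coded placeholder for where a YOLO detection would normally be used.

~~~~~~
import math
import pyzed.sl as sl

zed = sl.Camera()
init = sl.InitParameters()
init.depth_mode = sl.DEPTH_MODE.PERFORMANCE
init.coordinate_units = sl.UNIT.METER
if zed.open(init) != sl.ERROR_CODE.SUCCESS:
    raise SystemExit("Could not open the ZED camera")

image, point_cloud = sl.Mat(), sl.Mat()
if zed.grab() == sl.ERROR_CODE.SUCCESS:
    zed.retrieve_image(image, sl.VIEW.LEFT)                # left RGB frame fed to YOLO
    zed.retrieve_measure(point_cloud, sl.MEASURE.XYZRGBA)  # per-pixel 3D points
    cx, cy = 640, 360  # placeholder: centre of a detected bounding box
    err, point = point_cloud.get_value(cx, cy)
    if err == sl.ERROR_CODE.SUCCESS and math.isfinite(point[2]):
        distance = math.sqrt(point[0] ** 2 + point[1] ** 2 + point[2] ** 2)
        print("Distance to object: {:.2f} m".format(distance))
zed.close()
~~~~~~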
59 | -------------------------------------------------------------------------------- /intro.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MehmetOKUYAR/Yolo-Object-Detection-and-Distance-Measurement-with-Zed-camera/a112c65013eab48cf3210fb1e9973aad3ac25cf1/intro.gif -------------------------------------------------------------------------------- /intro.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MehmetOKUYAR/Yolo-Object-Detection-and-Distance-Measurement-with-Zed-camera/a112c65013eab48cf3210fb1e9973aad3ac25cf1/intro.mp4 -------------------------------------------------------------------------------- /plugins/Makefile: -------------------------------------------------------------------------------- 1 | CC=g++ 2 | LD=ld 3 | CXXFLAGS=-Wall -std=c++11 -g -O 4 | 5 | NVCC=nvcc 6 | 7 | # space separated compute values ex: computes=70 75. If not present will fetch device's CC 8 | computes= 9 | 10 | ifeq ($(computes), ) 11 | computes= $(shell python gpu_cc.py) 12 | $(info computes: $(computes)) 13 | endif 14 | 15 | NVCCFLAGS= $(foreach compute, $(computes),-gencode arch=compute_$(compute),code=[sm_$(compute),compute_$(compute)]) 16 | $(info NVCCFLAGS: $(NVCCFLAGS)) 17 | 18 | # These are the directories where I installed TensorRT on my x86_64 PC. 19 | TENSORRT_INCS=-I"/usr/local/TensorRT-7.1.3.4/include" 20 | TENSORRT_LIBS=-L"/usr/local/TensorRT-7.1.3.4/lib" 21 | 22 | # INCS and LIBS 23 | INCS=-I"/usr/local/cuda/include" $(TENSORRT_INCS) -I"/usr/local/include" -I"plugin" 24 | LIBS=-L"/usr/local/cuda/lib64" $(TENSORRT_LIBS) -L"/usr/local/lib" -Wl,--start-group -lnvinfer -lnvparsers -lnvinfer_plugin -lcudnn -lcublas -lcudart_static -lnvToolsExt -lcudart -lrt -ldl -lpthread -Wl,--end-group 25 | 26 | .PHONY: all clean 27 | 28 | all: libyolo_layer.so 29 | 30 | clean: 31 | rm -f *.so *.o 32 | 33 | libyolo_layer.so: yolo_layer.o 34 | $(CC) -shared -o $@ $< $(LIBS) 35 | 36 | yolo_layer.o: yolo_layer.cu yolo_layer.h 37 | $(NVCC) -ccbin $(CC) $(INCS) $(NVCCFLAGS) -Xcompiler -fPIC -c -o $@ $< 38 | -------------------------------------------------------------------------------- /plugins/README.md: -------------------------------------------------------------------------------- 1 | The "yolo_layer.h" and "yolo_layer.cu" were taken and modified from [wang-xinyu/tensorrtx/yolov4](https://github.com/wang-xinyu/tensorrtx/tree/master/yolov4). The original code is under [MIT License](https://github.com/wang-xinyu/tensorrtx/blob/master/LICENSE). 
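Once built with the Makefile above, the resulting `libyolo_layer.so` has to be loaded into the process before a TensorRT engine containing the `YoloLayer_TRT` plugin can be deserialized, because loading the shared library is what triggers `REGISTER_TENSORRT_PLUGIN` and registers the plugin creator. Below is a minimal Python sketch of that step; the engine file name is a placeholder, and this is not necessarily how `zed_trt.py` wires it up.

~~~~~~
import ctypes
import tensorrt as trt

# Loading the shared library registers the "YoloLayer_TRT" creator
# with TensorRT's plugin registry (via REGISTER_TENSORRT_PLUGIN).
ctypes.cdll.LoadLibrary('./plugins/libyolo_layer.so')

TRT_LOGGER = trt.Logger(trt.Logger.INFO)
trt.init_libnvinfer_plugins(TRT_LOGGER, '')

# "yolov4-tiny.trt" is a placeholder for an engine built with this plugin.
with open('yolov4-tiny.trt', 'rb') as f, trt.Runtime(TRT_LOGGER) as runtime:
    engine = runtime.deserialize_cuda_engine(f.read())
~~~~~~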
2 | -------------------------------------------------------------------------------- /plugins/gpu_cc.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | ''' 5 | # ported from https://gist.github.com/f0k/63a664160d016a491b2cbea15913d549 6 | ''' 7 | 8 | import ctypes 9 | 10 | CUDA_SUCCESS = 0 11 | 12 | def get_gpu_archs(): 13 | libnames = ('libcuda.so', 'libcuda.dylib', 'cuda.dll') 14 | for libname in libnames: 15 | try: 16 | cuda = ctypes.CDLL(libname) 17 | except OSError: 18 | continue 19 | else: 20 | break 21 | else: 22 | return 23 | 24 | gpu_archs = set() 25 | 26 | n_gpus = ctypes.c_int() 27 | cc_major = ctypes.c_int() 28 | cc_minor = ctypes.c_int() 29 | 30 | result = ctypes.c_int() 31 | device = ctypes.c_int() 32 | error_str = ctypes.c_char_p() 33 | 34 | result = cuda.cuInit(0) 35 | if result != CUDA_SUCCESS: 36 | cuda.cuGetErrorString(result, ctypes.byref(error_str)) 37 | # print('cuInit failed with error code %d: %s' % (result, error_str.value.decode())) 38 | return [] 39 | 40 | result = cuda.cuDeviceGetCount(ctypes.byref(n_gpus)) 41 | if result != CUDA_SUCCESS: 42 | cuda.cuGetErrorString(result, ctypes.byref(error_str)) 43 | # print('cuDeviceGetCount failed with error code %d: %s' % (result, error_str.value.decode())) 44 | return [] 45 | 46 | for i in range(n_gpus.value): 47 | if cuda.cuDeviceComputeCapability(ctypes.byref(cc_major), ctypes.byref(cc_minor), device) == CUDA_SUCCESS: 48 | gpu_archs.add(str(cc_major.value) + str(cc_minor.value)) 49 | 50 | return list(gpu_archs) 51 | 52 | if __name__ == '__main__': 53 | print(' '.join(get_gpu_archs())) 54 | -------------------------------------------------------------------------------- /plugins/yolo_layer.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * yolo_layer.cu 3 | * 4 | * This code was originally written by wang-xinyu under MIT license. 5 | * I took it from: 6 | * 7 | * https://github.com/wang-xinyu/tensorrtx/tree/master/yolov4 8 | * 9 | * and made necessary modifications. 
10 | * 11 | * - JK Jung 12 | */ 13 | 14 | #include "yolo_layer.h" 15 | 16 | using namespace Yolo; 17 | 18 | namespace 19 | { 20 | // Write values into buffer 21 | template 22 | void write(char*& buffer, const T& val) 23 | { 24 | *reinterpret_cast(buffer) = val; 25 | buffer += sizeof(T); 26 | } 27 | 28 | // Read values from buffer 29 | template 30 | void read(const char*& buffer, T& val) 31 | { 32 | val = *reinterpret_cast(buffer); 33 | buffer += sizeof(T); 34 | } 35 | } // namespace 36 | 37 | namespace nvinfer1 38 | { 39 | YoloLayerPlugin::YoloLayerPlugin(int yolo_width, int yolo_height, int num_anchors, float* anchors, int num_classes, int input_width, int input_height, float scale_x_y, int new_coords) 40 | { 41 | mYoloWidth = yolo_width; 42 | mYoloHeight = yolo_height; 43 | mNumAnchors = num_anchors; 44 | memcpy(mAnchorsHost, anchors, num_anchors * 2 * sizeof(float)); 45 | mNumClasses = num_classes; 46 | mInputWidth = input_width; 47 | mInputHeight = input_height; 48 | mScaleXY = scale_x_y; 49 | mNewCoords = new_coords; 50 | 51 | CHECK(cudaMalloc(&mAnchors, MAX_ANCHORS * 2 * sizeof(float))); 52 | CHECK(cudaMemcpy(mAnchors, mAnchorsHost, mNumAnchors * 2 * sizeof(float), cudaMemcpyHostToDevice)); 53 | } 54 | 55 | YoloLayerPlugin::YoloLayerPlugin(const void* data, size_t length) 56 | { 57 | const char *d = reinterpret_cast(data), *a = d; 58 | read(d, mThreadCount); 59 | read(d, mYoloWidth); 60 | read(d, mYoloHeight); 61 | read(d, mNumAnchors); 62 | memcpy(mAnchorsHost, d, MAX_ANCHORS * 2 * sizeof(float)); 63 | d += MAX_ANCHORS * 2 * sizeof(float); 64 | read(d, mNumClasses); 65 | read(d, mInputWidth); 66 | read(d, mInputHeight); 67 | read(d, mScaleXY); 68 | read(d, mNewCoords); 69 | 70 | CHECK(cudaMalloc(&mAnchors, MAX_ANCHORS * 2 * sizeof(float))); 71 | CHECK(cudaMemcpy(mAnchors, mAnchorsHost, mNumAnchors * 2 * sizeof(float), cudaMemcpyHostToDevice)); 72 | 73 | assert(d == a + length); 74 | } 75 | 76 | void YoloLayerPlugin::serialize(void* buffer) const 77 | { 78 | char* d = static_cast(buffer), *a = d; 79 | write(d, mThreadCount); 80 | write(d, mYoloWidth); 81 | write(d, mYoloHeight); 82 | write(d, mNumAnchors); 83 | memcpy(d, mAnchorsHost, MAX_ANCHORS * 2 * sizeof(float)); 84 | d += MAX_ANCHORS * 2 * sizeof(float); 85 | write(d, mNumClasses); 86 | write(d, mInputWidth); 87 | write(d, mInputHeight); 88 | write(d, mScaleXY); 89 | write(d, mNewCoords); 90 | 91 | assert(d == a + getSerializationSize()); 92 | } 93 | 94 | size_t YoloLayerPlugin::getSerializationSize() const 95 | { 96 | return sizeof(mThreadCount) + \ 97 | sizeof(mYoloWidth) + sizeof(mYoloHeight) + \ 98 | sizeof(mNumAnchors) + MAX_ANCHORS * 2 * sizeof(float) + \ 99 | sizeof(mNumClasses) + \ 100 | sizeof(mInputWidth) + sizeof(mInputHeight) + \ 101 | sizeof(mScaleXY) + sizeof(mNewCoords); 102 | } 103 | 104 | int YoloLayerPlugin::initialize() 105 | { 106 | return 0; 107 | } 108 | 109 | void YoloLayerPlugin::terminate() 110 | { 111 | CHECK(cudaFree(mAnchors)); 112 | } 113 | 114 | Dims YoloLayerPlugin::getOutputDimensions(int index, const Dims* inputs, int nbInputDims) 115 | { 116 | assert(index == 0); 117 | assert(nbInputDims == 1); 118 | assert(inputs[0].d[0] == (mNumClasses + 5) * mNumAnchors); 119 | assert(inputs[0].d[1] == mYoloHeight); 120 | assert(inputs[0].d[2] == mYoloWidth); 121 | // output detection results to the channel dimension 122 | int totalsize = mYoloWidth * mYoloHeight * mNumAnchors * sizeof(Detection) / sizeof(float); 123 | return Dims3(totalsize, 1, 1); 124 | } 125 | 126 | void 
YoloLayerPlugin::setPluginNamespace(const char* pluginNamespace) 127 | { 128 | mPluginNamespace = pluginNamespace; 129 | } 130 | 131 | const char* YoloLayerPlugin::getPluginNamespace() const 132 | { 133 | return mPluginNamespace; 134 | } 135 | 136 | // Return the DataType of the plugin output at the requested index 137 | DataType YoloLayerPlugin::getOutputDataType(int index, const DataType* inputTypes, int nbInputs) const 138 | { 139 | return DataType::kFLOAT; 140 | } 141 | 142 | // Return true if output tensor is broadcast across a batch. 143 | bool YoloLayerPlugin::isOutputBroadcastAcrossBatch(int outputIndex, const bool* inputIsBroadcasted, int nbInputs) const 144 | { 145 | return false; 146 | } 147 | 148 | // Return true if plugin can use input that is broadcast across batch without replication. 149 | bool YoloLayerPlugin::canBroadcastInputAcrossBatch(int inputIndex) const 150 | { 151 | return false; 152 | } 153 | 154 | void YoloLayerPlugin::configurePlugin(const PluginTensorDesc* in, int nbInput, const PluginTensorDesc* out, int nbOutput) 155 | { 156 | } 157 | 158 | // Attach the plugin object to an execution context and grant the plugin the access to some context resource. 159 | void YoloLayerPlugin::attachToContext(cudnnContext* cudnnContext, cublasContext* cublasContext, IGpuAllocator* gpuAllocator) 160 | { 161 | } 162 | 163 | // Detach the plugin object from its execution context. 164 | void YoloLayerPlugin::detachFromContext() 165 | { 166 | } 167 | 168 | const char* YoloLayerPlugin::getPluginType() const 169 | { 170 | return "YoloLayer_TRT"; 171 | } 172 | 173 | const char* YoloLayerPlugin::getPluginVersion() const 174 | { 175 | return "1"; 176 | } 177 | 178 | void YoloLayerPlugin::destroy() 179 | { 180 | delete this; 181 | } 182 | 183 | // Clone the plugin 184 | IPluginV2IOExt* YoloLayerPlugin::clone() const 185 | { 186 | YoloLayerPlugin *p = new YoloLayerPlugin(mYoloWidth, mYoloHeight, mNumAnchors, (float*) mAnchorsHost, mNumClasses, mInputWidth, mInputHeight, mScaleXY, mNewCoords); 187 | p->setPluginNamespace(mPluginNamespace); 188 | return p; 189 | } 190 | 191 | inline __device__ float sigmoidGPU(float x) { return 1.0f / (1.0f + __expf(-x)); } 192 | 193 | inline __device__ float scale_sigmoidGPU(float x, float s) 194 | { 195 | return s * sigmoidGPU(x) - (s - 1.0f) * 0.5f; 196 | } 197 | 198 | // CalDetection(): This kernel processes 1 yolo layer calculation. It 199 | // distributes calculations so that 1 GPU thread would be responsible 200 | // for each grid/anchor combination. 201 | // NOTE: The output (x, y, w, h) are between 0.0 and 1.0 202 | // (relative to orginal image width and height). 
203 | __global__ void CalDetection(const float *input, float *output, 204 | int batch_size, 205 | int yolo_width, int yolo_height, 206 | int num_anchors, const float *anchors, 207 | int num_classes, int input_w, int input_h, 208 | float scale_x_y) 209 | { 210 | int idx = threadIdx.x + blockDim.x * blockIdx.x; 211 | Detection* det = ((Detection*) output) + idx; 212 | int total_grids = yolo_width * yolo_height; 213 | if (idx >= batch_size * total_grids * num_anchors) return; 214 | 215 | int info_len = 5 + num_classes; 216 | //int batch_idx = idx / (total_grids * num_anchors); 217 | int group_idx = idx / total_grids; 218 | int anchor_idx = group_idx % num_anchors; 219 | const float* cur_input = input + group_idx * (info_len * total_grids) + (idx % total_grids); 220 | 221 | int class_id; 222 | float max_cls_logit = -CUDART_INF_F; // minus infinity 223 | for (int i = 5; i < info_len; ++i) { 224 | float l = *(cur_input + i * total_grids); 225 | if (l > max_cls_logit) { 226 | max_cls_logit = l; 227 | class_id = i - 5; 228 | } 229 | } 230 | float max_cls_prob = sigmoidGPU(max_cls_logit); 231 | float box_prob = sigmoidGPU(*(cur_input + 4 * total_grids)); 232 | //if (max_cls_prob < IGNORE_THRESH || box_prob < IGNORE_THRESH) 233 | // return; 234 | 235 | int row = (idx % total_grids) / yolo_width; 236 | int col = (idx % total_grids) % yolo_width; 237 | 238 | det->bbox[0] = (col + scale_sigmoidGPU(*(cur_input + 0 * total_grids), scale_x_y)) / yolo_width; // [0, 1] 239 | det->bbox[1] = (row + scale_sigmoidGPU(*(cur_input + 1 * total_grids), scale_x_y)) / yolo_height; // [0, 1] 240 | det->bbox[2] = __expf(*(cur_input + 2 * total_grids)) * *(anchors + 2 * anchor_idx + 0) / input_w; // [0, 1] 241 | det->bbox[3] = __expf(*(cur_input + 3 * total_grids)) * *(anchors + 2 * anchor_idx + 1) / input_h; // [0, 1] 242 | 243 | det->bbox[0] -= det->bbox[2] / 2; // shift from center to top-left 244 | det->bbox[1] -= det->bbox[3] / 2; 245 | 246 | det->det_confidence = box_prob; 247 | det->class_id = class_id; 248 | det->class_confidence = max_cls_prob; 249 | } 250 | 251 | inline __device__ float scale(float x, float s) 252 | { 253 | return s * x - (s - 1.0f) * 0.5f; 254 | } 255 | 256 | inline __device__ float square(float x) 257 | { 258 | return x * x; 259 | } 260 | 261 | __global__ void CalDetection_NewCoords(const float *input, float *output, 262 | int batch_size, 263 | int yolo_width, int yolo_height, 264 | int num_anchors, const float *anchors, 265 | int num_classes, int input_w, int input_h, 266 | float scale_x_y) 267 | { 268 | int idx = threadIdx.x + blockDim.x * blockIdx.x; 269 | Detection* det = ((Detection*) output) + idx; 270 | int total_grids = yolo_width * yolo_height; 271 | if (idx >= batch_size * total_grids * num_anchors) return; 272 | 273 | int info_len = 5 + num_classes; 274 | //int batch_idx = idx / (total_grids * num_anchors); 275 | int group_idx = idx / total_grids; 276 | int anchor_idx = group_idx % num_anchors; 277 | const float* cur_input = input + group_idx * (info_len * total_grids) + (idx % total_grids); 278 | 279 | int class_id; 280 | float max_cls_prob = -CUDART_INF_F; // minus infinity 281 | for (int i = 5; i < info_len; ++i) { 282 | float l = *(cur_input + i * total_grids); 283 | if (l > max_cls_prob) { 284 | max_cls_prob = l; 285 | class_id = i - 5; 286 | } 287 | } 288 | float box_prob = *(cur_input + 4 * total_grids); 289 | //if (max_cls_prob < IGNORE_THRESH || box_prob < IGNORE_THRESH) 290 | // return; 291 | 292 | int row = (idx % total_grids) / yolo_width; 293 | int col = (idx % 
total_grids) % yolo_width; 294 | 295 | det->bbox[0] = (col + scale(*(cur_input + 0 * total_grids), scale_x_y)) / yolo_width; // [0, 1] 296 | det->bbox[1] = (row + scale(*(cur_input + 1 * total_grids), scale_x_y)) / yolo_height; // [0, 1] 297 | det->bbox[2] = square(*(cur_input + 2 * total_grids)) * 4 * *(anchors + 2 * anchor_idx + 0) / input_w; // [0, 1] 298 | det->bbox[3] = square(*(cur_input + 3 * total_grids)) * 4 * *(anchors + 2 * anchor_idx + 1) / input_h; // [0, 1] 299 | 300 | det->bbox[0] -= det->bbox[2] / 2; // shift from center to top-left 301 | det->bbox[1] -= det->bbox[3] / 2; 302 | 303 | det->det_confidence = box_prob; 304 | det->class_id = class_id; 305 | det->class_confidence = max_cls_prob; 306 | } 307 | 308 | void YoloLayerPlugin::forwardGpu(const float* const* inputs, float* output, cudaStream_t stream, int batchSize) 309 | { 310 | int num_elements = batchSize * mNumAnchors * mYoloWidth * mYoloHeight; 311 | 312 | //CHECK(cudaMemset(output, 0, num_elements * sizeof(Detection))); 313 | 314 | if (mNewCoords) { 315 | CalDetection_NewCoords<<<(num_elements + mThreadCount - 1) / mThreadCount, mThreadCount, 0, stream>>> 316 | (inputs[0], output, batchSize, mYoloWidth, mYoloHeight, mNumAnchors, (const float*) mAnchors, mNumClasses, mInputWidth, mInputHeight, mScaleXY); 317 | } else { 318 | CalDetection<<<(num_elements + mThreadCount - 1) / mThreadCount, mThreadCount, 0, stream>>> 319 | (inputs[0], output, batchSize, mYoloWidth, mYoloHeight, mNumAnchors, (const float*) mAnchors, mNumClasses, mInputWidth, mInputHeight, mScaleXY); 320 | } 321 | } 322 | 323 | int YoloLayerPlugin::enqueue(int batchSize, const void* const* inputs, void** outputs, void* workspace, cudaStream_t stream) 324 | { 325 | forwardGpu((const float* const*)inputs, (float*)outputs[0], stream, batchSize); 326 | return 0; 327 | } 328 | 329 | YoloPluginCreator::YoloPluginCreator() 330 | { 331 | mPluginAttributes.clear(); 332 | 333 | mFC.nbFields = mPluginAttributes.size(); 334 | mFC.fields = mPluginAttributes.data(); 335 | } 336 | 337 | const char* YoloPluginCreator::getPluginName() const 338 | { 339 | return "YoloLayer_TRT"; 340 | } 341 | 342 | const char* YoloPluginCreator::getPluginVersion() const 343 | { 344 | return "1"; 345 | } 346 | 347 | const PluginFieldCollection* YoloPluginCreator::getFieldNames() 348 | { 349 | return &mFC; 350 | } 351 | 352 | IPluginV2IOExt* YoloPluginCreator::createPlugin(const char* name, const PluginFieldCollection* fc) 353 | { 354 | assert(!strcmp(name, getPluginName())); 355 | const PluginField* fields = fc->fields; 356 | int yolo_width, yolo_height, num_anchors = 0; 357 | float anchors[MAX_ANCHORS * 2]; 358 | int num_classes, input_multiplier, new_coords = 0; 359 | float scale_x_y = 1.0; 360 | 361 | for (int i = 0; i < fc->nbFields; ++i) 362 | { 363 | const char* attrName = fields[i].name; 364 | if (!strcmp(attrName, "yoloWidth")) 365 | { 366 | assert(fields[i].type == PluginFieldType::kINT32); 367 | yolo_width = *(static_cast(fields[i].data)); 368 | } 369 | else if (!strcmp(attrName, "yoloHeight")) 370 | { 371 | assert(fields[i].type == PluginFieldType::kINT32); 372 | yolo_height = *(static_cast(fields[i].data)); 373 | } 374 | else if (!strcmp(attrName, "numAnchors")) 375 | { 376 | assert(fields[i].type == PluginFieldType::kINT32); 377 | num_anchors = *(static_cast(fields[i].data)); 378 | } 379 | else if (!strcmp(attrName, "numClasses")) 380 | { 381 | assert(fields[i].type == PluginFieldType::kINT32); 382 | num_classes = *(static_cast(fields[i].data)); 383 | } 384 | else if 
(!strcmp(attrName, "inputMultiplier")) 385 | { 386 | assert(fields[i].type == PluginFieldType::kINT32); 387 | input_multiplier = *(static_cast(fields[i].data)); 388 | } 389 | else if (!strcmp(attrName, "anchors")){ 390 | assert(num_anchors > 0 && num_anchors <= MAX_ANCHORS); 391 | assert(fields[i].type == PluginFieldType::kFLOAT32); 392 | memcpy(anchors, static_cast(fields[i].data), num_anchors * 2 * sizeof(float)); 393 | } 394 | else if (!strcmp(attrName, "scaleXY")) 395 | { 396 | assert(fields[i].type == PluginFieldType::kFLOAT32); 397 | scale_x_y = *(static_cast(fields[i].data)); 398 | } 399 | else if (!strcmp(attrName, "newCoords")) 400 | { 401 | assert(fields[i].type == PluginFieldType::kINT32); 402 | new_coords = *(static_cast(fields[i].data)); 403 | } 404 | else 405 | { 406 | std::cerr << "Unknown attribute: " << attrName << std::endl; 407 | assert(0); 408 | } 409 | } 410 | assert(yolo_width > 0 && yolo_height > 0); 411 | assert(anchors[0] > 0.0f && anchors[1] > 0.0f); 412 | assert(num_classes > 0); 413 | assert(input_multiplier == 8 || input_multiplier == 16 || input_multiplier == 32); 414 | assert(scale_x_y >= 1.0); 415 | 416 | YoloLayerPlugin* obj = new YoloLayerPlugin(yolo_width, yolo_height, num_anchors, anchors, num_classes, yolo_width * input_multiplier, yolo_height * input_multiplier, scale_x_y, new_coords); 417 | obj->setPluginNamespace(mNamespace.c_str()); 418 | return obj; 419 | } 420 | 421 | IPluginV2IOExt* YoloPluginCreator::deserializePlugin(const char* name, const void* serialData, size_t serialLength) 422 | { 423 | YoloLayerPlugin* obj = new YoloLayerPlugin(serialData, serialLength); 424 | obj->setPluginNamespace(mNamespace.c_str()); 425 | return obj; 426 | } 427 | 428 | PluginFieldCollection YoloPluginCreator::mFC{}; 429 | std::vector YoloPluginCreator::mPluginAttributes; 430 | } // namespace nvinfer1 431 | -------------------------------------------------------------------------------- /plugins/yolo_layer.h: -------------------------------------------------------------------------------- 1 | #ifndef _YOLO_LAYER_H 2 | #define _YOLO_LAYER_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include "math_constants.h" 9 | #include "NvInfer.h" 10 | 11 | #define MAX_ANCHORS 6 12 | 13 | #define CHECK(status) \ 14 | do { \ 15 | auto ret = status; \ 16 | if (ret != 0) { \ 17 | std::cerr << "Cuda failure in file '" << __FILE__ \ 18 | << "' line " << __LINE__ \ 19 | << ": " << ret << std::endl; \ 20 | abort(); \ 21 | } \ 22 | } while (0) 23 | 24 | namespace Yolo 25 | { 26 | static constexpr float IGNORE_THRESH = 0.01f; 27 | 28 | struct alignas(float) Detection { 29 | float bbox[4]; // x, y, w, h 30 | float det_confidence; 31 | float class_id; 32 | float class_confidence; 33 | }; 34 | } 35 | 36 | namespace nvinfer1 37 | { 38 | class YoloLayerPlugin: public IPluginV2IOExt 39 | { 40 | public: 41 | YoloLayerPlugin(int yolo_width, int yolo_height, int num_anchors, float* anchors, int num_classes, int input_width, int input_height, float scale_x_y, int new_coords); 42 | YoloLayerPlugin(const void* data, size_t length); 43 | 44 | ~YoloLayerPlugin() override = default; 45 | 46 | int getNbOutputs() const override 47 | { 48 | return 1; 49 | } 50 | 51 | Dims getOutputDimensions(int index, const Dims* inputs, int nbInputDims) override; 52 | 53 | int initialize() override; 54 | 55 | void terminate() override; 56 | 57 | virtual size_t getWorkspaceSize(int maxBatchSize) const override { return 0;} 58 | 59 | virtual int enqueue(int batchSize, const void*const * inputs, void** 
outputs, void* workspace, cudaStream_t stream) override; 60 | 61 | virtual size_t getSerializationSize() const override; 62 | 63 | virtual void serialize(void* buffer) const override; 64 | 65 | bool supportsFormatCombination(int pos, const PluginTensorDesc* inOut, int nbInputs, int nbOutputs) const override { 66 | return inOut[pos].format == TensorFormat::kLINEAR && inOut[pos].type == DataType::kFLOAT; 67 | } 68 | 69 | const char* getPluginType() const override; 70 | 71 | const char* getPluginVersion() const override; 72 | 73 | void destroy() override; 74 | 75 | IPluginV2IOExt* clone() const override; 76 | 77 | void setPluginNamespace(const char* pluginNamespace) override; 78 | 79 | const char* getPluginNamespace() const override; 80 | 81 | DataType getOutputDataType(int index, const DataType* inputTypes, int nbInputs) const override; 82 | 83 | bool isOutputBroadcastAcrossBatch(int outputIndex, const bool* inputIsBroadcasted, int nbInputs) const override; 84 | 85 | bool canBroadcastInputAcrossBatch(int inputIndex) const override; 86 | 87 | void attachToContext(cudnnContext* cudnnContext, cublasContext* cublasContext, IGpuAllocator* gpuAllocator) override; 88 | 89 | void configurePlugin(const PluginTensorDesc* in, int nbInput, const PluginTensorDesc* out, int nbOutput) override TRTNOEXCEPT; 90 | 91 | void detachFromContext() override; 92 | 93 | private: 94 | void forwardGpu(const float* const* inputs, float* output, cudaStream_t stream, int batchSize = 1); 95 | 96 | int mThreadCount = 64; 97 | int mYoloWidth, mYoloHeight, mNumAnchors; 98 | float mAnchorsHost[MAX_ANCHORS * 2]; 99 | float *mAnchors; // allocated on GPU 100 | int mNumClasses; 101 | int mInputWidth, mInputHeight; 102 | float mScaleXY; 103 | int mNewCoords = 0; 104 | 105 | const char* mPluginNamespace; 106 | 107 | protected: 108 | using IPluginV2IOExt::configurePlugin; 109 | }; 110 | 111 | class YoloPluginCreator : public IPluginCreator 112 | { 113 | public: 114 | YoloPluginCreator(); 115 | 116 | ~YoloPluginCreator() override = default; 117 | 118 | const char* getPluginName() const override; 119 | 120 | const char* getPluginVersion() const override; 121 | 122 | const PluginFieldCollection* getFieldNames() override; 123 | 124 | IPluginV2IOExt* createPlugin(const char* name, const PluginFieldCollection* fc) override; 125 | 126 | IPluginV2IOExt* deserializePlugin(const char* name, const void* serialData, size_t serialLength) override; 127 | 128 | void setPluginNamespace(const char* libNamespace) override 129 | { 130 | mNamespace = libNamespace; 131 | } 132 | 133 | const char* getPluginNamespace() const override 134 | { 135 | return mNamespace.c_str(); 136 | } 137 | 138 | private: 139 | static PluginFieldCollection mFC; 140 | static std::vector mPluginAttributes; 141 | std::string mNamespace; 142 | }; 143 | 144 | REGISTER_TENSORRT_PLUGIN(YoloPluginCreator); 145 | }; 146 | 147 | #endif 148 | -------------------------------------------------------------------------------- /plugins/yolo_layer.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MehmetOKUYAR/Yolo-Object-Detection-and-Distance-Measurement-with-Zed-camera/a112c65013eab48cf3210fb1e9973aad3ac25cf1/plugins/yolo_layer.o -------------------------------------------------------------------------------- /utils/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/MehmetOKUYAR/Yolo-Object-Detection-and-Distance-Measurement-with-Zed-camera/a112c65013eab48cf3210fb1e9973aad3ac25cf1/utils/__init__.py -------------------------------------------------------------------------------- /utils/background.py: -------------------------------------------------------------------------------- 1 | """background.py 2 | 3 | This code implements the Background class for the TensorRT MODNet 4 | demo. The Background class could generate background images from 5 | either a still image, a video file or nothing (pure black bg). 6 | """ 7 | 8 | 9 | import numpy as np 10 | import cv2 11 | 12 | 13 | class Background(): 14 | """Backgrounf class which supports one of the following sources: 15 | 16 | 1. Image (jpg, png, etc.) file, repeating indefinitely 17 | 2. Video file, looping forever 18 | 3. None -> black background 19 | 20 | # Arguments 21 | src: if not spcified, use black background; else, src should be 22 | a filename of an image (jpg/png) or video (mp4/ts) 23 | width & height: width & height of the output background image 24 | """ 25 | 26 | def __init__(self, src, width, height, demo_mode=False): 27 | self.src = src 28 | self.width = width 29 | self.height = height 30 | self.demo_mode = demo_mode 31 | if not src: # empty source: black background 32 | self.is_video = False 33 | self.bg_frame = np.zeros((height, width, 3), dtype=np.uint8) 34 | elif not isinstance(src, str): 35 | raise ValueError('bad src') 36 | elif src.endswith('.jpg') or src.endswith('.png'): 37 | self.is_video = False 38 | self.bg_frame = cv2.resize(cv2.imread(src), (width, height)) 39 | assert self.bg_frame is not None and self.bg_frame.ndim == 3 40 | elif src.endswith('.mp4') or src.endswith('.ts'): 41 | self.is_video = True 42 | self.cap = cv2.VideoCapture(src) 43 | assert self.cap.isOpened() 44 | else: 45 | raise ValueError('unknown src') 46 | 47 | def read(self): 48 | """Read a frame from the Background object.""" 49 | if self.is_video: 50 | _, frame = self.cap.read() 51 | if frame is None: 52 | # assume end of video file has been reached, so loop around 53 | self.cap.release() 54 | self.cap = cv2.VideoCapture(self.src) 55 | _, frame = self.cap.read() 56 | return cv2.resize(frame, (self.width, self.height)) 57 | else: 58 | return self.bg_frame.copy() 59 | 60 | def __del__(self): 61 | if self.is_video: 62 | try: 63 | self.cap.release() 64 | except: 65 | pass 66 | -------------------------------------------------------------------------------- /utils/camera.py: -------------------------------------------------------------------------------- 1 | """camera.py 2 | 3 | This code implements the Camera class, which encapsulates code to 4 | handle IP CAM, USB webcam or the Jetson onboard camera. In 5 | addition, this Camera class is further extended to take a video 6 | file or an image file as input. 7 | """ 8 | 9 | 10 | import logging 11 | import threading 12 | import subprocess 13 | 14 | import numpy as np 15 | import cv2 16 | 17 | 18 | # The following flag ise used to control whether to use a GStreamer 19 | # pipeline to open USB webcam source. If set to False, we just open 20 | # the webcam using cv2.VideoCapture(index) machinery. i.e. relying 21 | # on cv2's built-in function to capture images from the webcam. 22 | USB_GSTREAMER = True 23 | 24 | 25 | def add_camera_args(parser): 26 | """Add parser augument for camera options.""" 27 | parser.add_argument('--image', type=str, default=None, 28 | help='image file name, e.g. 
dog.jpg') 29 | parser.add_argument('--video', type=str, default=None, 30 | help='video file name, e.g. traffic.mp4') 31 | parser.add_argument('--video_looping', action='store_true', 32 | help='loop around the video file [False]') 33 | parser.add_argument('--rtsp', type=str, default=None, 34 | help=('RTSP H.264 stream, e.g. ' 35 | 'rtsp://admin:123456@192.168.1.64:554')) 36 | parser.add_argument('--rtsp_latency', type=int, default=200, 37 | help='RTSP latency in ms [200]') 38 | parser.add_argument('--usb', type=int, default=None, 39 | help='USB webcam device id (/dev/video?) [None]') 40 | parser.add_argument('--gstr', type=str, default=None, 41 | help='GStreamer string [None]') 42 | parser.add_argument('--onboard', type=int, default=None, 43 | help='Jetson onboard camera [None]') 44 | parser.add_argument('--copy_frame', action='store_true', 45 | help=('copy video frame internally [False]')) 46 | parser.add_argument('--do_resize', action='store_true', 47 | help=('resize image/video [False]')) 48 | parser.add_argument('--width', type=int, default=640, 49 | help='image width [640]') 50 | parser.add_argument('--height', type=int, default=480, 51 | help='image height [480]') 52 | return parser 53 | 54 | 55 | def open_cam_rtsp(uri, width, height, latency): 56 | """Open an RTSP URI (IP CAM).""" 57 | gst_elements = str(subprocess.check_output('gst-inspect-1.0')) 58 | if 'omxh264dec' in gst_elements: 59 | # Use hardware H.264 decoder on Jetson platforms 60 | gst_str = ('rtspsrc location={} latency={} ! ' 61 | 'rtph264depay ! h264parse ! omxh264dec ! ' 62 | 'nvvidconv ! ' 63 | 'video/x-raw, width=(int){}, height=(int){}, ' 64 | 'format=(string)BGRx ! videoconvert ! ' 65 | 'appsink').format(uri, latency, width, height) 66 | elif 'avdec_h264' in gst_elements: 67 | # Otherwise try to use the software decoder 'avdec_h264' 68 | # NOTE: in case resizing images is necessary, try adding 69 | # a 'videoscale' into the pipeline 70 | gst_str = ('rtspsrc location={} latency={} ! ' 71 | 'rtph264depay ! h264parse ! avdec_h264 ! ' 72 | 'videoconvert ! appsink').format(uri, latency) 73 | else: 74 | raise RuntimeError('H.264 decoder not found!') 75 | return cv2.VideoCapture(gst_str, cv2.CAP_GSTREAMER) 76 | 77 | 78 | def open_cam_usb(dev, width, height): 79 | """Open a USB webcam.""" 80 | if USB_GSTREAMER: 81 | return cv2.VideoCapture(dev) 82 | else: 83 | return cv2.VideoCapture(dev) 84 | 85 | 86 | def open_cam_gstr(gstr, width, height): 87 | """Open camera using a GStreamer string. 88 | 89 | Example: 90 | gstr = 'v4l2src device=/dev/video0 ! video/x-raw, width=(int){width}, height=(int){height} ! videoconvert ! appsink' 91 | """ 92 | gst_str = gstr.format(width=width, height=height) 93 | return cv2.VideoCapture(gst_str, cv2.CAP_GSTREAMER) 94 | 95 | 96 | def open_cam_onboard(width, height): 97 | """Open the Jetson onboard camera.""" 98 | gst_elements = str(subprocess.check_output('gst-inspect-1.0')) 99 | if 'nvcamerasrc' in gst_elements: 100 | # On versions of L4T prior to 28.1, you might need to add 101 | # 'flip-method=2' into gst_str below. 102 | gst_str = ('nvcamerasrc ! ' 103 | 'video/x-raw(memory:NVMM), ' 104 | 'width=(int)2592, height=(int)1458, ' 105 | 'format=(string)I420, framerate=(fraction)30/1 ! ' 106 | 'nvvidconv ! ' 107 | 'video/x-raw, width=(int){}, height=(int){}, ' 108 | 'format=(string)BGRx ! ' 109 | 'videoconvert ! appsink').format(width, height) 110 | elif 'nvarguscamerasrc' in gst_elements: 111 | gst_str = ('nvarguscamerasrc ! 
' 112 | 'video/x-raw(memory:NVMM), ' 113 | 'width=(int)1920, height=(int)1080, ' 114 | 'format=(string)NV12, framerate=(fraction)30/1 ! ' 115 | 'nvvidconv flip-method=2 ! ' 116 | 'video/x-raw, width=(int){}, height=(int){}, ' 117 | 'format=(string)BGRx ! ' 118 | 'videoconvert ! appsink').format(width, height) 119 | else: 120 | raise RuntimeError('onboard camera source not found!') 121 | return cv2.VideoCapture(gst_str, cv2.CAP_GSTREAMER) 122 | 123 | 124 | def grab_img(cam): 125 | """This 'grab_img' function is designed to be run in the sub-thread. 126 | Once started, this thread continues to grab a new image and put it 127 | into the global 'img_handle', until 'thread_running' is set to False. 128 | """ 129 | while cam.thread_running: 130 | _, cam.img_handle = cam.cap.read() 131 | if cam.img_handle is None: 132 | #logging.warning('Camera: cap.read() returns None...') 133 | break 134 | cam.thread_running = False 135 | 136 | 137 | class Camera(): 138 | """Camera class which supports reading images from theses video sources: 139 | 140 | 1. Image (jpg, png, etc.) file, repeating indefinitely 141 | 2. Video file 142 | 3. RTSP (IP CAM) 143 | 4. USB webcam 144 | 5. Jetson onboard camera 145 | """ 146 | 147 | def __init__(self, args): 148 | self.args = args 149 | self.is_opened = False 150 | self.video_file = '' 151 | self.video_looping = args.video_looping 152 | self.thread_running = False 153 | self.img_handle = None 154 | self.copy_frame = args.copy_frame 155 | self.do_resize = args.do_resize 156 | self.img_width = args.width 157 | self.img_height = args.height 158 | self.cap = None 159 | self.thread = None 160 | self._open() # try to open the camera 161 | 162 | def _open(self): 163 | """Open camera based on command line arguments.""" 164 | if self.cap is not None: 165 | raise RuntimeError('camera is already opened!') 166 | a = self.args 167 | if a.image: 168 | logging.info('Camera: using a image file %s' % a.image) 169 | self.cap = 'image' 170 | self.img_handle = cv2.imread(a.image) 171 | if self.img_handle is not None: 172 | if self.do_resize: 173 | self.img_handle = cv2.resize( 174 | self.img_handle, (a.width, a.height)) 175 | self.is_opened = True 176 | self.img_height, self.img_width, _ = self.img_handle.shape 177 | elif a.video: 178 | logging.info('Camera: using a video file %s' % a.video) 179 | self.video_file = a.video 180 | self.cap = cv2.VideoCapture(a.video) 181 | self._start() 182 | elif a.rtsp: 183 | logging.info('Camera: using RTSP stream %s' % a.rtsp) 184 | self.cap = open_cam_rtsp(a.rtsp, a.width, a.height, a.rtsp_latency) 185 | self._start() 186 | elif a.usb is not None: 187 | logging.info('Camera: using USB webcam /dev/video%d' % a.usb) 188 | self.cap = open_cam_usb(a.usb, a.width, a.height) 189 | self._start() 190 | elif a.gstr is not None: 191 | logging.info('Camera: using GStreamer string "%s"' % a.gstr) 192 | self.cap = open_cam_gstr(a.gstr, a.width, a.height) 193 | self._start() 194 | elif a.onboard is not None: 195 | logging.info('Camera: using Jetson onboard camera') 196 | self.cap = open_cam_onboard(a.width, a.height) 197 | self._start() 198 | else: 199 | raise RuntimeError('no camera type specified!') 200 | 201 | def isOpened(self): 202 | return self.is_opened 203 | 204 | def _start(self): 205 | if not self.cap.isOpened(): 206 | logging.warning('Camera: starting while cap is not opened!') 207 | return 208 | 209 | # Try to grab the 1st image and determine width and height 210 | _, self.img_handle = self.cap.read() 211 | if self.img_handle is None: 212 | 
logging.warning('Camera: cap.read() returns no image!') 213 | self.is_opened = False 214 | return 215 | 216 | self.is_opened = True 217 | if self.video_file: 218 | if not self.do_resize: 219 | self.img_height, self.img_width, _ = self.img_handle.shape 220 | else: 221 | self.img_height, self.img_width, _ = self.img_handle.shape 222 | # start the child thread if not using a video file source 223 | # i.e. rtsp, usb or onboard 224 | assert not self.thread_running 225 | self.thread_running = True 226 | self.thread = threading.Thread(target=grab_img, args=(self,)) 227 | self.thread.start() 228 | 229 | def _stop(self): 230 | if self.thread_running: 231 | self.thread_running = False 232 | #self.thread.join() 233 | 234 | def read(self): 235 | """Read a frame from the camera object. 236 | 237 | Returns None if the camera runs out of image or error. 238 | """ 239 | if not self.is_opened: 240 | return None 241 | 242 | if self.video_file: 243 | _, img = self.cap.read() 244 | if img is None: 245 | logging.info('Camera: reaching end of video file') 246 | if self.video_looping: 247 | self.cap.release() 248 | self.cap = cv2.VideoCapture(self.video_file) 249 | _, img = self.cap.read() 250 | if img is not None and self.do_resize: 251 | img = cv2.resize(img, (self.img_width, self.img_height)) 252 | return img 253 | elif self.cap == 'image': 254 | return np.copy(self.img_handle) 255 | else: 256 | if self.copy_frame: 257 | return self.img_handle.copy() 258 | else: 259 | return self.img_handle 260 | 261 | def release(self): 262 | self._stop() 263 | try: 264 | self.cap.release() 265 | except: 266 | pass 267 | self.is_opened = False 268 | 269 | def __del__(self): 270 | self.release() 271 | -------------------------------------------------------------------------------- /utils/display.py: -------------------------------------------------------------------------------- 1 | """display.py 2 | """ 3 | 4 | 5 | import time 6 | 7 | import cv2 8 | 9 | 10 | def open_window(window_name, title, width=None, height=None): 11 | """Open the display window.""" 12 | cv2.namedWindow(window_name, cv2.WINDOW_NORMAL) 13 | cv2.setWindowTitle(window_name, title) 14 | if width and height: 15 | cv2.resizeWindow(window_name, width, height) 16 | 17 | 18 | def show_help_text(img, help_text): 19 | """Draw help text on image.""" 20 | cv2.putText(img, help_text, (11, 20), cv2.FONT_HERSHEY_PLAIN, 1.0, 21 | (32, 32, 32), 4, cv2.LINE_AA) 22 | cv2.putText(img, help_text, (10, 20), cv2.FONT_HERSHEY_PLAIN, 1.0, 23 | (240, 240, 240), 1, cv2.LINE_AA) 24 | return img 25 | 26 | 27 | def show_fps(img, fps): 28 | """Draw fps number at top-left corner of the image.""" 29 | font = cv2.FONT_HERSHEY_PLAIN 30 | line = cv2.LINE_AA 31 | fps_text = 'FPS: {:.2f}'.format(fps) 32 | cv2.putText(img, fps_text, (11, 20), font, 1.0, (32, 32, 32), 4, line) 33 | cv2.putText(img, fps_text, (10, 20), font, 1.0, (240, 240, 240), 1, line) 34 | return img 35 | 36 | 37 | def set_display(window_name, full_scrn): 38 | """Set disply window to either full screen or normal.""" 39 | if full_scrn: 40 | cv2.setWindowProperty(window_name, cv2.WND_PROP_FULLSCREEN, 41 | cv2.WINDOW_FULLSCREEN) 42 | else: 43 | cv2.setWindowProperty(window_name, cv2.WND_PROP_FULLSCREEN, 44 | cv2.WINDOW_NORMAL) 45 | 46 | 47 | class FpsCalculator(): 48 | """Helper class for calculating frames-per-second (FPS).""" 49 | 50 | def __init__(self, decay_factor=0.95): 51 | self.fps = 0.0 52 | self.tic = time.time() 53 | self.decay_factor = decay_factor 54 | 55 | def update(self): 56 | toc = time.time() 57 | curr_fps = 
1.0 / (toc - self.tic) 58 | self.fps = curr_fps if self.fps == 0.0 else self.fps 59 | self.fps = self.fps * self.decay_factor + \ 60 | curr_fps * (1 - self.decay_factor) 61 | self.tic = toc 62 | return self.fps 63 | 64 | def reset(self): 65 | self.fps = 0.0 66 | 67 | 68 | class ScreenToggler(): 69 | """Helper class for toggling between non-fullscreen and fullscreen.""" 70 | 71 | def __init__(self): 72 | self.full_scrn = False 73 | 74 | def toggle(self): 75 | self.full_scrn = not self.full_scrn 76 | set_display(WINDOW_NAME, self.full_scrn) 77 | -------------------------------------------------------------------------------- /utils/mjpeg.py: -------------------------------------------------------------------------------- 1 | """mjpeg.py 2 | 3 | This module implements a simple MJPEG server which handles HTTP 4 | requests from remote clients. 5 | """ 6 | 7 | 8 | import time 9 | import queue 10 | import threading 11 | import socket 12 | from http.server import BaseHTTPRequestHandler, HTTPServer 13 | from socketserver import ThreadingMixIn 14 | 15 | import numpy as np 16 | import cv2 17 | 18 | 19 | # globals 20 | _MJPEG_QUEUE = queue.Queue(maxsize=2) 21 | _SLEEP_INTERVAL = 0.1 # update JPG roughly every 0.1 second 22 | 23 | 24 | class MjpegHandler(BaseHTTPRequestHandler): 25 | """A simple MJPEG handler which publishes images.""" 26 | 27 | def _handle_mjpeg(self): 28 | global _MJPEG_QUEUE 29 | img = _MJPEG_QUEUE.get() 30 | 31 | self.send_response(200) 32 | self.send_header( 33 | 'Content-type', 34 | 'multipart/x-mixed-replace; boundary=--jpgboundary' 35 | ) 36 | self.end_headers() 37 | 38 | while True: 39 | if not _MJPEG_QUEUE.empty(): 40 | img = _MJPEG_QUEUE.get() 41 | ret, jpg = cv2.imencode('.jpg', img) 42 | assert jpg is not None 43 | self.wfile.write("--jpgboundary".encode("utf-8")) 44 | self.send_header('Content-type', 'image/jpeg') 45 | self.send_header('Content-length', str(jpg.size)) 46 | self.end_headers() 47 | self.wfile.write(jpg.tostring()) 48 | time.sleep(_SLEEP_INTERVAL) 49 | 50 | def _handle_error(self): 51 | self.send_response(404) 52 | self.send_header('Content-type', 'text/html') 53 | self.end_headers() 54 | self.wfile.write('') 55 | self.wfile.write('
<h1>{0!s} not found</h1>
'.format(self.path)) 56 | self.wfile.write('') 57 | 58 | def do_GET(self): 59 | if self.path == '/mjpg' or self.path == '/': 60 | self._handle_mjpeg() 61 | else: 62 | #print('ERROR: ', self.path) 63 | self._handle_error() 64 | 65 | def handle(self): 66 | try: 67 | super().handle() 68 | except socket.error: 69 | # ignore BrokenPipeError, which is caused by the client 70 | # terminating the HTTP connection 71 | pass 72 | 73 | 74 | class ThreadedHTTPServer(ThreadingMixIn, HTTPServer): 75 | """Handle HTTP requests in a separate thread.""" 76 | # not used... 77 | 78 | 79 | def run_server(server): 80 | server.serve_forever() # this exits when server.shutdown() is called 81 | server.socket.shutdown(socket.SHUT_RDWR) 82 | server.socket.close() 83 | 84 | 85 | class MjpegServer(object): 86 | def __init__(self, init_img=None, ip='', port=8080): 87 | # initialize the queue with a dummy image 88 | global _MJPEG_QUEUE 89 | init_img = init_img if init_img else \ 90 | np.ones((480, 640, 3), np.uint8) * 255 # all white 91 | _MJPEG_QUEUE.put(init_img) 92 | # create the HTTP server and run it from the child thread 93 | self.server = HTTPServer((ip, port), MjpegHandler) 94 | self.run_thread = threading.Thread( 95 | target=run_server, args=(self.server,)) 96 | self.run_thread.start() 97 | 98 | def send_img(self, img): 99 | global _MJPEG_QUEUE 100 | try: 101 | _MJPEG_QUEUE.put(img, block=False) 102 | except queue.Full: 103 | pass 104 | 105 | def shutdown(self): 106 | self.server.shutdown() 107 | del self.server 108 | -------------------------------------------------------------------------------- /utils/modnet.py: -------------------------------------------------------------------------------- 1 | """modnet.py 2 | 3 | Implementation of TrtMODNet class. 4 | """ 5 | 6 | 7 | import numpy as np 8 | import cv2 9 | import tensorrt as trt 10 | import pycuda.driver as cuda 11 | 12 | 13 | # Code in this module is only for TensorRT 7+ 14 | if trt.__version__[0] < '7': 15 | raise SystemExit('TensorRT version < 7') 16 | 17 | 18 | def _preprocess_modnet(img, input_shape): 19 | """Preprocess an image before TRT MODNet inferencing. 20 | 21 | # Args 22 | img: int8 numpy array of shape (img_h, img_w, 3) 23 | input_shape: a tuple of (H, W) 24 | 25 | # Returns 26 | preprocessed img: float32 numpy array of shape (3, H, W) 27 | """ 28 | img = cv2.resize(img, (input_shape[1], input_shape[0]), cv2.INTER_AREA) 29 | img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) 30 | img = img.transpose((2, 0, 1)).astype(np.float32) 31 | img = (img - 127.5) / 127.5 32 | return img 33 | 34 | 35 | def _postprocess_modnet(output, output_shape): 36 | """Postprocess TRT MODNet output. 37 | 38 | # Args 39 | output: inferenced output by the TensorRT engine 40 | output_shape: (H, W), e.g. 
(480, 640) 41 | """ 42 | matte = cv2.resize( 43 | output, (output_shape[1], output_shape[0]), 44 | interpolation=cv2.INTER_AREA) 45 | return matte 46 | 47 | 48 | class HostDeviceMem(object): 49 | """Simple helper data class that's a little nicer to use than a 2-tuple.""" 50 | def __init__(self, host_mem, device_mem): 51 | self.host = host_mem 52 | self.device = device_mem 53 | 54 | def __str__(self): 55 | return 'Host:\n' + str(self.host) + '\nDevice:\n' + str(self.device) 56 | 57 | def __repr__(self): 58 | return self.__str__() 59 | 60 | 61 | def allocate_buffers(engine, context): 62 | """Allocates all host/device in/out buffers required for an engine.""" 63 | assert len(engine) == 2 and engine[0] == 'input' and engine[1] == 'output' 64 | dtype = trt.nptype(engine.get_binding_dtype('input')) 65 | assert trt.nptype(engine.get_binding_dtype('output')) == dtype 66 | bindings = [] 67 | 68 | dims_in = context.get_binding_shape(0) 69 | assert len(dims_in) == 4 and dims_in[0] == 1 and dims_in[1] == 3 70 | hmem_in = cuda.pagelocked_empty(trt.volume(dims_in), dtype) 71 | dmem_in = cuda.mem_alloc(hmem_in.nbytes) 72 | bindings.append(int(dmem_in)) 73 | inputs = [HostDeviceMem(hmem_in, dmem_in)] 74 | 75 | dims_out = context.get_binding_shape(1) 76 | assert len(dims_out) == 4 and dims_out[0] == 1 and dims_out[1] == 1 77 | assert dims_out[2] == dims_in[2] and dims_out[3] == dims_in[3] 78 | hmem_out = cuda.pagelocked_empty(trt.volume(dims_out), dtype) 79 | dmem_out = cuda.mem_alloc(hmem_out.nbytes) 80 | bindings.append(int(dmem_out)) 81 | outputs = [HostDeviceMem(hmem_out, dmem_out)] 82 | 83 | return bindings, inputs, outputs 84 | 85 | 86 | def do_inference_v2(context, bindings, inputs, outputs, stream): 87 | """do_inference_v2 (for TensorRT 7.0+) 88 | 89 | This function is generalized for multiple inputs/outputs for full 90 | dimension networks. Inputs and outputs are expected to be lists 91 | of HostDeviceMem objects. 92 | """ 93 | # Transfer input data to the GPU. 94 | [cuda.memcpy_htod_async(inp.device, inp.host, stream) for inp in inputs] 95 | # Run inference. 96 | context.execute_async_v2(bindings=bindings, stream_handle=stream.handle) 97 | # Transfer predictions back from the GPU. 98 | [cuda.memcpy_dtoh_async(out.host, out.device, stream) for out in outputs] 99 | # Synchronize the stream 100 | stream.synchronize() 101 | # Return only the host outputs. 102 | return [out.host for out in outputs] 103 | 104 | 105 | class TrtMODNet(object): 106 | """TrtMODNet class encapsulates things needed to run TRT MODNet.""" 107 | 108 | def __init__(self, cuda_ctx=None): 109 | """Initialize TensorRT plugins, engine and conetxt. 
110 | 111 | # Arguments 112 | cuda_ctx: PyCUDA context for inferencing (usually only needed 113 | in multi-threaded cases 114 | """ 115 | self.cuda_ctx = cuda_ctx 116 | if self.cuda_ctx: 117 | self.cuda_ctx.push() 118 | self.trt_logger = trt.Logger(trt.Logger.INFO) 119 | self.engine = self._load_engine() 120 | assert self.engine.get_binding_dtype('input') == trt.tensorrt.DataType.FLOAT 121 | 122 | try: 123 | self.context = self.engine.create_execution_context() 124 | self.output_shape = self.context.get_binding_shape(1) # (1, 1, 480, 640) 125 | self.stream = cuda.Stream() 126 | self.bindings, self.inputs, self.outputs = allocate_buffers( 127 | self.engine, self.context) 128 | except Exception as e: 129 | raise RuntimeError('fail to allocate CUDA resources') from e 130 | finally: 131 | if self.cuda_ctx: 132 | self.cuda_ctx.pop() 133 | dims = self.context.get_binding_shape(0) # 'input' 134 | self.input_shape = (dims[2], dims[3]) 135 | 136 | def _load_engine(self): 137 | if not trt.init_libnvinfer_plugins(self.trt_logger, ''): 138 | raise RuntimeError('fail to init built-in plugins') 139 | engine_path = 'modnet/modnet.engine' 140 | with open(engine_path, 'rb') as f, trt.Runtime(self.trt_logger) as runtime: 141 | return runtime.deserialize_cuda_engine(f.read()) 142 | 143 | def infer(self, img): 144 | """Infer an image. 145 | 146 | The output is a matte (matting mask), which is a grayscale image 147 | with either 0 or 255 pixels. 148 | """ 149 | img_resized = _preprocess_modnet(img, self.input_shape) 150 | 151 | self.inputs[0].host = np.ascontiguousarray(img_resized) 152 | if self.cuda_ctx: 153 | self.cuda_ctx.push() 154 | trt_outputs = do_inference_v2( 155 | context=self.context, 156 | bindings=self.bindings, 157 | inputs=self.inputs, 158 | outputs=self.outputs, 159 | stream=self.stream) 160 | if self.cuda_ctx: 161 | self.cuda_ctx.pop() 162 | 163 | output = trt_outputs[0].reshape(self.output_shape[-2:]) 164 | return _postprocess_modnet(output, img.shape[:2]) 165 | -------------------------------------------------------------------------------- /utils/mtcnn.py: -------------------------------------------------------------------------------- 1 | """mtcnn_trt.py 2 | """ 3 | 4 | import numpy as np 5 | import cv2 6 | import pytrt 7 | 8 | 9 | PIXEL_MEAN = 127.5 10 | PIXEL_SCALE = 0.0078125 11 | 12 | 13 | def convert_to_1x1(boxes): 14 | """Convert detection boxes to 1:1 sizes 15 | 16 | # Arguments 17 | boxes: numpy array, shape (n,5), dtype=float32 18 | 19 | # Returns 20 | boxes_1x1 21 | """ 22 | boxes_1x1 = boxes.copy() 23 | hh = boxes[:, 3] - boxes[:, 1] + 1. 24 | ww = boxes[:, 2] - boxes[:, 0] + 1. 25 | mm = np.maximum(hh, ww) 26 | boxes_1x1[:, 0] = boxes[:, 0] + ww * 0.5 - mm * 0.5 27 | boxes_1x1[:, 1] = boxes[:, 1] + hh * 0.5 - mm * 0.5 28 | boxes_1x1[:, 2] = boxes_1x1[:, 0] + mm - 1. 29 | boxes_1x1[:, 3] = boxes_1x1[:, 1] + mm - 1. 
30 | boxes_1x1[:, 0:4] = np.fix(boxes_1x1[:, 0:4]) 31 | return boxes_1x1 32 | 33 | 34 | def crop_img_with_padding(img, box, padding=0): 35 | """Crop a box from image, with out-of-boundary pixels padded 36 | 37 | # Arguments 38 | img: img as a numpy array, shape (H, W, 3) 39 | box: numpy array, shape (5,) or (4,) 40 | padding: integer value for padded pixels 41 | 42 | # Returns 43 | cropped_im: cropped image as a numpy array, shape (H, W, 3) 44 | """ 45 | img_h, img_w, _ = img.shape 46 | if box.shape[0] == 5: 47 | cx1, cy1, cx2, cy2, _ = box.astype(int) 48 | elif box.shape[0] == 4: 49 | cx1, cy1, cx2, cy2 = box.astype(int) 50 | else: 51 | raise ValueError 52 | cw = cx2 - cx1 + 1 53 | ch = cy2 - cy1 + 1 54 | cropped_im = np.zeros((ch, cw, 3), dtype=np.uint8) + padding 55 | ex1 = max(0, -cx1) # ex/ey's are the destination coordinates 56 | ey1 = max(0, -cy1) 57 | ex2 = min(cw, img_w - cx1) 58 | ey2 = min(ch, img_h - cy1) 59 | fx1 = max(cx1, 0) # fx/fy's are the source coordinates 60 | fy1 = max(cy1, 0) 61 | fx2 = min(cx2+1, img_w) 62 | fy2 = min(cy2+1, img_h) 63 | cropped_im[ey1:ey2, ex1:ex2, :] = img[fy1:fy2, fx1:fx2, :] 64 | return cropped_im 65 | 66 | 67 | def nms(boxes, threshold, type='Union'): 68 | """Non-Maximum Supression 69 | 70 | # Arguments 71 | boxes: numpy array [:, 0:5] of [x1, y1, x2, y2, score]'s 72 | threshold: confidence/score threshold, e.g. 0.5 73 | type: 'Union' or 'Min' 74 | 75 | # Returns 76 | A list of indices indicating the result of NMS 77 | """ 78 | if boxes.shape[0] == 0: 79 | return [] 80 | xx1, yy1, xx2, yy2 = boxes[:, 0], boxes[:, 1], boxes[:, 2], boxes[:, 3] 81 | areas = np.multiply(xx2-xx1+1, yy2-yy1+1) 82 | sorted_idx = boxes[:, 4].argsort() 83 | 84 | pick = [] 85 | while len(sorted_idx) > 0: 86 | # In each loop, pick the last box (highest score) and remove 87 | # all other boxes with IoU over threshold 88 | tx1 = np.maximum(xx1[sorted_idx[-1]], xx1[sorted_idx[0:-1]]) 89 | ty1 = np.maximum(yy1[sorted_idx[-1]], yy1[sorted_idx[0:-1]]) 90 | tx2 = np.minimum(xx2[sorted_idx[-1]], xx2[sorted_idx[0:-1]]) 91 | ty2 = np.minimum(yy2[sorted_idx[-1]], yy2[sorted_idx[0:-1]]) 92 | tw = np.maximum(0.0, tx2 - tx1 + 1) 93 | th = np.maximum(0.0, ty2 - ty1 + 1) 94 | inter = tw * th 95 | if type == 'Min': 96 | iou = inter / \ 97 | np.minimum(areas[sorted_idx[-1]], areas[sorted_idx[0:-1]]) 98 | else: 99 | iou = inter / \ 100 | (areas[sorted_idx[-1]] + areas[sorted_idx[0:-1]] - inter) 101 | pick.append(sorted_idx[-1]) 102 | sorted_idx = sorted_idx[np.where(iou <= threshold)[0]] 103 | return pick 104 | 105 | 106 | def generate_pnet_bboxes(conf, reg, scale, t): 107 | """ 108 | # Arguments 109 | conf: softmax score (face or not) of each grid 110 | reg: regression values of x1, y1, x2, y2 coordinates. 111 | The values are normalized to grid width (12) and 112 | height (12). 113 | scale: scale-down factor with respect to original image 114 | t: confidence threshold 115 | 116 | # Returns 117 | A numpy array of bounding box coordinates and the 118 | cooresponding scores: [[x1, y1, x2, y2, score], ...] 119 | 120 | # Notes 121 | Top left corner coordinates of each grid is (x*2, y*2), 122 | or (x*2/scale, y*2/scale) in the original image. 123 | Bottom right corner coordinates is (x*2+12-1, y*2+12-1), 124 | or ((x*2+12-1)/scale, (y*2+12-1)/scale) in the original 125 | image. 
126 | """ 127 | conf = conf.T # swap H and W dimensions 128 | dx1 = reg[0, :, :].T 129 | dy1 = reg[1, :, :].T 130 | dx2 = reg[2, :, :].T 131 | dy2 = reg[3, :, :].T 132 | (x, y) = np.where(conf >= t) 133 | if len(x) == 0: 134 | return np.zeros((0, 5), np.float32) 135 | 136 | score = np.array(conf[x, y]).reshape(-1, 1) # Nx1 137 | reg = np.array([dx1[x, y], dy1[x, y], 138 | dx2[x, y], dy2[x, y]]).T * 12. # Nx4 139 | topleft = np.array([x, y], dtype=np.float32).T * 2. # Nx2 140 | bottomright = topleft + np.array([11., 11.], dtype=np.float32) # Nx2 141 | boxes = (np.concatenate((topleft, bottomright), axis=1) + reg) / scale 142 | boxes = np.concatenate((boxes, score), axis=1) # Nx5 143 | # filter bboxes which are too small 144 | #boxes = boxes[boxes[:, 2]-boxes[:, 0] >= 12., :] 145 | #boxes = boxes[boxes[:, 3]-boxes[:, 1] >= 12., :] 146 | return boxes 147 | 148 | 149 | def generate_rnet_bboxes(conf, reg, pboxes, t): 150 | """ 151 | # Arguments 152 | conf: softmax score (face or not) of each box 153 | reg: regression values of x1, y1, x2, y2 coordinates. 154 | The values are normalized to box width and height. 155 | pboxes: input boxes to RNet 156 | t: confidence threshold 157 | 158 | # Returns 159 | boxes: a numpy array of box coordinates and cooresponding 160 | scores: [[x1, y1, x2, y2, score], ...] 161 | """ 162 | boxes = pboxes.copy() # make a copy 163 | assert boxes.shape[0] == conf.shape[0] 164 | boxes[:, 4] = conf # update 'score' of all boxes 165 | boxes = boxes[conf >= t, :] 166 | reg = reg[conf >= t, :] 167 | ww = (boxes[:, 2]-boxes[:, 0]+1).reshape(-1, 1) # x2 - x1 + 1 168 | hh = (boxes[:, 3]-boxes[:, 1]+1).reshape(-1, 1) # y2 - y1 + 1 169 | boxes[:, 0:4] += np.concatenate((ww, hh, ww, hh), axis=1) * reg 170 | return boxes 171 | 172 | 173 | def generate_onet_outputs(conf, reg_boxes, reg_marks, rboxes, t): 174 | """ 175 | # Arguments 176 | conf: softmax score (face or not) of each box 177 | reg_boxes: regression values of x1, y1, x2, y2 178 | The values are normalized to box width and height. 179 | reg_marks: regression values of the 5 facial landmark points 180 | rboxes: input boxes to ONet (already converted to 2x1) 181 | t: confidence threshold 182 | 183 | # Returns 184 | boxes: a numpy array of box coordinates and cooresponding 185 | scores: [[x1, y1, x2, y2,... , score], ...] 186 | landmarks: a numpy array of facial landmark coordinates: 187 | [[x1, x2, ..., x5, y1, y2, ..., y5], ...] 188 | """ 189 | boxes = rboxes.copy() # make a copy 190 | assert boxes.shape[0] == conf.shape[0] 191 | boxes[:, 4] = conf 192 | boxes = boxes[conf >= t, :] 193 | reg_boxes = reg_boxes[conf >= t, :] 194 | reg_marks = reg_marks[conf >= t, :] 195 | xx = boxes[:, 0].reshape(-1, 1) 196 | yy = boxes[:, 1].reshape(-1, 1) 197 | ww = (boxes[:, 2]-boxes[:, 0]).reshape(-1, 1) 198 | hh = (boxes[:, 3]-boxes[:, 1]).reshape(-1, 1) 199 | marks = np.concatenate((xx, xx, xx, xx, xx, yy, yy, yy, yy, yy), axis=1) 200 | marks += np.concatenate((ww, ww, ww, ww, ww, hh, hh, hh, hh, hh), axis=1) * reg_marks 201 | ww = ww + 1 202 | hh = hh + 1 203 | boxes[:, 0:4] += np.concatenate((ww, hh, ww, hh), axis=1) * reg_boxes 204 | return boxes, marks 205 | 206 | 207 | def clip_dets(dets, img_w, img_h): 208 | """Round and clip detection (x1, y1, ...) values. 209 | 210 | Note we exclude the last value of 'dets' in computation since 211 | it is 'conf'. 
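    For a 5-column 'dets' ([x1, y1, x2, y2, conf]), 'evens' below is
    [0, 2] and 'odds' is [1, 3], so x's are clipped to [0, img_w-1] and
    y's to [0, img_h-1].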
212 | """ 213 | dets[:, 0:-1] = np.fix(dets[:, 0:-1]) 214 | evens = np.arange(0, dets.shape[1]-1, 2) 215 | odds = np.arange(1, dets.shape[1]-1, 2) 216 | dets[:, evens] = np.clip(dets[:, evens], 0., float(img_w-1)) 217 | dets[:, odds] = np.clip(dets[:, odds], 0., float(img_h-1)) 218 | return dets 219 | 220 | 221 | class TrtPNet(object): 222 | """TrtPNet 223 | 224 | Refer to mtcnn/det1_relu.prototxt for calculation of input/output 225 | dimmensions of TrtPNet, as well as input H offsets (for all scales). 226 | The output H offsets are merely input offsets divided by stride (2). 227 | """ 228 | input_h_offsets = (0, 216, 370, 478, 556, 610, 648, 676, 696) 229 | output_h_offsets = (0, 108, 185, 239, 278, 305, 324, 338, 348) 230 | max_n_scales = 9 231 | 232 | def __init__(self, engine): 233 | """__init__ 234 | 235 | # Arguments 236 | engine: path to the TensorRT engine file 237 | """ 238 | self.trtnet = pytrt.PyTrtMtcnn(engine, 239 | (3, 710, 384), 240 | (2, 350, 187), 241 | (4, 350, 187)) 242 | self.trtnet.set_batchsize(1) 243 | 244 | def detect(self, img, minsize=40, factor=0.709, threshold=0.7): 245 | """Detect faces using PNet 246 | 247 | # Arguments 248 | img: input image as a RGB numpy array 249 | threshold: confidence threshold 250 | 251 | # Returns 252 | A numpy array of bounding box coordinates and the 253 | cooresponding scores: [[x1, y1, x2, y2, score], ...] 254 | """ 255 | if minsize < 40: 256 | raise ValueError("TrtPNet is currently designed with " 257 | "'minsize' >= 40") 258 | if factor > 0.709: 259 | raise ValueError("TrtPNet is currently designed with " 260 | "'factor' <= 0.709") 261 | m = 12.0 / minsize 262 | img_h, img_w, _ = img.shape 263 | minl = min(img_h, img_w) * m 264 | 265 | # create scale pyramid 266 | scales = [] 267 | while minl >= 12: 268 | scales.append(m) 269 | m *= factor 270 | minl *= factor 271 | if len(scales) > self.max_n_scales: # probably won't happen... 
272 | raise ValueError('Too many scales, try increasing minsize ' 273 | 'or decreasing factor.') 274 | 275 | total_boxes = np.zeros((0, 5), dtype=np.float32) 276 | img = (img.astype(np.float32) - PIXEL_MEAN) * PIXEL_SCALE 277 | 278 | # stack all scales of the input image vertically into 1 big 279 | # image, and only do inferencing once 280 | im_data = np.zeros((1, 3, 710, 384), dtype=np.float32) 281 | for i, scale in enumerate(scales): 282 | h_offset = self.input_h_offsets[i] 283 | h = int(img_h * scale) 284 | w = int(img_w * scale) 285 | im_data[0, :, h_offset:(h_offset+h), :w] = \ 286 | cv2.resize(img, (w, h)).transpose((2, 0, 1)) 287 | 288 | out = self.trtnet.forward(im_data) 289 | 290 | # extract outputs of each scale from the big output blob 291 | for i, scale in enumerate(scales): 292 | h_offset = self.output_h_offsets[i] 293 | h = (int(img_h * scale) - 12) // 2 + 1 294 | w = (int(img_w * scale) - 12) // 2 + 1 295 | pp = out['prob1'][0, 1, h_offset:(h_offset+h), :w] 296 | cc = out['boxes'][0, :, h_offset:(h_offset+h), :w] 297 | boxes = generate_pnet_bboxes(pp, cc, scale, threshold) 298 | if boxes.shape[0] > 0: 299 | pick = nms(boxes, 0.5, 'Union') 300 | if len(pick) > 0: 301 | boxes = boxes[pick, :] 302 | if boxes.shape[0] > 0: 303 | total_boxes = np.concatenate((total_boxes, boxes), axis=0) 304 | 305 | if total_boxes.shape[0] == 0: 306 | return total_boxes 307 | pick = nms(total_boxes, 0.7, 'Union') 308 | dets = clip_dets(total_boxes[pick, :], img_w, img_h) 309 | return dets 310 | 311 | def destroy(self): 312 | self.trtnet.destroy() 313 | self.trtnet = None 314 | 315 | 316 | class TrtRNet(object): 317 | """TrtRNet 318 | 319 | # Arguments 320 | engine: path to the TensorRT engine (det2) file 321 | """ 322 | 323 | def __init__(self, engine): 324 | self.trtnet = pytrt.PyTrtMtcnn(engine, 325 | (3, 24, 24), 326 | (2, 1, 1), 327 | (4, 1, 1)) 328 | 329 | def detect(self, img, boxes, max_batch=256, threshold=0.6): 330 | """Detect faces using RNet 331 | 332 | # Arguments 333 | img: input image as a RGB numpy array 334 | boxes: detection results by PNet, a numpy array [:, 0:5] 335 | of [x1, y1, x2, y2, score]'s 336 | max_batch: only process these many top boxes from PNet 337 | threshold: confidence threshold 338 | 339 | # Returns 340 | A numpy array of bounding box coordinates and the 341 | cooresponding scores: [[x1, y1, x2, y2, score], ...] 342 | """ 343 | if max_batch > 256: 344 | raise ValueError('Bad max_batch: %d' % max_batch) 345 | boxes = boxes[:max_batch] # assuming boxes are sorted by score 346 | if boxes.shape[0] == 0: 347 | return boxes 348 | img_h, img_w, _ = img.shape 349 | boxes = convert_to_1x1(boxes) 350 | crops = np.zeros((boxes.shape[0], 24, 24, 3), dtype=np.uint8) 351 | for i, det in enumerate(boxes): 352 | cropped_im = crop_img_with_padding(img, det) 353 | # NOTE: H and W dimensions need to be transposed for RNet! 354 | crops[i, ...] 
= cv2.transpose(cv2.resize(cropped_im, (24, 24))) 355 | crops = crops.transpose((0, 3, 1, 2)) # NHWC -> NCHW 356 | crops = (crops.astype(np.float32) - PIXEL_MEAN) * PIXEL_SCALE 357 | 358 | self.trtnet.set_batchsize(crops.shape[0]) 359 | out = self.trtnet.forward(crops) 360 | 361 | pp = out['prob1'][:, 1, 0, 0] 362 | cc = out['boxes'][:, :, 0, 0] 363 | boxes = generate_rnet_bboxes(pp, cc, boxes, threshold) 364 | if boxes.shape[0] == 0: 365 | return boxes 366 | pick = nms(boxes, 0.7, 'Union') 367 | dets = clip_dets(boxes[pick, :], img_w, img_h) 368 | return dets 369 | 370 | def destroy(self): 371 | self.trtnet.destroy() 372 | self.trtnet = None 373 | 374 | 375 | class TrtONet(object): 376 | """TrtONet 377 | 378 | # Arguments 379 | engine: path to the TensorRT engine (det3) file 380 | """ 381 | 382 | def __init__(self, engine): 383 | self.trtnet = pytrt.PyTrtMtcnn(engine, 384 | (3, 48, 48), 385 | (2, 1, 1), 386 | (4, 1, 1), 387 | (10, 1, 1)) 388 | 389 | def detect(self, img, boxes, max_batch=64, threshold=0.7): 390 | """Detect faces using ONet 391 | 392 | # Arguments 393 | img: input image as a RGB numpy array 394 | boxes: detection results by RNet, a numpy array [:, 0:5] 395 | of [x1, y1, x2, y2, score]'s 396 | max_batch: only process these many top boxes from RNet 397 | threshold: confidence threshold 398 | 399 | # Returns 400 | dets: boxes and conf scores 401 | landmarks 402 | """ 403 | if max_batch > 64: 404 | raise ValueError('Bad max_batch: %d' % max_batch) 405 | if boxes.shape[0] == 0: 406 | return (np.zeros((0, 5), dtype=np.float32), 407 | np.zeros((0, 10), dtype=np.float32)) 408 | boxes = boxes[:max_batch] # assuming boxes are sorted by score 409 | img_h, img_w, _ = img.shape 410 | boxes = convert_to_1x1(boxes) 411 | crops = np.zeros((boxes.shape[0], 48, 48, 3), dtype=np.uint8) 412 | for i, det in enumerate(boxes): 413 | cropped_im = crop_img_with_padding(img, det) 414 | # NOTE: H and W dimensions need to be transposed for RNet! 415 | crops[i, ...] = cv2.transpose(cv2.resize(cropped_im, (48, 48))) 416 | crops = crops.transpose((0, 3, 1, 2)) # NHWC -> NCHW 417 | crops = (crops.astype(np.float32) - PIXEL_MEAN) * PIXEL_SCALE 418 | 419 | self.trtnet.set_batchsize(crops.shape[0]) 420 | out = self.trtnet.forward(crops) 421 | 422 | pp = out['prob1'][:, 1, 0, 0] 423 | cc = out['boxes'][:, :, 0, 0] 424 | mm = out['landmarks'][:, :, 0, 0] 425 | boxes, landmarks = generate_onet_outputs(pp, cc, mm, boxes, threshold) 426 | pick = nms(boxes, 0.7, 'Min') 427 | return (clip_dets(boxes[pick, :], img_w, img_h), 428 | np.fix(landmarks[pick, :])) 429 | 430 | def destroy(self): 431 | self.trtnet.destroy() 432 | self.trtnet = None 433 | 434 | 435 | class TrtMtcnn(object): 436 | """TrtMtcnn""" 437 | 438 | def __init__(self): 439 | self.pnet = TrtPNet('mtcnn/det1.engine') 440 | self.rnet = TrtRNet('mtcnn/det2.engine') 441 | self.onet = TrtONet('mtcnn/det3.engine') 442 | 443 | def __del__(self): 444 | self.onet.destroy() 445 | self.rnet.destroy() 446 | self.pnet.destroy() 447 | 448 | def _detect_1280x720(self, img, minsize): 449 | """_detec_1280x720() 450 | 451 | Assuming 'img' has been resized to less than 1280x720. 452 | """ 453 | # MTCNN model was trained with 'MATLAB' image so its channel 454 | # order is RGB instead of BGR. 
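        # The three nets below form the usual MTCNN cascade: PNet proposes
        # candidate windows over an image pyramid, RNet rejects and refines
        # them, and ONet outputs the final boxes plus 5 facial landmarks.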
455 | img = img[:, :, ::-1] # BGR -> RGB 456 | dets = self.pnet.detect(img, minsize=minsize) 457 | dets = self.rnet.detect(img, dets) 458 | dets, landmarks = self.onet.detect(img, dets) 459 | return dets, landmarks 460 | 461 | def detect(self, img, minsize=40): 462 | """detect() 463 | 464 | This function handles rescaling of the input image if it's 465 | larger than 1280x720. 466 | """ 467 | if img is None: 468 | raise ValueError 469 | img_h, img_w, _ = img.shape 470 | scale = min(720. / img_h, 1280. / img_w) 471 | if scale < 1.0: 472 | new_h = int(np.ceil(img_h * scale)) 473 | new_w = int(np.ceil(img_w * scale)) 474 | img = cv2.resize(img, (new_w, new_h)) 475 | minsize = max(int(np.ceil(minsize * scale)), 40) 476 | dets, landmarks = self._detect_1280x720(img, minsize) 477 | if scale < 1.0: 478 | dets[:, :-1] = np.fix(dets[:, :-1] / scale) 479 | landmarks = np.fix(landmarks / scale) 480 | return dets, landmarks 481 | -------------------------------------------------------------------------------- /utils/ssd.py: -------------------------------------------------------------------------------- 1 | """ssd.py 2 | 3 | This module implements the TrtSSD class. 4 | """ 5 | 6 | 7 | import ctypes 8 | 9 | import numpy as np 10 | import cv2 11 | import tensorrt as trt 12 | import pycuda.driver as cuda 13 | 14 | 15 | def _preprocess_trt(img, shape=(300, 300)): 16 | """Preprocess an image before TRT SSD inferencing.""" 17 | img = cv2.resize(img, shape) 18 | img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) 19 | img = img.transpose((2, 0, 1)).astype(np.float32) 20 | img *= (2.0/255.0) 21 | img -= 1.0 22 | return img 23 | 24 | 25 | def _postprocess_trt(img, output, conf_th, output_layout=7): 26 | """Postprocess TRT SSD output.""" 27 | img_h, img_w, _ = img.shape 28 | boxes, confs, clss = [], [], [] 29 | for prefix in range(0, len(output), output_layout): 30 | #index = int(output[prefix+0]) 31 | conf = float(output[prefix+2]) 32 | if conf < conf_th: 33 | continue 34 | x1 = int(output[prefix+3] * img_w) 35 | y1 = int(output[prefix+4] * img_h) 36 | x2 = int(output[prefix+5] * img_w) 37 | y2 = int(output[prefix+6] * img_h) 38 | cls = int(output[prefix+1]) 39 | boxes.append((x1, y1, x2, y2)) 40 | confs.append(conf) 41 | clss.append(cls) 42 | return boxes, confs, clss 43 | 44 | 45 | class TrtSSD(object): 46 | """TrtSSD class encapsulates things needed to run TRT SSD.""" 47 | 48 | def _load_plugins(self): 49 | if trt.__version__[0] < '7': 50 | ctypes.CDLL("ssd/libflattenconcat.so") 51 | trt.init_libnvinfer_plugins(self.trt_logger, '') 52 | 53 | def _load_engine(self): 54 | TRTbin = 'ssd/TRT_%s.bin' % self.model 55 | with open(TRTbin, 'rb') as f, trt.Runtime(self.trt_logger) as runtime: 56 | return runtime.deserialize_cuda_engine(f.read()) 57 | 58 | def _allocate_buffers(self): 59 | host_inputs, host_outputs, cuda_inputs, cuda_outputs, bindings = \ 60 | [], [], [], [], [] 61 | for binding in self.engine: 62 | size = trt.volume(self.engine.get_binding_shape(binding)) * \ 63 | self.engine.max_batch_size 64 | host_mem = cuda.pagelocked_empty(size, np.float32) 65 | cuda_mem = cuda.mem_alloc(host_mem.nbytes) 66 | bindings.append(int(cuda_mem)) 67 | if self.engine.binding_is_input(binding): 68 | host_inputs.append(host_mem) 69 | cuda_inputs.append(cuda_mem) 70 | else: 71 | host_outputs.append(host_mem) 72 | cuda_outputs.append(cuda_mem) 73 | return host_inputs, host_outputs, cuda_inputs, cuda_outputs, bindings 74 | 75 | def __init__(self, model, input_shape, cuda_ctx=None): 76 | """Initialize TensorRT plugins, engine and 
conetxt.""" 77 | self.model = model 78 | self.input_shape = input_shape 79 | self.cuda_ctx = cuda_ctx 80 | if self.cuda_ctx: 81 | self.cuda_ctx.push() 82 | 83 | self.trt_logger = trt.Logger(trt.Logger.INFO) 84 | self._load_plugins() 85 | self.engine = self._load_engine() 86 | 87 | try: 88 | self.context = self.engine.create_execution_context() 89 | self.stream = cuda.Stream() 90 | self.host_inputs, self.host_outputs, self.cuda_inputs, self.cuda_outputs, self.bindings = self._allocate_buffers() 91 | except Exception as e: 92 | raise RuntimeError('fail to allocate CUDA resources') from e 93 | finally: 94 | if self.cuda_ctx: 95 | self.cuda_ctx.pop() 96 | 97 | def __del__(self): 98 | """Free CUDA memories and context.""" 99 | del self.cuda_outputs 100 | del self.cuda_inputs 101 | del self.stream 102 | 103 | def detect(self, img, conf_th=0.3): 104 | """Detect objects in the input image.""" 105 | img_resized = _preprocess_trt(img, self.input_shape) 106 | np.copyto(self.host_inputs[0], img_resized.ravel()) 107 | 108 | if self.cuda_ctx: 109 | self.cuda_ctx.push() 110 | cuda.memcpy_htod_async( 111 | self.cuda_inputs[0], self.host_inputs[0], self.stream) 112 | self.context.execute_async( 113 | batch_size=1, 114 | bindings=self.bindings, 115 | stream_handle=self.stream.handle) 116 | cuda.memcpy_dtoh_async( 117 | self.host_outputs[1], self.cuda_outputs[1], self.stream) 118 | cuda.memcpy_dtoh_async( 119 | self.host_outputs[0], self.cuda_outputs[0], self.stream) 120 | self.stream.synchronize() 121 | if self.cuda_ctx: 122 | self.cuda_ctx.pop() 123 | 124 | output = self.host_outputs[0] 125 | return _postprocess_trt(img, output, conf_th) 126 | -------------------------------------------------------------------------------- /utils/ssd_classes.py: -------------------------------------------------------------------------------- 1 | """ssd_classes.py 2 | 3 | This file was modified from: 4 | http://github.com/AastaNV/TRT_object_detection/blob/master/coco.py 5 | """ 6 | 7 | COCO_CLASSES_LIST = [ 8 | 'background', # was 'unlabeled' 9 | 'person', 10 | 'bicycle', 11 | 'car', 12 | 'motorcycle', 13 | 'airplane', 14 | 'bus', 15 | 'train', 16 | 'truck', 17 | 'boat', 18 | 'traffic light', 19 | 'fire hydrant', 20 | 'street sign', 21 | 'stop sign', 22 | 'parking meter', 23 | 'bench', 24 | 'bird', 25 | 'cat', 26 | 'dog', 27 | 'horse', 28 | 'sheep', 29 | 'cow', 30 | 'elephant', 31 | 'bear', 32 | 'zebra', 33 | 'giraffe', 34 | 'hat', 35 | 'backpack', 36 | 'umbrella', 37 | 'shoe', 38 | 'eye glasses', 39 | 'handbag', 40 | 'tie', 41 | 'suitcase', 42 | 'frisbee', 43 | 'skis', 44 | 'snowboard', 45 | 'sports ball', 46 | 'kite', 47 | 'baseball bat', 48 | 'baseball glove', 49 | 'skateboard', 50 | 'surfboard', 51 | 'tennis racket', 52 | 'bottle', 53 | 'plate', 54 | 'wine glass', 55 | 'cup', 56 | 'fork', 57 | 'knife', 58 | 'spoon', 59 | 'bowl', 60 | 'banana', 61 | 'apple', 62 | 'sandwich', 63 | 'orange', 64 | 'broccoli', 65 | 'carrot', 66 | 'hot dog', 67 | 'pizza', 68 | 'donut', 69 | 'cake', 70 | 'chair', 71 | 'couch', 72 | 'potted plant', 73 | 'bed', 74 | 'mirror', 75 | 'dining table', 76 | 'window', 77 | 'desk', 78 | 'toilet', 79 | 'door', 80 | 'tv', 81 | 'laptop', 82 | 'mouse', 83 | 'remote', 84 | 'keyboard', 85 | 'cell phone', 86 | 'microwave', 87 | 'oven', 88 | 'toaster', 89 | 'sink', 90 | 'refrigerator', 91 | 'blender', 92 | 'book', 93 | 'clock', 94 | 'vase', 95 | 'scissors', 96 | 'teddy bear', 97 | 'hair drier', 98 | 'toothbrush', 99 | ] 100 | 101 | EGOHANDS_CLASSES_LIST = [ 102 | 'background', 103 | 'hand', 104 | ] 105 | 106 | 
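# Illustrative usage of get_cls_dict() defined below:
#   cls_dict = get_cls_dict('coco')    # {0: 'background', 1: 'person', ...}
#   name = cls_dict.get(3, 'CLS3')     # -> 'car'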
107 | def get_cls_dict(model): 108 | """Get the class ID to name translation dictionary.""" 109 | if model == 'coco': 110 | cls_list = COCO_CLASSES_LIST 111 | elif model == 'egohands': 112 | cls_list = EGOHANDS_CLASSES_LIST 113 | else: 114 | raise ValueError('Bad model name') 115 | return {i: n for i, n in enumerate(cls_list)} 116 | -------------------------------------------------------------------------------- /utils/ssd_tf.py: -------------------------------------------------------------------------------- 1 | """ssd_tf.py 2 | 3 | This module implements the TfSSD class. 4 | """ 5 | 6 | 7 | import numpy as np 8 | import cv2 9 | import tensorflow as tf 10 | 11 | 12 | def _postprocess_tf(img, boxes, scores, classes, conf_th): 13 | """Postprocess TensorFlow SSD output.""" 14 | h, w, _ = img.shape 15 | out_boxes = boxes[0] * np.array([h, w, h, w]) 16 | out_boxes = out_boxes.astype(np.int32) 17 | out_boxes = out_boxes[:, [1, 0, 3, 2]] # swap x's and y's 18 | out_confs = scores[0] 19 | out_clss = classes[0].astype(np.int32) 20 | 21 | # only return bboxes with confidence score above threshold 22 | mask = np.where(out_confs >= conf_th) 23 | return out_boxes[mask], out_confs[mask], out_clss[mask] 24 | 25 | 26 | class TfSSD(object): 27 | """TfSSD class encapsulates things needed to run TensorFlow SSD.""" 28 | 29 | def __init__(self, model, input_shape): 30 | self.model = model 31 | self.input_shape = input_shape 32 | 33 | # load detection graph 34 | ssd_graph = tf.Graph() 35 | with ssd_graph.as_default(): 36 | graph_def = tf.GraphDef() 37 | with tf.gfile.GFile('ssd/%s.pb' % model, 'rb') as fid: 38 | serialized_graph = fid.read() 39 | graph_def.ParseFromString(serialized_graph) 40 | tf.import_graph_def(graph_def, name='') 41 | 42 | # define input/output tensors 43 | self.image_tensor = ssd_graph.get_tensor_by_name('image_tensor:0') 44 | self.det_boxes = ssd_graph.get_tensor_by_name('detection_boxes:0') 45 | self.det_scores = ssd_graph.get_tensor_by_name('detection_scores:0') 46 | self.det_classes = ssd_graph.get_tensor_by_name('detection_classes:0') 47 | 48 | # create the session for inferencing 49 | self.sess = tf.Session(graph=ssd_graph) 50 | 51 | def __del__(self): 52 | self.sess.close() 53 | 54 | def detect(self, img, conf_th): 55 | img_resized = _preprocess_tf(img, self.input_shape) 56 | boxes, scores, classes = self.sess.run( 57 | [self.det_boxes, self.det_scores, self.det_classes], 58 | feed_dict={self.image_tensor: np.expand_dims(img_resized, 0)}) 59 | return _postprocess_tf(img, boxes, scores, classes, conf_th) 60 | -------------------------------------------------------------------------------- /utils/visualization.py: -------------------------------------------------------------------------------- 1 | """visualization.py 2 | 3 | The BBoxVisualization class implements drawing of nice looking 4 | bounding boxes based on object detection results. 5 | """ 6 | 7 | 8 | import numpy as np 9 | import cv2 10 | 11 | 12 | # Constants 13 | ALPHA = 0.5 14 | FONT = cv2.FONT_HERSHEY_PLAIN 15 | TEXT_SCALE = 1.0 16 | TEXT_THICKNESS = 1 17 | BLACK = (0, 0, 0) 18 | WHITE = (255, 255, 255) 19 | 20 | 21 | def gen_colors(num_colors): 22 | """Generate different colors. 23 | 24 | # Arguments 25 | num_colors: total number of colors/classes. 26 | 27 | # Output 28 | bgrs: a list of (B, G, R) tuples which correspond to each of 29 | the colors/classes. 
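    For example, gen_colors(80) returns 80 visually distinct BGR tuples,
    one per COCO class; the fixed random seed below makes the palette
    deterministic across runs.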
30 | """ 31 | import random 32 | import colorsys 33 | 34 | hsvs = [[float(x) / num_colors, 1., 0.7] for x in range(num_colors)] 35 | random.seed(1234) 36 | random.shuffle(hsvs) 37 | rgbs = list(map(lambda x: list(colorsys.hsv_to_rgb(*x)), hsvs)) 38 | bgrs = [(int(rgb[2] * 255), int(rgb[1] * 255), int(rgb[0] * 255)) 39 | for rgb in rgbs] 40 | return bgrs 41 | 42 | 43 | def draw_boxed_text(img, text, topleft, color): 44 | """Draw a transluent boxed text in white, overlayed on top of a 45 | colored patch surrounded by a black border. FONT, TEXT_SCALE, 46 | TEXT_THICKNESS and ALPHA values are constants (fixed) as defined 47 | on top. 48 | 49 | # Arguments 50 | img: the input image as a numpy array. 51 | text: the text to be drawn. 52 | topleft: XY coordinate of the topleft corner of the boxed text. 53 | color: color of the patch, i.e. background of the text. 54 | 55 | # Output 56 | img: note the original image is modified inplace. 57 | """ 58 | assert img.dtype == np.uint8 59 | img_h, img_w, _ = img.shape 60 | if topleft[0] >= img_w or topleft[1] >= img_h: 61 | return img 62 | margin = 3 63 | size = cv2.getTextSize(text, FONT, TEXT_SCALE, TEXT_THICKNESS) 64 | w = size[0][0] + margin * 2 65 | h = size[0][1] + margin * 2 66 | # the patch is used to draw boxed text 67 | patch = np.zeros((h, w, 3), dtype=np.uint8) 68 | patch[...] = color 69 | cv2.putText(patch, text, (margin+1, h-margin-2), FONT, TEXT_SCALE, 70 | WHITE, thickness=TEXT_THICKNESS, lineType=cv2.LINE_8) 71 | cv2.rectangle(patch, (0, 0), (w-1, h-1), BLACK, thickness=1) 72 | w = min(w, img_w - topleft[0]) # clip overlay at image boundary 73 | h = min(h, img_h - topleft[1]) 74 | # Overlay the boxed text onto region of interest (roi) in img 75 | roi = img[topleft[1]:topleft[1]+h, topleft[0]:topleft[0]+w, :] 76 | cv2.addWeighted(patch[0:h, 0:w, :], ALPHA, roi, 1 - ALPHA, 0, roi) 77 | return img 78 | 79 | 80 | class BBoxVisualization(): 81 | """BBoxVisualization class implements nice drawing of boudning boxes. 82 | 83 | # Arguments 84 | cls_dict: a dictionary used to translate class id to its name. 85 | """ 86 | 87 | def __init__(self, cls_dict): 88 | self.cls_dict = cls_dict 89 | self.colors = gen_colors(len(cls_dict)) 90 | 91 | def draw_bboxes(self, img, boxes, confs, clss): 92 | """Draw detected bounding boxes on the original image.""" 93 | for bb, cf, cl in zip(boxes, confs, clss): 94 | cl = int(cl) 95 | x_min, y_min, x_max, y_max = bb[0], bb[1], bb[2], bb[3] 96 | color = self.colors[cl] 97 | cv2.rectangle(img, (x_min, y_min), (x_max, y_max), color, 2) 98 | txt_loc = (max(x_min+2, 0), max(y_min+2, 0)) 99 | cls_name = self.cls_dict.get(cl, 'CLS{}'.format(cl)) 100 | txt = '{} {:.2f}'.format(cls_name, cf) 101 | img = draw_boxed_text(img, txt, txt_loc, color) 102 | return img 103 | -------------------------------------------------------------------------------- /utils/writer.py: -------------------------------------------------------------------------------- 1 | """writer.py 2 | """ 3 | 4 | 5 | import subprocess 6 | 7 | import cv2 8 | 9 | 10 | def get_video_writer(name, width, height, fps=30): 11 | """Get a VideoWriter object for saving output video. 12 | 13 | This function tries to use Jetson's hardware H.264 encoder (omxh264enc) 14 | if available, in which case the output video would be a MPEG-2 TS file. 15 | Otherwise, it uses cv2's built-in encoding mechanism and saves a MP4 16 | file. 
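    Illustrative usage:
        writer = get_video_writer('output', 1280, 720, fps=30)
        writer.write(frame)   # frame: BGR numpy array of shape (720, 1280, 3)
        writer.release()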
17 | """ 18 | gst_elements = str(subprocess.check_output('gst-inspect-1.0')) 19 | if 'omxh264dec' in gst_elements: 20 | filename = name + '.ts' # Transport Stream 21 | gst_str = ('appsrc ! videoconvert ! omxh264enc ! mpegtsmux ! ' 22 | 'filesink location=%s') % filename 23 | return cv2.VideoWriter( 24 | gst_str, cv2.CAP_GSTREAMER, 0, fps, (width, height)) 25 | else: 26 | filename = name + '.mp4' # MP4 27 | return cv2.VideoWriter( 28 | filename, cv2.VideoWriter_fourcc(*'mp4v'), fps, (width, height)) 29 | 30 | 31 | -------------------------------------------------------------------------------- /utils/yolo_classes.py: -------------------------------------------------------------------------------- 1 | """yolo_classes.py 2 | 3 | NOTE: Number of YOLO COCO output classes differs from SSD COCO models. 4 | """ 5 | 6 | COCO_CLASSES_LIST = [ 7 | 'person', 8 | 'bicycle', 9 | 'car', 10 | 'motorbike', 11 | 'aeroplane', 12 | 'bus', 13 | 'train', 14 | 'truck', 15 | 'boat', 16 | 'traffic light', 17 | 'fire hydrant', 18 | 'stop sign', 19 | 'parking meter', 20 | 'bench', 21 | 'bird', 22 | 'cat', 23 | 'dog', 24 | 'horse', 25 | 'sheep', 26 | 'cow', 27 | 'elephant', 28 | 'bear', 29 | 'zebra', 30 | 'giraffe', 31 | 'backpack', 32 | 'umbrella', 33 | 'handbag', 34 | 'tie', 35 | 'suitcase', 36 | 'frisbee', 37 | 'skis', 38 | 'snowboard', 39 | 'sports ball', 40 | 'kite', 41 | 'baseball bat', 42 | 'baseball glove', 43 | 'skateboard', 44 | 'surfboard', 45 | 'tennis racket', 46 | 'bottle', 47 | 'wine glass', 48 | 'cup', 49 | 'fork', 50 | 'knife', 51 | 'spoon', 52 | 'bowl', 53 | 'banana', 54 | 'apple', 55 | 'sandwich', 56 | 'orange', 57 | 'broccoli', 58 | 'carrot', 59 | 'hot dog', 60 | 'pizza', 61 | 'donut', 62 | 'cake', 63 | 'chair', 64 | 'sofa', 65 | 'pottedplant', 66 | 'bed', 67 | 'diningtable', 68 | 'toilet', 69 | 'tvmonitor', 70 | 'laptop', 71 | 'mouse', 72 | 'remote', 73 | 'keyboard', 74 | 'cell phone', 75 | 'microwave', 76 | 'oven', 77 | 'toaster', 78 | 'sink', 79 | 'refrigerator', 80 | 'book', 81 | 'clock', 82 | 'vase', 83 | 'scissors', 84 | 'teddy bear', 85 | 'hair drier', 86 | 'toothbrush', 87 | ] 88 | 89 | # For translating YOLO class ids (0~79) to SSD class ids (0~90) 90 | yolo_cls_to_ssd = [ 91 | 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20, 92 | 21, 22, 23, 24, 25, 27, 28, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 93 | 41, 42, 43, 44, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 94 | 59, 60, 61, 62, 63, 64, 65, 67, 70, 72, 73, 74, 75, 76, 77, 78, 79, 95 | 80, 81, 82, 84, 85, 86, 87, 88, 89, 90, 96 | ] 97 | 98 | 99 | def get_cls_dict(category_num): 100 | """Get the class ID to name translation dictionary.""" 101 | if category_num == 80: 102 | return {i: n for i, n in enumerate(COCO_CLASSES_LIST)} 103 | else: 104 | return {i: 'CLS%d' % i for i in range(category_num)} 105 | -------------------------------------------------------------------------------- /utils/yolo_with_plugins.py: -------------------------------------------------------------------------------- 1 | """yolo_with_plugins.py 2 | 3 | Implementation of TrtYOLO class with the yolo_layer plugins. 4 | """ 5 | 6 | 7 | from __future__ import print_function 8 | 9 | import ctypes 10 | 11 | import numpy as np 12 | import cv2 13 | import tensorrt as trt 14 | import pycuda.driver as cuda 15 | 16 | 17 | try: 18 | ctypes.cdll.LoadLibrary('./plugins/libyolo_layer.so') 19 | except OSError as e: 20 | raise SystemExit('ERROR: failed to load ./plugins/libyolo_layer.so. 
' 21 | 'Did you forget to do a "make" in the "./plugins/" ' 22 | 'subdirectory?') from e 23 | 24 | 25 | def _preprocess_yolo(img, input_shape, letter_box=False): 26 | """Preprocess an image before TRT YOLO inferencing. 27 | 28 | # Args 29 | img: int8 numpy array of shape (img_h, img_w, 3) 30 | input_shape: a tuple of (H, W) 31 | letter_box: boolean, specifies whether to keep aspect ratio and 32 | create a "letterboxed" image for inference 33 | 34 | # Returns 35 | preprocessed img: float32 numpy array of shape (3, H, W) 36 | """ 37 | if letter_box: 38 | img_h, img_w, _ = img.shape 39 | new_h, new_w = input_shape[0], input_shape[1] 40 | offset_h, offset_w = 0, 0 41 | if (new_w / img_w) <= (new_h / img_h): 42 | new_h = int(img_h * new_w / img_w) 43 | offset_h = (input_shape[0] - new_h) // 2 44 | else: 45 | new_w = int(img_w * new_h / img_h) 46 | offset_w = (input_shape[1] - new_w) // 2 47 | resized = cv2.resize(img, (new_w, new_h)) 48 | img = np.full((input_shape[0], input_shape[1], 3), 127, dtype=np.uint8) 49 | img[offset_h:(offset_h + new_h), offset_w:(offset_w + new_w), :] = resized 50 | else: 51 | img = cv2.resize(img, (input_shape[1], input_shape[0])) 52 | 53 | img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) 54 | img = img.transpose((2, 0, 1)).astype(np.float32) 55 | img /= 255.0 56 | return img 57 | 58 | 59 | def _nms_boxes(detections, nms_threshold): 60 | """Apply the Non-Maximum Suppression (NMS) algorithm on the bounding 61 | boxes with their confidence scores and return an array with the 62 | indexes of the bounding boxes we want to keep. 63 | 64 | # Args 65 | detections: Nx7 numpy arrays of 66 | [[x, y, w, h, box_confidence, class_id, class_prob], 67 | ......] 68 | """ 69 | x_coord = detections[:, 0] 70 | y_coord = detections[:, 1] 71 | width = detections[:, 2] 72 | height = detections[:, 3] 73 | box_confidences = detections[:, 4] * detections[:, 6] 74 | 75 | areas = width * height 76 | ordered = box_confidences.argsort()[::-1] 77 | 78 | keep = list() 79 | while ordered.size > 0: 80 | # Index of the current element: 81 | i = ordered[0] 82 | keep.append(i) 83 | xx1 = np.maximum(x_coord[i], x_coord[ordered[1:]]) 84 | yy1 = np.maximum(y_coord[i], y_coord[ordered[1:]]) 85 | xx2 = np.minimum(x_coord[i] + width[i], x_coord[ordered[1:]] + width[ordered[1:]]) 86 | yy2 = np.minimum(y_coord[i] + height[i], y_coord[ordered[1:]] + height[ordered[1:]]) 87 | 88 | width1 = np.maximum(0.0, xx2 - xx1 + 1) 89 | height1 = np.maximum(0.0, yy2 - yy1 + 1) 90 | intersection = width1 * height1 91 | union = (areas[i] + areas[ordered[1:]] - intersection) 92 | iou = intersection / union 93 | indexes = np.where(iou <= nms_threshold)[0] 94 | ordered = ordered[indexes + 1] 95 | 96 | keep = np.array(keep) 97 | return keep 98 | 99 | 100 | def _postprocess_yolo(trt_outputs, img_w, img_h, conf_th, nms_threshold, 101 | input_shape, letter_box=False): 102 | """Postprocess TensorRT outputs. 
103 | 104 | # Args 105 | trt_outputs: a list of 2 or 3 tensors, where each tensor 106 | contains a multiple of 7 float32 numbers in 107 | the order of [x, y, w, h, box_confidence, class_id, class_prob] 108 | conf_th: confidence threshold 109 | letter_box: boolean, referring to _preprocess_yolo() 110 | 111 | # Returns 112 | boxes, scores, classes (after NMS) 113 | """ 114 | # filter low-conf detections and concatenate results of all yolo layers 115 | detections = [] 116 | for o in trt_outputs: 117 | dets = o.reshape((-1, 7)) 118 | dets = dets[dets[:, 4] * dets[:, 6] >= conf_th] 119 | detections.append(dets) 120 | detections = np.concatenate(detections, axis=0) 121 | 122 | if len(detections) == 0: 123 | boxes = np.zeros((0, 4), dtype=np.int) 124 | scores = np.zeros((0,), dtype=np.float32) 125 | classes = np.zeros((0,), dtype=np.float32) 126 | else: 127 | box_scores = detections[:, 4] * detections[:, 6] 128 | 129 | # scale x, y, w, h from [0, 1] to pixel values 130 | old_h, old_w = img_h, img_w 131 | offset_h, offset_w = 0, 0 132 | if letter_box: 133 | if (img_w / input_shape[1]) >= (img_h / input_shape[0]): 134 | old_h = int(input_shape[0] * img_w / input_shape[1]) 135 | offset_h = (old_h - img_h) // 2 136 | else: 137 | old_w = int(input_shape[1] * img_h / input_shape[0]) 138 | offset_w = (old_w - img_w) // 2 139 | detections[:, 0:4] *= np.array( 140 | [old_w, old_h, old_w, old_h], dtype=np.float32) 141 | 142 | # NMS 143 | nms_detections = np.zeros((0, 7), dtype=detections.dtype) 144 | for class_id in set(detections[:, 5]): 145 | idxs = np.where(detections[:, 5] == class_id) 146 | cls_detections = detections[idxs] 147 | keep = _nms_boxes(cls_detections, nms_threshold) 148 | nms_detections = np.concatenate( 149 | [nms_detections, cls_detections[keep]], axis=0) 150 | 151 | xx = nms_detections[:, 0].reshape(-1, 1) 152 | yy = nms_detections[:, 1].reshape(-1, 1) 153 | if letter_box: 154 | xx = xx - offset_w 155 | yy = yy - offset_h 156 | ww = nms_detections[:, 2].reshape(-1, 1) 157 | hh = nms_detections[:, 3].reshape(-1, 1) 158 | boxes = np.concatenate([xx, yy, xx+ww, yy+hh], axis=1) + 0.5 159 | boxes = boxes.astype(np.int) 160 | scores = nms_detections[:, 4] * nms_detections[:, 6] 161 | classes = nms_detections[:, 5] 162 | return boxes, scores, classes 163 | 164 | 165 | class HostDeviceMem(object): 166 | """Simple helper data class that's a little nicer to use than a 2-tuple.""" 167 | def __init__(self, host_mem, device_mem): 168 | self.host = host_mem 169 | self.device = device_mem 170 | 171 | def __str__(self): 172 | return "Host:\n" + str(self.host) + "\nDevice:\n" + str(self.device) 173 | 174 | def __repr__(self): 175 | return self.__str__() 176 | 177 | 178 | def get_input_shape(engine): 179 | """Get input shape of the TensorRT YOLO engine.""" 180 | binding = engine[0] 181 | assert engine.binding_is_input(binding) 182 | binding_dims = engine.get_binding_shape(binding) 183 | if len(binding_dims) == 4: 184 | return tuple(binding_dims[2:]) 185 | elif len(binding_dims) == 3: 186 | return tuple(binding_dims[1:]) 187 | else: 188 | raise ValueError('bad dims of binding %s: %s' % (binding, str(binding_dims))) 189 | 190 | 191 | def allocate_buffers(engine): 192 | """Allocates all host/device in/out buffers required for an engine.""" 193 | inputs = [] 194 | outputs = [] 195 | bindings = [] 196 | output_idx = 0 197 | stream = cuda.Stream() 198 | assert 3 <= len(engine) <= 5 # expect 1 input, plus 2~4 outpus 199 | for binding in engine: 200 | binding_dims = engine.get_binding_shape(binding) 201 | if 
len(binding_dims) == 4: 202 | # explicit batch case (TensorRT 7+) 203 | size = trt.volume(binding_dims) 204 | elif len(binding_dims) == 3: 205 | # implicit batch case (TensorRT 6 or older) 206 | size = trt.volume(binding_dims) * engine.max_batch_size 207 | else: 208 | raise ValueError('bad dims of binding %s: %s' % (binding, str(binding_dims))) 209 | dtype = trt.nptype(engine.get_binding_dtype(binding)) 210 | # Allocate host and device buffers 211 | host_mem = cuda.pagelocked_empty(size, dtype) 212 | device_mem = cuda.mem_alloc(host_mem.nbytes) 213 | # Append the device buffer to device bindings. 214 | bindings.append(int(device_mem)) 215 | # Append to the appropriate list. 216 | if engine.binding_is_input(binding): 217 | inputs.append(HostDeviceMem(host_mem, device_mem)) 218 | else: 219 | # each grid has 3 anchors, each anchor generates a detection 220 | # output of 7 float32 values 221 | assert size % 7 == 0 222 | outputs.append(HostDeviceMem(host_mem, device_mem)) 223 | output_idx += 1 224 | return inputs, outputs, bindings, stream 225 | 226 | 227 | def do_inference(context, bindings, inputs, outputs, stream, batch_size=1): 228 | """do_inference (for TensorRT 6.x or lower) 229 | 230 | This function is generalized for multiple inputs/outputs. 231 | Inputs and outputs are expected to be lists of HostDeviceMem objects. 232 | """ 233 | # Transfer input data to the GPU. 234 | [cuda.memcpy_htod_async(inp.device, inp.host, stream) for inp in inputs] 235 | # Run inference. 236 | context.execute_async(batch_size=batch_size, 237 | bindings=bindings, 238 | stream_handle=stream.handle) 239 | # Transfer predictions back from the GPU. 240 | [cuda.memcpy_dtoh_async(out.host, out.device, stream) for out in outputs] 241 | # Synchronize the stream 242 | stream.synchronize() 243 | # Return only the host outputs. 244 | return [out.host for out in outputs] 245 | 246 | 247 | def do_inference_v2(context, bindings, inputs, outputs, stream): 248 | """do_inference_v2 (for TensorRT 7.0+) 249 | 250 | This function is generalized for multiple inputs/outputs for full 251 | dimension networks. 252 | Inputs and outputs are expected to be lists of HostDeviceMem objects. 253 | """ 254 | # Transfer input data to the GPU. 255 | [cuda.memcpy_htod_async(inp.device, inp.host, stream) for inp in inputs] 256 | # Run inference. 257 | context.execute_async_v2(bindings=bindings, stream_handle=stream.handle) 258 | # Transfer predictions back from the GPU. 259 | [cuda.memcpy_dtoh_async(out.host, out.device, stream) for out in outputs] 260 | # Synchronize the stream 261 | stream.synchronize() 262 | # Return only the host outputs. 
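    # Each 'out.host' is a flat numpy array; callers reshape it as needed
    # (e.g. TrtYOLO reshapes each YOLO output to (-1, 7) detection rows).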
263 | return [out.host for out in outputs] 264 | 265 | 266 | class TrtYOLO(object): 267 | """TrtYOLO class encapsulates things needed to run TRT YOLO.""" 268 | 269 | def _load_engine(self): 270 | TRTbin = 'yolo/%s.trt' % self.model 271 | with open(TRTbin, 'rb') as f, trt.Runtime(self.trt_logger) as runtime: 272 | return runtime.deserialize_cuda_engine(f.read()) 273 | 274 | def __init__(self, model, category_num=80, letter_box=False, cuda_ctx=None): 275 | """Initialize TensorRT plugins, engine and conetxt.""" 276 | self.model = model 277 | self.category_num = category_num 278 | self.letter_box = letter_box 279 | self.cuda_ctx = cuda_ctx 280 | if self.cuda_ctx: 281 | self.cuda_ctx.push() 282 | 283 | self.inference_fn = do_inference if trt.__version__[0] < '7' \ 284 | else do_inference_v2 285 | self.trt_logger = trt.Logger(trt.Logger.INFO) 286 | self.engine = self._load_engine() 287 | 288 | self.input_shape = get_input_shape(self.engine) 289 | 290 | try: 291 | self.context = self.engine.create_execution_context() 292 | self.inputs, self.outputs, self.bindings, self.stream = \ 293 | allocate_buffers(self.engine) 294 | except Exception as e: 295 | raise RuntimeError('fail to allocate CUDA resources') from e 296 | finally: 297 | if self.cuda_ctx: 298 | self.cuda_ctx.pop() 299 | 300 | def __del__(self): 301 | """Free CUDA memories.""" 302 | del self.outputs 303 | del self.inputs 304 | del self.stream 305 | 306 | def detect(self, img, conf_th=0.3, letter_box=None): 307 | """Detect objects in the input image.""" 308 | letter_box = self.letter_box if letter_box is None else letter_box 309 | img_resized = _preprocess_yolo(img, self.input_shape, letter_box) 310 | 311 | # Set host input to the image. The do_inference() function 312 | # will copy the input to the GPU before executing. 
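        # Note: the assignment below rebinds inputs[0].host to the
        # preprocessed CHW float32 array; do_inference()/do_inference_v2()
        # then copies that array into the device buffer.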
313 | self.inputs[0].host = np.ascontiguousarray(img_resized) 314 | if self.cuda_ctx: 315 | self.cuda_ctx.push() 316 | trt_outputs = self.inference_fn( 317 | context=self.context, 318 | bindings=self.bindings, 319 | inputs=self.inputs, 320 | outputs=self.outputs, 321 | stream=self.stream) 322 | if self.cuda_ctx: 323 | self.cuda_ctx.pop() 324 | 325 | boxes, scores, classes = _postprocess_yolo( 326 | trt_outputs, img.shape[1], img.shape[0], conf_th, 327 | nms_threshold=0.5, input_shape=self.input_shape, 328 | letter_box=letter_box) 329 | 330 | # clip x1, y1, x2, y2 within original image 331 | boxes[:, [0, 2]] = np.clip(boxes[:, [0, 2]], 0, img.shape[1]-1) 332 | boxes[:, [1, 3]] = np.clip(boxes[:, [1, 3]], 0, img.shape[0]-1) 333 | return boxes, scores, classes 334 | -------------------------------------------------------------------------------- /zed.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import numpy as np 3 | import pyzed.sl as sl 4 | import cv2 5 | import math 6 | import logging 7 | import getopt 8 | 9 | log = logging.getLogger(__name__) 10 | logging.basicConfig(level=logging.INFO) 11 | 12 | def main(argv) : 13 | config_path = "yolov4-tiny.cfg" 14 | weight_path = "yolov4-tiny.weights" 15 | meta_path = "coco.names" 16 | svo_path = None 17 | zed_id = 0 18 | 19 | help_str = 'zed_yolo.py -c -w -m -s -z ' 20 | 21 | try: 22 | opts, args = getopt.getopt( 23 | argv, "hc:w:m:s:z:", ["config=", "weight=", "meta=", "svo_file=", "zed_id="]) 24 | except getopt.GetoptError: 25 | log.exception(help_str) 26 | sys.exit(2) 27 | 28 | for opt, arg in opts: 29 | if opt == '-h': 30 | log.info(help_str) 31 | sys.exit() 32 | elif opt in ("-c", "--config"): 33 | config_path = arg 34 | elif opt in ("-w", "--weight"): 35 | weight_path = arg 36 | elif opt in ("-m", "--meta"): 37 | meta_path = arg 38 | elif opt in ("-s", "--svo_file"): 39 | svo_path = arg 40 | elif opt in ("-z", "--zed_id"): 41 | zed_id = int(arg) 42 | 43 | # Set configuration parameters 44 | input_type = sl.InputType() 45 | 46 | if svo_path is not None: 47 | log.info("SVO file : " + svo_path) 48 | input_type.set_from_svo_file(svo_path) 49 | else: 50 | # Launch camera by id 51 | input_type.set_from_camera_id(zed_id) 52 | 53 | # Create a ZED camera object 54 | zed = sl.Camera() 55 | 56 | # Set configuration parameters 57 | input_type = sl.InputType() 58 | 59 | init = sl.InitParameters(input_t=input_type) 60 | init.camera_resolution = sl.RESOLUTION.HD1080 61 | init.depth_mode = sl.DEPTH_MODE.PERFORMANCE 62 | init.coordinate_units = sl.UNIT.MILLIMETER 63 | 64 | # Open the camera 65 | err = zed.open(init) 66 | if err != sl.ERROR_CODE.SUCCESS : 67 | print(repr(err)) 68 | zed.close() 69 | exit(1) 70 | 71 | 72 | # Set runtime parameters after opening the camera 73 | runtime = sl.RuntimeParameters() 74 | runtime.sensing_mode = sl.SENSING_MODE.STANDARD 75 | 76 | # Prepare new image size to retrieve half-resolution images 77 | image_size = zed.get_camera_information().camera_resolution 78 | image_size.width = image_size.width 79 | image_size.height = image_size.height 80 | 81 | # Declare your sl.Mat matrices 82 | image_zed = sl.Mat(image_size.width, image_size.height, sl.MAT_TYPE.U8_C4) 83 | depth_image_zed = sl.Mat(image_size.width, image_size.height, sl.MAT_TYPE.U8_C4) 84 | point_cloud = sl.Mat() 85 | #======================================= yolov4 video test et ============================================ 86 | #======== Yolov4 Tiny ağırlıklarını yüklemektedir =================== 87 | 
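    # (i.e. "test YOLOv4 on video" / "loads the YOLOv4-Tiny weights")
    # The .cfg and .weights files are loaded into OpenCV's DNN module below,
    # using the CUDA backend with an FP16 target.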
weightsPath_tiny = weight_path 88 | configPath_tiny = config_path 89 | 90 | net = cv2.dnn.readNet(weightsPath_tiny, configPath_tiny) 91 | net.setPreferableBackend(cv2.dnn.DNN_BACKEND_CUDA) 92 | net.setPreferableTarget(cv2.dnn.DNN_TARGET_CUDA_FP16) 93 | model = cv2.dnn_DetectionModel(net) 94 | 95 | 96 | 97 | def YOLOv4_video(pred_image): 98 | model.setInputParams(size=(416, 416), scale=1/255, swapRB=True) 99 | image_test = cv2.cvtColor(pred_image, cv2.COLOR_RGBA2RGB) 100 | image = image_test.copy() 101 | print('image',image.shape) 102 | confThreshold= 0.5 103 | nmsThreshold = 0.4 104 | classes, confidences, boxes = model.detect(image, confThreshold, nmsThreshold) 105 | 106 | return classes,confidences,boxes 107 | 108 | LABELS = [] 109 | with open(meta_path, 'r') as f: 110 | LABELS = [cname.strip() for cname in f.readlines()] 111 | 112 | COLORS = [[0, 0, 255], [30, 255, 255], [0,255,0]] 113 | 114 | frame_count = 0 115 | 116 | exit_flag = True 117 | 118 | while(exit_flag == True): 119 | err = zed.grab(runtime) 120 | if err == sl.ERROR_CODE.SUCCESS : 121 | # Retrieve the left image, depth image in the half-resolution 122 | zed.retrieve_image(image_zed, sl.VIEW.LEFT, sl.MEM.CPU, image_size) 123 | zed.retrieve_image(depth_image_zed, sl.VIEW.DEPTH, sl.MEM.CPU, image_size) 124 | # Retrieve the RGBA point cloud in half resolution 125 | zed.retrieve_measure(point_cloud, sl.MEASURE.XYZRGBA, sl.MEM.CPU, image_size) 126 | 127 | # Get and print distance value in mm at the center of the image 128 | # We measure the distance camera - object using Euclidean distance 129 | 130 | # To recover data from sl.Mat to use it with opencv, use the get_data() method 131 | # It returns a numpy array that can be used as a matrix with opencv 132 | image_ocv = image_zed.get_data() 133 | #depth_image_ocv = depth_image_zed.get_data() 134 | classes,confidences,boxes = YOLOv4_video(image_ocv) 135 | 136 | for cl,score,(left,top,width,height) in zip(classes,confidences,boxes): 137 | start_pooint = (int(left),int(top)) 138 | end_point = (int(left+width),int(top+height)) 139 | 140 | x = int(left + width/2) 141 | y = int(top + height/2) 142 | 143 | color = COLORS[0] 144 | 145 | img =cv2.rectangle(image_ocv,start_pooint,end_point,color,3) 146 | img = cv2.circle(img,(x,y),5,[0,0,255],5) 147 | text = f'{LABELS[cl]}: {score:0.2f}' 148 | cv2.putText(img, text, (int(left), int(top-7)), cv2.FONT_ITALIC, 1, COLORS[0], 2 ) 149 | 150 | x = round(x) 151 | y = round(y) 152 | 153 | err, point_cloud_value = point_cloud.get_value(x, y) 154 | distance = math.sqrt(point_cloud_value[0] * point_cloud_value[0] + point_cloud_value[1] * point_cloud_value[1] + point_cloud_value[2] * point_cloud_value[2]) 155 | 156 | print("Distance to Camera at (class : {0}, score : {1:0.2f}): distance : {2:0.2f} mm".format(LABELS[cl], score, distance), end="\r") 157 | 158 | cv2.putText(img,"Distance: "+str(round(distance/1000,2))+'m', (int(left), int(top + 25)) , cv2.FONT_HERSHEY_COMPLEX, 1, COLORS[1], 2) 159 | 160 | cv2.imshow("Image", img) 161 | 162 | frame_count = frame_count + 1 163 | 164 | if cv2.waitKey(1) & 0xFF == ord('q'): 165 | exit_flag = False 166 | 167 | 168 | cv2.destroyAllWindows() 169 | zed.close() 170 | 171 | print("\nFINISH") 172 | 173 | if __name__ == "__main__": 174 | main(sys.argv[1:]) 175 | -------------------------------------------------------------------------------- /zed_trt.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import numpy as np 3 | import pyzed.sl as sl 4 | import cv2 5 | 
import math 6 | from utils.yolo_classes import get_cls_dict 7 | from utils.visualization import BBoxVisualization 8 | from utils.yolo_with_plugins import TrtYOLO 9 | import os 10 | import pycuda.autoinit 11 | import time 12 | def main() : 13 | 14 | # Create a ZED camera object 15 | zed = sl.Camera() 16 | 17 | # Set configuration parameters 18 | input_type = sl.InputType() 19 | if len(sys.argv) >= 2 : 20 | input_type.set_from_svo_file(sys.argv[1]) 21 | init = sl.InitParameters(input_t=input_type) 22 | init.camera_resolution = sl.RESOLUTION.HD1080 23 | init.depth_mode = sl.DEPTH_MODE.PERFORMANCE 24 | init.coordinate_units = sl.UNIT.MILLIMETER 25 | 26 | # Open the camera 27 | err = zed.open(init) 28 | if err != sl.ERROR_CODE.SUCCESS : 29 | print(repr(err)) 30 | zed.close() 31 | exit(1) 32 | 33 | 34 | # Set runtime parameters after opening the camera 35 | runtime = sl.RuntimeParameters() 36 | runtime.sensing_mode = sl.SENSING_MODE.STANDARD 37 | 38 | # Prepare new image size to retrieve half-resolution images 39 | image_size = zed.get_camera_information().camera_resolution 40 | #image_size.width = image_size.width /2 41 | #image_size.height = image_size.height /2 42 | 43 | # Declare your sl.Mat matrices 44 | image_zed = sl.Mat(image_size.width, image_size.height, sl.MAT_TYPE.U8_C4) 45 | depth_image_zed = sl.Mat(image_size.width, image_size.height, sl.MAT_TYPE.U8_C4) 46 | point_cloud = sl.Mat() 47 | 48 | 49 | #=========== Yolov4 TensorRt ağırlıkları yüklenmektedir ======================= 50 | 51 | category_num = 17 52 | model_trt = 'yolov4' 53 | letter_box = False 54 | if category_num <= 0: 55 | raise SystemExit('ERROR: bad category_num (%d)!' % category_num) 56 | if not os.path.isfile('yolo/{}.trt'.format(model_trt)): 57 | raise SystemExit('ERROR: file (yolo/{}.trt) not found!'.format(model_trt)) 58 | 59 | cls_dict = get_cls_dict(category_num) 60 | vis = BBoxVisualization(cls_dict) 61 | trt_yolov4 = TrtYOLO(model_trt, category_num, letter_box) 62 | 63 | def YOLOv4_video(pred_image): 64 | image_test = cv2.cvtColor(pred_image, cv2.COLOR_RGBA2RGB) 65 | image = image_test.copy() 66 | boxes, confs, clss = trt_yolov4.detect(image, conf_th=0.3) 67 | return clss,confs,boxes 68 | 69 | 70 | key = ' ' 71 | LABELS = [ 'girilmez', 72 | 'tasit_trafigine_kapali', 73 | 'duz_veya_sola', 74 | 'duz_veya_saga', 75 | 'yalnizca_sola', 76 | '20_hiz_limiti_sonu', 77 | '30_limit', 78 | '20_limit', 79 | 'yalnizca_saga', 80 | 'saga_donulmez', 81 | 'sola_donulmez', 82 | 'dur', 83 | 'park_yapilmaz', 84 | 'park', 85 | 'durak', 86 | 'kirmizi_isk', 87 | 'sari_isik', 88 | 'yesil_isik'] 89 | 90 | COLORS = [[0, 0, 255]] 91 | prev_frame_time=0 92 | new_frame_time=0 93 | while key != 113 : 94 | err = zed.grab(runtime) 95 | if err == sl.ERROR_CODE.SUCCESS : 96 | # Retrieve the left image, depth image in the half-resolution 97 | zed.retrieve_image(image_zed, sl.VIEW.LEFT, sl.MEM.CPU, image_size) 98 | zed.retrieve_image(depth_image_zed, sl.VIEW.DEPTH, sl.MEM.CPU, image_size) 99 | # Retrieve the RGBA point cloud in half resolution 100 | zed.retrieve_measure(point_cloud, sl.MEASURE.XYZRGBA, sl.MEM.CPU, image_size) 101 | 102 | # Get and print distance value in mm at the center of the image 103 | # We measure the distance camera - object using Euclidean distance 104 | 105 | # To recover data from sl.Mat to use it with opencv, use the get_data() method 106 | # It returns a numpy array that can be used as a matrix with opencv 107 | image_ocv = image_zed.get_data() 108 | #depth_image_ocv = depth_image_zed.get_data() 109 | 
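            # For each detection below, the 3-D point at the box centre (x, y)
            # is read from the XYZRGBA point cloud and the distance to the
            # camera is computed as sqrt(X^2 + Y^2 + Z^2), in millimetres
            # (init.coordinate_units = sl.UNIT.MILLIMETER).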
classes,confidences,boxes = YOLOv4_video(image_ocv) 110 | 111 | for cl,score,(x_min,y_min,x_max,y_max) in zip(classes,confidences,boxes): 112 | start_pooint = (int(x_min),int(y_min)) 113 | end_point = (int(x_max),int(y_max)) 114 | 115 | x = int(x_min +( x_max-x_min)/2) 116 | y = int(y_min + (y_max-y_min)/2) 117 | color = COLORS[0] 118 | img =cv2.rectangle(image_ocv,start_pooint,end_point,color,3) 119 | img = cv2.circle(img,(x,y),5,[0,0,255],5) 120 | text = f'{LABELS[int(cl)]}: {score:0.2f}' 121 | cv2.putText(img,text,(int(x_min),int(y_min-7)),cv2.FONT_ITALIC,1,COLORS[0],2 ) 122 | 123 | x = round(x) 124 | y = round(y) 125 | err, point_cloud_value = point_cloud.get_value(x, y) 126 | distance = math.sqrt(point_cloud_value[0] * point_cloud_value[0] + 127 | point_cloud_value[1] * point_cloud_value[1] + 128 | point_cloud_value[2] * point_cloud_value[2]) 129 | print("Distance to Camera at (class : {0}, score : {1:0.2f}): distance : {2:0.2f} mm".format(LABELS[int(cl)], score, distance), end="\r") 130 | cv2.putText(img,"Distance: "+str(round(distance/1000,2))+'m',(int(x_max-180),int(y_max+30)),cv2.FONT_HERSHEY_COMPLEX,1,(0,255,0),1) 131 | 132 | new_frame_time=time.time() 133 | fps = 1/(new_frame_time-prev_frame_time) 134 | prev_frame_time = new_frame_time 135 | 136 | print('FPS : %.2f ' % fps) 137 | cv2.imshow("Image", img) 138 | 139 | 140 | #cv2.imshow("Image", image_ocv) 141 | #cv2.imshow("Depth", depth_image_ocv) 142 | 143 | key = cv2.waitKey(1) 144 | 145 | 146 | cv2.destroyAllWindows() 147 | zed.close() 148 | 149 | print("\nFINISH") 150 | 151 | if __name__ == "__main__": 152 | main() --------------------------------------------------------------------------------