├── workspace ├── jupyter_notebook │ ├── images │ │ ├── nvosd.png │ │ ├── pads.png │ │ ├── test2.png │ │ ├── ngc_key.png │ │ ├── nvinfer.png │ │ ├── pipeline.png │ │ ├── augmenting.png │ │ ├── nvtracker.png │ │ ├── nvvidconv.png │ │ ├── res_mario.jpg │ │ ├── yolo_kitti.png │ │ ├── yolo_label.png │ │ ├── yolo_mark.png │ │ ├── architecture.jpg │ │ ├── ds_overview.png │ │ ├── ds_workflow.png │ │ ├── git_branching.jpg │ │ ├── ngc_setup_key.png │ │ ├── prep_pipeline.png │ │ ├── tao_toolkit.jpeg │ │ ├── label_studio_1.png │ │ ├── label_studio_10.png │ │ ├── label_studio_11.png │ │ ├── label_studio_2.png │ │ ├── label_studio_3.png │ │ ├── label_studio_4.png │ │ ├── label_studio_5.png │ │ ├── label_studio_6.png │ │ ├── label_studio_7.png │ │ ├── label_studio_8.png │ │ ├── label_studio_9.png │ │ ├── tao_deepstream.jpeg │ │ ├── pruned_vs_unpruned.png │ │ ├── deepstream_overview.jpg │ │ ├── tao_cv_qat_workflow.png │ │ ├── dstream_deploy_options.png │ │ ├── tao_tf_user_interaction.png │ │ ├── triton_inference_server.jpg │ │ └── deepstream_python_bindings.png │ ├── 1.Data_labeling_and_preprocessing.ipynb │ └── 6.Challenge_DeepStream.ipynb ├── specs │ ├── yolo_v4_tfrecords_kitti_val.txt │ ├── yolo_v4_tfrecords_kitti_train.txt │ ├── default_spec.txt │ ├── yolo_v4_retrain_resnet18_kitti_seq.txt │ ├── yolo_v4_train_resnet18_kitti_seq.txt │ ├── yolo_v4_retrain_resnet18_kitti.txt │ └── yolo_v4_train_resnet18_kitti.txt ├── source_code │ ├── dataset.py │ ├── N4 │ │ ├── dstest2_tracker_config.txt │ │ ├── bus_call.py │ │ ├── pgie_yolov4_tao_config.txt │ │ └── config_tracker_NvDCF_perf.yml │ ├── N3 │ │ ├── user_data.py │ │ ├── postprocessor.py │ │ ├── utils.py │ │ ├── triton_model.py │ │ ├── preprocess_input.py │ │ ├── yolov4_model.py │ │ ├── yolov4_postprocessor.py │ │ └── frame.py │ ├── N5 │ │ └── calc_object_size.py │ └── N2 │ │ └── generate_val_dataset.py └── Start_here.ipynb ├── Singularity_triton ├── Singularity_tao ├── Dockerfile_deepstream ├── Singularity_deepstream ├── README.md ├── CONTRIBUTING.md ├── Deployment_Guide.md └── LICENSE /workspace/jupyter_notebook/images/nvosd.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/End-to-End-Computer-Vision/main/workspace/jupyter_notebook/images/nvosd.png -------------------------------------------------------------------------------- /workspace/jupyter_notebook/images/pads.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/End-to-End-Computer-Vision/main/workspace/jupyter_notebook/images/pads.png -------------------------------------------------------------------------------- /workspace/jupyter_notebook/images/test2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/End-to-End-Computer-Vision/main/workspace/jupyter_notebook/images/test2.png -------------------------------------------------------------------------------- /workspace/jupyter_notebook/images/ngc_key.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/End-to-End-Computer-Vision/main/workspace/jupyter_notebook/images/ngc_key.png -------------------------------------------------------------------------------- /workspace/jupyter_notebook/images/nvinfer.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/openhackathons-org/End-to-End-Computer-Vision/main/workspace/jupyter_notebook/images/nvinfer.png -------------------------------------------------------------------------------- /workspace/jupyter_notebook/images/pipeline.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/End-to-End-Computer-Vision/main/workspace/jupyter_notebook/images/pipeline.png -------------------------------------------------------------------------------- /workspace/jupyter_notebook/images/augmenting.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/End-to-End-Computer-Vision/main/workspace/jupyter_notebook/images/augmenting.png -------------------------------------------------------------------------------- /workspace/jupyter_notebook/images/nvtracker.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/End-to-End-Computer-Vision/main/workspace/jupyter_notebook/images/nvtracker.png -------------------------------------------------------------------------------- /workspace/jupyter_notebook/images/nvvidconv.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/End-to-End-Computer-Vision/main/workspace/jupyter_notebook/images/nvvidconv.png -------------------------------------------------------------------------------- /workspace/jupyter_notebook/images/res_mario.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/End-to-End-Computer-Vision/main/workspace/jupyter_notebook/images/res_mario.jpg -------------------------------------------------------------------------------- /workspace/jupyter_notebook/images/yolo_kitti.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/End-to-End-Computer-Vision/main/workspace/jupyter_notebook/images/yolo_kitti.png -------------------------------------------------------------------------------- /workspace/jupyter_notebook/images/yolo_label.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/End-to-End-Computer-Vision/main/workspace/jupyter_notebook/images/yolo_label.png -------------------------------------------------------------------------------- /workspace/jupyter_notebook/images/yolo_mark.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/End-to-End-Computer-Vision/main/workspace/jupyter_notebook/images/yolo_mark.png -------------------------------------------------------------------------------- /workspace/jupyter_notebook/images/architecture.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/End-to-End-Computer-Vision/main/workspace/jupyter_notebook/images/architecture.jpg -------------------------------------------------------------------------------- /workspace/jupyter_notebook/images/ds_overview.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/openhackathons-org/End-to-End-Computer-Vision/main/workspace/jupyter_notebook/images/ds_overview.png -------------------------------------------------------------------------------- /workspace/jupyter_notebook/images/ds_workflow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/End-to-End-Computer-Vision/main/workspace/jupyter_notebook/images/ds_workflow.png -------------------------------------------------------------------------------- /workspace/jupyter_notebook/images/git_branching.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/End-to-End-Computer-Vision/main/workspace/jupyter_notebook/images/git_branching.jpg -------------------------------------------------------------------------------- /workspace/jupyter_notebook/images/ngc_setup_key.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/End-to-End-Computer-Vision/main/workspace/jupyter_notebook/images/ngc_setup_key.png -------------------------------------------------------------------------------- /workspace/jupyter_notebook/images/prep_pipeline.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/End-to-End-Computer-Vision/main/workspace/jupyter_notebook/images/prep_pipeline.png -------------------------------------------------------------------------------- /workspace/jupyter_notebook/images/tao_toolkit.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/End-to-End-Computer-Vision/main/workspace/jupyter_notebook/images/tao_toolkit.jpeg -------------------------------------------------------------------------------- /workspace/jupyter_notebook/images/label_studio_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/End-to-End-Computer-Vision/main/workspace/jupyter_notebook/images/label_studio_1.png -------------------------------------------------------------------------------- /workspace/jupyter_notebook/images/label_studio_10.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/End-to-End-Computer-Vision/main/workspace/jupyter_notebook/images/label_studio_10.png -------------------------------------------------------------------------------- /workspace/jupyter_notebook/images/label_studio_11.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/End-to-End-Computer-Vision/main/workspace/jupyter_notebook/images/label_studio_11.png -------------------------------------------------------------------------------- /workspace/jupyter_notebook/images/label_studio_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/End-to-End-Computer-Vision/main/workspace/jupyter_notebook/images/label_studio_2.png -------------------------------------------------------------------------------- /workspace/jupyter_notebook/images/label_studio_3.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/openhackathons-org/End-to-End-Computer-Vision/main/workspace/jupyter_notebook/images/label_studio_3.png -------------------------------------------------------------------------------- /workspace/jupyter_notebook/images/label_studio_4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/End-to-End-Computer-Vision/main/workspace/jupyter_notebook/images/label_studio_4.png -------------------------------------------------------------------------------- /workspace/jupyter_notebook/images/label_studio_5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/End-to-End-Computer-Vision/main/workspace/jupyter_notebook/images/label_studio_5.png -------------------------------------------------------------------------------- /workspace/jupyter_notebook/images/label_studio_6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/End-to-End-Computer-Vision/main/workspace/jupyter_notebook/images/label_studio_6.png -------------------------------------------------------------------------------- /workspace/jupyter_notebook/images/label_studio_7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/End-to-End-Computer-Vision/main/workspace/jupyter_notebook/images/label_studio_7.png -------------------------------------------------------------------------------- /workspace/jupyter_notebook/images/label_studio_8.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/End-to-End-Computer-Vision/main/workspace/jupyter_notebook/images/label_studio_8.png -------------------------------------------------------------------------------- /workspace/jupyter_notebook/images/label_studio_9.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/End-to-End-Computer-Vision/main/workspace/jupyter_notebook/images/label_studio_9.png -------------------------------------------------------------------------------- /workspace/jupyter_notebook/images/tao_deepstream.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/End-to-End-Computer-Vision/main/workspace/jupyter_notebook/images/tao_deepstream.jpeg -------------------------------------------------------------------------------- /workspace/jupyter_notebook/images/pruned_vs_unpruned.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/End-to-End-Computer-Vision/main/workspace/jupyter_notebook/images/pruned_vs_unpruned.png -------------------------------------------------------------------------------- /workspace/jupyter_notebook/images/deepstream_overview.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/End-to-End-Computer-Vision/main/workspace/jupyter_notebook/images/deepstream_overview.jpg -------------------------------------------------------------------------------- /workspace/jupyter_notebook/images/tao_cv_qat_workflow.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/End-to-End-Computer-Vision/main/workspace/jupyter_notebook/images/tao_cv_qat_workflow.png -------------------------------------------------------------------------------- /workspace/jupyter_notebook/images/dstream_deploy_options.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/End-to-End-Computer-Vision/main/workspace/jupyter_notebook/images/dstream_deploy_options.png -------------------------------------------------------------------------------- /workspace/jupyter_notebook/images/tao_tf_user_interaction.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/End-to-End-Computer-Vision/main/workspace/jupyter_notebook/images/tao_tf_user_interaction.png -------------------------------------------------------------------------------- /workspace/jupyter_notebook/images/triton_inference_server.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/End-to-End-Computer-Vision/main/workspace/jupyter_notebook/images/triton_inference_server.jpg -------------------------------------------------------------------------------- /workspace/jupyter_notebook/images/deepstream_python_bindings.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/End-to-End-Computer-Vision/main/workspace/jupyter_notebook/images/deepstream_python_bindings.png -------------------------------------------------------------------------------- /workspace/specs/yolo_v4_tfrecords_kitti_val.txt: -------------------------------------------------------------------------------- 1 | kitti_config { 2 | root_directory_path: "/workspace/tao-experiments/data/val" 3 | image_dir_name: "image" 4 | label_dir_name: "label" 5 | image_extension: ".png" 6 | partition_mode: "random" 7 | num_partitions: 2 8 | val_split: 14 9 | num_shards: 10 10 | } 11 | image_directory_path: "/workspace/tao-experiments/data/val" 12 | -------------------------------------------------------------------------------- /workspace/specs/yolo_v4_tfrecords_kitti_train.txt: -------------------------------------------------------------------------------- 1 | kitti_config { 2 | root_directory_path: "/workspace/tao-experiments/data/training" 3 | image_dir_name: "image_2" 4 | label_dir_name: "label_2" 5 | image_extension: ".png" 6 | partition_mode: "random" 7 | num_partitions: 2 8 | val_split: 14 9 | num_shards: 10 10 | } 11 | image_directory_path: "/workspace/tao-experiments/data/training" 12 | -------------------------------------------------------------------------------- /Singularity_triton: -------------------------------------------------------------------------------- 1 | Bootstrap: docker 2 | From: nvcr.io/nvidia/tritonserver:22.05-py3-sdk 3 | 4 | %environment 5 | export XDG_RUNTIME_DIR= 6 | 7 | %post 8 | apt-get update -y 9 | apt install ffmpeg -y 10 | 11 | # Install jupyterlab and packages 12 | pip3 install jupyterlab \ 13 | ipywidgets \ 14 | attrdict \ 15 | tqdm \ 16 | matplotlib \ 17 | protobuf==3.20.* 18 | 19 | %runscript 20 | "$@" 21 | 22 | %labels 23 | Author Massimiliano, Tosin 24 | 25 | -------------------------------------------------------------------------------- /Singularity_tao: 
-------------------------------------------------------------------------------- 1 | Bootstrap: docker 2 | From: nvcr.io/nvidia/tao/tao-toolkit-tf:v3.22.05-tf1.15.5-py3 3 | 4 | %environment 5 | export XDG_RUNTIME_DIR= 6 | export PATH="$PATH:/usr/local/bin" 7 | export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/usr/local/lib" 8 | 9 | %post 10 | # Install jupyterlab and packages 11 | pip3 install jupyterlab \ 12 | ipywidgets \ 13 | matplotlib \ 14 | opencv-python-headless==4.5.4.60 \ 15 | fiftyone \ 16 | gdown 17 | 18 | %runscript 19 | "$@" 20 | 21 | %labels 22 | Author Massimiliano, Tosin 23 | 24 | -------------------------------------------------------------------------------- /workspace/specs/default_spec.txt: -------------------------------------------------------------------------------- 1 | # Spec file for tao-augment 2 | 3 | # Spatial augmentation config 4 | spatial_config{ 5 | flip_config{ 6 | flip_horizontal: true 7 | } 8 | rotation_config{ 9 | angle: 10.0 10 | units: "degrees" 11 | } 12 | translation_config{ 13 | translate_y: 20 14 | } 15 | } 16 | 17 | # Color augmentation config 18 | color_config{ 19 | hue_saturation_config{ 20 | hue_rotation_angle: 10.0 21 | saturation_shift: 1.0 22 | } 23 | brightness_config{ 24 | offset: 10 25 | } 26 | } 27 | 28 | # Blur config 29 | blur_config{ 30 | size: 5 31 | std: 1.0 32 | } 33 | 34 | # Data dimensions 35 | output_image_width: 640 36 | output_image_height: 384 37 | output_image_channel: 3 38 | image_extension: ".png" 39 | -------------------------------------------------------------------------------- /workspace/source_code/dataset.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2022 OpenACC-Standard.org. This material is released by OpenACC-Standard.org, in collaboration with NVIDIA Corporation, 2 | # under the Creative Commons Attribution 4.0 International (CC BY 4.0). These materials include references to hardware and software 3 | # developed by other entities; all applicable licensing and copyrights apply. 4 | 5 | import gdown 6 | import os 7 | import shutil 8 | 9 | #url = "https://drive.google.com/uc?id=1E8KaSkexo5U4OhiDIrfUipBbHcvwWCvJ&export=download" 10 | url = "https://drive.google.com/uc?id=1E8KaSkexo5U4OhiDIrfUipBbHcvwWCvJ&confirm=t" 11 | output = "dataset_E2ECV.zip" 12 | gdown.download(url, output, quiet=False, proxy=None) 13 | 14 | shutil.unpack_archive(output) 15 | 16 | if not os.path.exists("../data"): 17 | shutil.move("data", "../data") 18 | else: 19 | shutil.rmtree("data") 20 | 21 | if not os.path.exists("../source_code/N4/apples.h264"): 22 | shutil.move("apples.h264", "../source_code/N4/apples.h264") 23 | else: 24 | os.remove("apples.h264") 25 | 26 | if not os.path.exists("../source_code/N5/oranges.mp4"): 27 | shutil.move("oranges.mp4", "../source_code/N5/oranges.mp4") 28 | else: 29 | os.remove("oranges.mp4") 30 | 31 | if not os.path.exists("../source_code/N5/oranges"): 32 | shutil.move("oranges", "../source_code/N5/oranges") 33 | else: 34 | shutil.rmtree("oranges") 35 | 36 | os.remove(output) 37 | 38 | -------------------------------------------------------------------------------- /workspace/source_code/N4/dstest2_tracker_config.txt: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | # SPDX-FileCopyrightText: Copyright (c) 2019-2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
3 | # SPDX-License-Identifier: Apache-2.0 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | ################################################################################ 17 | 18 | # Mandatory properties for the tracker: 19 | # tracker-width 20 | # tracker-height: needs to be multiple of 6 for NvDCF 21 | # gpu-id 22 | # ll-lib-file: path to low-level tracker lib 23 | # ll-config-file: required for NvDCF, optional for KLT and IOU 24 | # 25 | [tracker] 26 | tracker-width=640 27 | tracker-height=384 28 | gpu-id=0 29 | ll-lib-file=/opt/nvidia/deepstream/deepstream/lib/libnvds_nvmultiobjecttracker.so 30 | ll-config-file=config_tracker_NvDCF_perf.yml 31 | #enable-past-frame=1 32 | enable-batch-process=1 33 | -------------------------------------------------------------------------------- /workspace/source_code/N3/user_data.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # Permission is hereby granted, free of charge, to any person obtaining 4 | # a copy of this software and associated documentation files (the 5 | # "Software"), to deal in the Software without restriction, including 6 | # without limitation the rights to use, copy, modify, merge, publish, 7 | # distribute, sublicense, and/or sell copies of the Software, and to 8 | # permit persons to whom the Software is furnished to do so, subject to 9 | # the following conditions: 10 | # 11 | # The above copyright notice and this permission notice shall be 12 | # included in all copies or substantial portions of the Software. 13 | # 14 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 15 | # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 16 | # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 17 | # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 18 | # LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 19 | # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 20 | # WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 21 | 22 | """User data requests.""" 23 | 24 | import sys 25 | 26 | if sys.version_info >= (3, 0): 27 | import queue 28 | else: 29 | import Queue as queue 30 | 31 | class UserData: 32 | """Data structure to gather queued requests.""" 33 | 34 | def __init__(self): 35 | self._completed_requests = queue.Queue() 36 | -------------------------------------------------------------------------------- /workspace/source_code/N4/bus_call.py: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | # SPDX-FileCopyrightText: Copyright (c) 2019-2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
3 | # SPDX-License-Identifier: Apache-2.0 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | ################################################################################ 17 | 18 | import gi 19 | import sys 20 | gi.require_version('Gst', '1.0') 21 | from gi.repository import Gst 22 | def bus_call(bus, message, loop): 23 | t = message.type 24 | if t == Gst.MessageType.EOS: 25 | sys.stdout.write("End-of-stream\n") 26 | loop.quit() 27 | elif t==Gst.MessageType.WARNING: 28 | err, debug = message.parse_warning() 29 | sys.stderr.write("Warning: %s: %s\n" % (err, debug)) 30 | elif t == Gst.MessageType.ERROR: 31 | err, debug = message.parse_error() 32 | sys.stderr.write("Error: %s: %s\n" % (err, debug)) 33 | loop.quit() 34 | return True 35 | 36 | -------------------------------------------------------------------------------- /Dockerfile_deepstream: -------------------------------------------------------------------------------- 1 | # Select base image 2 | FROM nvcr.io/nvidia/deepstream:6.1.1-devel 3 | 4 | # Install additional packages 5 | WORKDIR /opt/nvidia/deepstream/deepstream 6 | RUN ./user_additional_install.sh 7 | 8 | # Install required dependencies 9 | RUN apt install ffmpeg python3-gi python3-dev python3-gst-1.0 python-gi-dev git python-dev \ 10 | python3 python3-pip python3.8-dev cmake g++ build-essential libglib2.0-dev \ 11 | libglib2.0-dev-bin libgstreamer1.0-dev libtool m4 autoconf automake libgirepository1.0-dev libcairo2-dev -y 12 | 13 | # Initialization of submodules 14 | WORKDIR /opt/nvidia/deepstream/deepstream/sources 15 | RUN git clone https://github.com/NVIDIA-AI-IOT/deepstream_python_apps.git 16 | WORKDIR /opt/nvidia/deepstream/deepstream/sources/deepstream_python_apps 17 | RUN git submodule update --init 18 | 19 | # Installing Gst-python 20 | RUN apt-get install -y apt-transport-https ca-certificates -y 21 | RUN update-ca-certificates 22 | WORKDIR /opt/nvidia/deepstream/deepstream/sources/deepstream_python_apps/3rdparty/gst-python 23 | RUN ./autogen.sh 24 | RUN make 25 | RUN make install 26 | 27 | # Compiling python bindings 28 | WORKDIR /opt/nvidia/deepstream/deepstream/sources/deepstream_python_apps/bindings 29 | RUN mkdir build 30 | WORKDIR /opt/nvidia/deepstream/deepstream/sources/deepstream_python_apps/bindings/build 31 | RUN cmake .. 
-DPYTHON_MAJOR_VERSION=3 -DPYTHON_MINOR_VERSION=8 32 | RUN make 33 | 34 | # Installing python bindings 35 | RUN pip3 install ./pyds-1.1.4-py3-none*.whl 36 | 37 | # Install jupyterlab and packages 38 | WORKDIR /opt/nvidia/deepstream/deepstream-6.1 39 | RUN pip3 install jupyterlab \ 40 | ipywidgets \ 41 | matplotlib \ 42 | scipy \ 43 | imutils \ 44 | opencv-python 45 | 46 | -------------------------------------------------------------------------------- /Singularity_deepstream: -------------------------------------------------------------------------------- 1 | # Select base image 2 | Bootstrap: docker 3 | From: nvcr.io/nvidia/deepstream:6.1.1-devel 4 | 5 | %environment 6 | export XDG_RUNTIME_DIR= 7 | 8 | %post 9 | # Install additional packages 10 | cd /opt/nvidia/deepstream/deepstream 11 | ./user_additional_install.sh 12 | 13 | # Install required dependencies 14 | apt install ffmpeg python3-gi python3-dev python3-gst-1.0 python-gi-dev git python-dev \ 15 | python3 python3-pip python3.8-dev cmake g++ build-essential libglib2.0-dev \ 16 | libglib2.0-dev-bin libgstreamer1.0-dev libtool m4 autoconf automake libgirepository1.0-dev libcairo2-dev -y 17 | 18 | # Initialization of submodules 19 | cd /opt/nvidia/deepstream/deepstream/sources 20 | git clone https://github.com/NVIDIA-AI-IOT/deepstream_python_apps.git 21 | cd /opt/nvidia/deepstream/deepstream/sources/deepstream_python_apps 22 | git checkout v1.1.4 23 | git submodule update --init 24 | 25 | # Installing Gst-python 26 | apt-get install -y apt-transport-https ca-certificates -y 27 | update-ca-certificates 28 | cd /opt/nvidia/deepstream/deepstream/sources/deepstream_python_apps/3rdparty/gst-python 29 | ./autogen.sh 30 | make 31 | make install 32 | 33 | # Compiling python bindings 34 | cd /opt/nvidia/deepstream/deepstream/sources/deepstream_python_apps/bindings 35 | mkdir build 36 | cd /opt/nvidia/deepstream/deepstream/sources/deepstream_python_apps/bindings/build 37 | cmake .. -DPYTHON_MAJOR_VERSION=3 -DPYTHON_MINOR_VERSION=8 38 | make 39 | 40 | # Installing python bindings 41 | pip3 install ./pyds-1.1.4-py3-none*.whl 42 | 43 | # Install jupyterlab and packages 44 | pip3 install jupyterlab \ 45 | ipywidgets \ 46 | matplotlib \ 47 | scipy \ 48 | imutils \ 49 | opencv-python 50 | 51 | %runscript 52 | "$@" 53 | 54 | %labels 55 | Author Massimiliano, Tosin 56 | 57 | -------------------------------------------------------------------------------- /workspace/source_code/N3/postprocessor.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # Permission is hereby granted, free of charge, to any person obtaining 4 | # a copy of this software and associated documentation files (the 5 | # "Software"), to deal in the Software without restriction, including 6 | # without limitation the rights to use, copy, modify, merge, publish, 7 | # distribute, sublicense, and/or sell copies of the Software, and to 8 | # permit persons to whom the Software is furnished to do so, subject to 9 | # the following conditions: 10 | # 11 | # The above copyright notice and this permission notice shall be 12 | # included in all copies or substantial portions of the Software. 13 | # 14 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 15 | # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 16 | # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 17 | # NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 18 | # LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 19 | # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 20 | # WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 21 | 22 | """Simple class to run post processing of Triton Inference outputs.""" 23 | 24 | import os 25 | 26 | class Postprocessor(object): 27 | """Class to run post processing of Triton Tensors.""" 28 | 29 | def __init__(self, batch_size, frames, output_path, data_format): 30 | """Initialize a post processor class. 31 | 32 | Args: 33 | batch_size (int): Number of images in the batch. 34 | frames (list): List of images. 35 | output_path (str): Unix path to the output rendered images and labels. 36 | data_format (str): Order of the input model dimensions. 37 | "channels_first": CHW order. 38 | "channels_last": HWC order. 39 | """ 40 | self.batch_size = batch_size 41 | self.frames = frames 42 | self.output_path = output_path 43 | self.data_format = data_format 44 | if not os.path.exists(self.output_path): 45 | os.makedirs(self.output_path) 46 | self.initialized = True 47 | 48 | def apply(self, output_tensors, this_id, render=True): 49 | """Apply the post processor to the outputs.""" 50 | raise NotImplementedError("Base class doesn't implement any post-processing") 51 | -------------------------------------------------------------------------------- /workspace/source_code/N5/calc_object_size.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2022 OpenACC-Standard.org. This material is released by OpenACC-Standard.org, in collaboration with NVIDIA Corporation, 2 | # under the Creative Commons Attribution 4.0 International (CC BY 4.0). These materials include references to hardware and software 3 | # developed by other entities; all applicable licensing and copyrights apply. 
4 | 5 | import cv2 6 | import imutils 7 | import numpy as np 8 | 9 | 10 | def calc_object_size(img_path, output_path="output", pixels_per_metric=38): 11 | """Calculate the size of an object in the image using color thresholding""" 12 | 13 | # load image with OpenCV and blur it slightly 14 | image = cv2.imread(img_path) 15 | blurred = cv2.GaussianBlur(image, (3, 3), 0) 16 | # convert to hsv color space 17 | hsv = cv2.cvtColor(blurred, cv2.COLOR_BGR2HSV) 18 | 19 | # color thresholding 20 | # orange color range values: Hue (5 - 25) 21 | lower = np.array([5, 130, 155]) 22 | upper = np.array([25, 255, 255]) 23 | # using inRange function to get only orange colors 24 | mask = cv2.inRange(hsv, lower, upper) 25 | # remove noise 26 | mask = cv2.erode(mask, None, iterations=2) 27 | mask = cv2.dilate(mask, None, iterations=2) 28 | 29 | # find contours in the edge map 30 | cnts = cv2.findContours(mask.copy(), cv2.RETR_EXTERNAL, 31 | cv2.CHAIN_APPROX_SIMPLE) 32 | cnts = imutils.grab_contours(cnts) 33 | # sort the contours and keep the largest 34 | cnts = sorted(cnts, key=cv2.contourArea, reverse=True) 35 | c = cnts[0] 36 | 37 | # compute the minimum enclosing circle of the contour 38 | orig = image.copy() 39 | (x, y), radius = cv2.minEnclosingCircle(c) 40 | 41 | # draw the circle 42 | cv2.circle(orig, (int(x), int(y)), int(radius), (0, 255, 0), 2) 43 | 44 | # draw a diameter and end points 45 | cv2.line(orig, (int(x - radius), int(y)), (int(x + radius), int(y)), 46 | (255, 0, 255), 2) 47 | cv2.circle(orig, (int(x - radius), int(y)), 5, (255, 0, 0), -1) 48 | cv2.circle(orig, (int(x + radius), int(y)), 5, (255, 0, 0), -1) 49 | 50 | # draw the center 51 | cv2.circle(orig, (int(x), int(y)), 5, (0, 0, 255), -1) 52 | 53 | # compute the size of the object 54 | dimR = radius / pixels_per_metric 55 | print(f"Diameter of the object: {2 * dimR:.1f}cm") 56 | 57 | # draw the object sizes on the image 58 | cv2.putText(orig, "{:.1f}cm".format(2 * dimR), 59 | (int(x - 15), int(y - 10)), cv2.FONT_HERSHEY_SIMPLEX, 60 | 0.65, (0, 0, 0), 2) 61 | 62 | # save the output image 63 | cv2.imwrite(output_path, orig) 64 | 65 | return 2 * dimR 66 | -------------------------------------------------------------------------------- /workspace/source_code/N2/generate_val_dataset.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved. 2 | 3 | """Script to generate val dataset for SSD/DSSD tutorial.""" 4 | 5 | from __future__ import absolute_import 6 | from __future__ import division 7 | from __future__ import print_function 8 | 9 | import argparse 10 | import os 11 | 12 | 13 | def parse_args(args=None): 14 | """Parse the arguments.""" 15 | parser = argparse.ArgumentParser(description='Generate val dataset for SSD/DSSD tutorial') 16 | 17 | parser.add_argument( 18 | "--input_image_dir", 19 | type=str, 20 | required=True, 21 | help="Input directory to KITTI training dataset images." 22 | ) 23 | 24 | parser.add_argument( 25 | "--input_label_dir", 26 | type=str, 27 | required=True, 28 | help="Input directory to KITTI training dataset labels." 29 | ) 30 | 31 | parser.add_argument( 32 | "--output_dir", 33 | type=str, 34 | required=True, 35 | help="Output directory to TLT val dataset."
36 | ) 37 | 38 | parser.add_argument( 39 | "--val_split", 40 | type=int, 41 | required=False, 42 | default=10, 43 | help="Percentage of training dataset for generating val dataset" 44 | ) 45 | 46 | return parser.parse_args(args) 47 | 48 | 49 | def main(args=None): 50 | """Main function for data preparation.""" 51 | 52 | args = parse_args(args) 53 | 54 | img_files = [] 55 | for file_name in os.listdir(args.input_image_dir): 56 | if file_name.split(".")[-1] == "png": 57 | img_files.append(file_name) 58 | 59 | total_cnt = len(img_files) 60 | val_ratio = float(args.val_split) / 100.0 61 | val_cnt = int(total_cnt * val_ratio) 62 | train_cnt = total_cnt - val_cnt 63 | val_img_list = img_files[0:val_cnt] 64 | 65 | target_img_path = os.path.join(args.output_dir, "image") 66 | target_label_path = os.path.join(args.output_dir, "label") 67 | 68 | if not os.path.exists(target_img_path): 69 | os.makedirs(target_img_path) 70 | else: 71 | print("This script will not run as output image path already exists.") 72 | return 73 | 74 | if not os.path.exists(target_label_path): 75 | os.makedirs(target_label_path) 76 | else: 77 | print("This script will not run as output label path already exists.") 78 | return 79 | 80 | print("Total {} samples in KITTI training dataset".format(total_cnt)) 81 | print("{} for train and {} for val".format(train_cnt, val_cnt)) 82 | 83 | for img_name in val_img_list: 84 | label_name = img_name.split(".")[0] + ".txt" 85 | os.rename(os.path.join(args.input_image_dir, img_name), 86 | os.path.join(target_img_path, img_name)) 87 | os.rename(os.path.join(args.input_label_dir, label_name), 88 | os.path.join(target_label_path, label_name)) 89 | 90 | 91 | if __name__ == "__main__": 92 | main() 93 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # End-to-End Computer Vision Bootcamp 2 | 3 | The **End-to-End Computer Vision Bootcamp** is designed from a real-world perspective and follows the data processing, development, and deployment pipeline paradigm using a variety of tools. Through hands-on exercises, attendees will learn the fundamentals of preprocessing custom images, speeding up the development process using transfer learning for model training, and deploying trained models for fast and scalable AI in production.
4 | 5 | ## Bootcamp Content 6 | 7 | The content is structured in five modules with an additional introductory notebook and two challenge notebooks: 8 | 9 | - Welcome to **end-to-end computer vision** bootcamp 10 | - Lab 1: Data labeling and preprocessing 11 | - Lab 2: Object detection using TAO YOLOv4 12 | - Lab 3: Model deployment with Triton Inference Server 13 | - Lab 4: Model deployment with DeepStream 14 | - Lab 5: Measure object size using OpenCV 15 | - Challenge 1: DeepStream SDK 16 | - Challenge 2: Triton Inference Server 17 | 18 | ## Tools and Frameworks 19 | 20 | The tools and frameworks used in the bootcamp are as follows: 21 | 22 | - [NVIDIA® TAO Toolkit](https://developer.nvidia.com/tao-toolkit) 23 | - [NVIDIA DeepStream SDK](https://developer.nvidia.com/deepstream-sdk) 24 | - [NVIDIA Triton™ Inference Server](https://www.nvidia.com/en-us/ai-data-science/products/triton-inference-server/) 25 | - [NVIDIA TensorRT™](https://developer.nvidia.com/tensorrt) 26 | - [OpenCV](https://opencv.org/) 27 | - [Label Studio](https://labelstud.io/) 28 | 29 | 30 | ## Bootcamp Duration 31 | 32 | The total bootcamp material takes approximately 8.5 hours to cover. It is recommended to divide the teaching of the material across two days, covering the first two notebooks (Lab 1 and Lab 2) in one session and the rest in the next session. 33 | 34 | 35 | ## Bootcamp Prerequisites 36 | 37 | A basic understanding of Deep Learning and Python programming, as well as familiarity with NVIDIA® NGC™, is required. 38 | 39 | 40 | ## Deploying the Bootcamp Materials 41 | 42 | To deploy the labs, please refer to the Deployment Guide [here](https://github.com/openhackathons-org/End-to-End-Computer-Vision/blob/main/Deployment_Guide.md). 43 | 44 | 45 | ## Attribution 46 | This material originates from the OpenHackathons GitHub repository. Check out additional materials [here](https://github.com/openhackathons-org). 47 | 48 | Don't forget to check out additional [Open Hackathons Resources](https://www.openhackathons.org/s/technical-resources) and join our [OpenACC and Hackathons Slack Channel](https://www.openacc.org/community#slack) to share your experience and get more help from the community. 49 | 50 | 51 | ## Licensing 52 | 53 | Copyright © 2023 OpenACC-Standard.org. This material is released by OpenACC-Standard.org, in collaboration with NVIDIA Corporation, under the Creative Commons Attribution 4.0 International (CC BY 4.0). These materials may include references to hardware and software developed by other entities; all applicable licensing and copyrights apply.
54 | 55 | -------------------------------------------------------------------------------- /workspace/specs/yolo_v4_retrain_resnet18_kitti_seq.txt: -------------------------------------------------------------------------------- 1 | random_seed: 42 2 | yolov4_config { 3 | big_anchor_shape: "[(566.97, 301.71), (526.13, 354.46), (569.45, 339.69)]" 4 | mid_anchor_shape: "[(502.15, 273.98), (507.71, 322.29), (444.95, 374.63)]" 5 | small_anchor_shape: "[(284.35, 384.00), (557.86, 223.83), (441.60, 297.71)]" 6 | box_matching_iou: 0.25 7 | matching_neutral_box_iou: 0.5 8 | arch: "resnet" 9 | nlayers: 18 10 | arch_conv_blocks: 2 11 | loss_loc_weight: 1.0 12 | loss_neg_obj_weights: 1.0 13 | loss_class_weights: 1.0 14 | label_smoothing: 0.0 15 | big_grid_xy_extend: 0.05 16 | mid_grid_xy_extend: 0.1 17 | small_grid_xy_extend: 0.2 18 | freeze_bn: false 19 | #freeze_blocks: [0, 1, 2] 20 | force_relu: false 21 | } 22 | training_config { 23 | batch_size_per_gpu: 8 24 | num_epochs: 15 25 | enable_qat: false 26 | checkpoint_interval: 3 27 | learning_rate { 28 | soft_start_cosine_annealing_schedule { 29 | min_learning_rate: 1e-7 30 | max_learning_rate: 1e-4 31 | soft_start: 0.3 32 | } 33 | } 34 | regularizer { 35 | type: NO_REG 36 | weight: 3e-9 37 | } 38 | optimizer { 39 | adam { 40 | epsilon: 1e-7 41 | beta1: 0.9 42 | beta2: 0.999 43 | amsgrad: false 44 | } 45 | } 46 | pruned_model_path: "EXPERIMENT_DIR/experiment_dir_pruned/yolov4_resnet18_pruned.tlt" 47 | } 48 | eval_config { 49 | average_precision_mode: SAMPLE 50 | batch_size: 8 51 | matching_iou_threshold: 0.5 52 | } 53 | nms_config { 54 | confidence_threshold: 0.001 55 | clustering_iou_threshold: 0.5 56 | top_k: 200 57 | } 58 | augmentation_config { 59 | hue: 0.1 60 | saturation: 1.5 61 | exposure: 1.5 62 | vertical_flip: 0.5 63 | horizontal_flip: 0.5 64 | jitter: 0.3 65 | output_width: 640 66 | output_height: 384 67 | output_channel: 3 68 | randomize_input_shape_period: 0 69 | mosaic_prob: 0.5 70 | mosaic_min_ratio: 0.2 71 | } 72 | dataset_config { 73 | data_sources: { 74 | label_directory_path: "/workspace/tao-experiments/data/training/label_2" 75 | image_directory_path: "/workspace/tao-experiments/data/training/image_2" 76 | } 77 | include_difficult_in_training: true 78 | target_class_mapping { 79 | key: "freshapple" 80 | value: "freshapple" 81 | } 82 | target_class_mapping { 83 | key: "freshbanana" 84 | value: "freshbanana" 85 | } 86 | target_class_mapping { 87 | key: "freshorange" 88 | value: "freshorange" 89 | } 90 | target_class_mapping { 91 | key: "rottenapple" 92 | value: "rottenapple" 93 | } 94 | target_class_mapping { 95 | key: "rottenbanana" 96 | value: "rottenbanana" 97 | } 98 | target_class_mapping { 99 | key: "rottenorange" 100 | value: "rottenorange" 101 | } 102 | validation_data_sources: { 103 | label_directory_path: "/workspace/tao-experiments/data/val/label" 104 | image_directory_path: "/workspace/tao-experiments/data/val/image" 105 | } 106 | } 107 | -------------------------------------------------------------------------------- /workspace/specs/yolo_v4_train_resnet18_kitti_seq.txt: -------------------------------------------------------------------------------- 1 | random_seed: 42 2 | yolov4_config { 3 | big_anchor_shape: "[(566.97, 301.71), (526.13, 354.46), (569.45, 339.69)]" 4 | mid_anchor_shape: "[(502.15, 273.98), (507.71, 322.29), (444.95, 374.63)]" 5 | small_anchor_shape: "[(284.35, 384.00), (557.86, 223.83), (441.60, 297.71)]" 6 | box_matching_iou: 0.25 7 | matching_neutral_box_iou: 0.5 8 | arch: "resnet" 9 | nlayers: 18 
10 | arch_conv_blocks: 2 11 | loss_loc_weight: 1.0 12 | loss_neg_obj_weights: 1.0 13 | loss_class_weights: 1.0 14 | label_smoothing: 0.0 15 | big_grid_xy_extend: 0.05 16 | mid_grid_xy_extend: 0.1 17 | small_grid_xy_extend: 0.2 18 | freeze_bn: false 19 | #freeze_blocks: [0, 1, 2] 20 | force_relu: false 21 | } 22 | training_config { 23 | batch_size_per_gpu: 8 24 | num_epochs: 30 25 | enable_qat: false 26 | checkpoint_interval: 3 27 | learning_rate { 28 | soft_start_cosine_annealing_schedule { 29 | min_learning_rate: 1e-7 30 | max_learning_rate: 1e-4 31 | soft_start: 0.3 32 | } 33 | } 34 | regularizer { 35 | type: L1 36 | weight: 3e-5 37 | } 38 | optimizer { 39 | adam { 40 | epsilon: 1e-7 41 | beta1: 0.9 42 | beta2: 0.999 43 | amsgrad: false 44 | } 45 | } 46 | pretrain_model_path: "EXPERIMENT_DIR/pretrained_resnet18/pretrained_object_detection_vresnet18/resnet_18.hdf5" 47 | } 48 | eval_config { 49 | average_precision_mode: SAMPLE 50 | batch_size: 8 51 | matching_iou_threshold: 0.5 52 | } 53 | nms_config { 54 | confidence_threshold: 0.001 55 | clustering_iou_threshold: 0.5 56 | top_k: 200 57 | } 58 | augmentation_config { 59 | hue: 0.1 60 | saturation: 1.5 61 | exposure: 1.5 62 | vertical_flip: 0.5 63 | horizontal_flip: 0.5 64 | jitter: 0.3 65 | output_width: 640 66 | output_height: 384 67 | output_channel: 3 68 | randomize_input_shape_period: 0 69 | mosaic_prob: 0.5 70 | mosaic_min_ratio: 0.2 71 | } 72 | dataset_config { 73 | data_sources: { 74 | label_directory_path: "/workspace/tao-experiments/data/training/label_2" 75 | image_directory_path: "/workspace/tao-experiments/data/training/image_2" 76 | } 77 | include_difficult_in_training: true 78 | target_class_mapping { 79 | key: "freshapple" 80 | value: "freshapple" 81 | } 82 | target_class_mapping { 83 | key: "freshbanana" 84 | value: "freshbanana" 85 | } 86 | target_class_mapping { 87 | key: "freshorange" 88 | value: "freshorange" 89 | } 90 | target_class_mapping { 91 | key: "rottenapple" 92 | value: "rottenapple" 93 | } 94 | target_class_mapping { 95 | key: "rottenbanana" 96 | value: "rottenbanana" 97 | } 98 | target_class_mapping { 99 | key: "rottenorange" 100 | value: "rottenorange" 101 | } 102 | validation_data_sources: { 103 | label_directory_path: "/workspace/tao-experiments/data/val/label" 104 | image_directory_path: "/workspace/tao-experiments/data/val/image" 105 | } 106 | } 107 | -------------------------------------------------------------------------------- /workspace/source_code/N3/utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # Permission is hereby granted, free of charge, to any person obtaining 4 | # a copy of this software and associated documentation files (the 5 | # "Software"), to deal in the Software without restriction, including 6 | # without limitation the rights to use, copy, modify, merge, publish, 7 | # distribute, sublicense, and/or sell copies of the Software, and to 8 | # permit persons to whom the Software is furnished to do so, subject to 9 | # the following conditions: 10 | # 11 | # The above copyright notice and this permission notice shall be 12 | # included in all copies or substantial portions of the Software. 13 | # 14 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 15 | # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 16 | # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 17 | # NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 18 | # LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 19 | # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 20 | # WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 21 | 22 | 23 | from attrdict import AttrDict 24 | 25 | import tritonclient.grpc as grpcclient 26 | import tritonclient.http as httpclient 27 | 28 | 29 | def completion_callback(user_data, result, error): 30 | """Callback function used for async_stream_infer().""" 31 | user_data._completed_requests.put((result, error)) 32 | 33 | 34 | def convert_http_metadata_config(_metadata, _config): 35 | """Convert the http metadata to class Dict.""" 36 | _model_metadata = AttrDict(_metadata) 37 | _model_config = AttrDict(_config) 38 | 39 | return _model_metadata, _model_config 40 | 41 | 42 | def requestGenerator(batched_image_data, input_name, output_name, dtype, protocol, 43 | num_classes=0): 44 | """Generator for triton inference requests. 45 | 46 | Args: 47 | batched_image_data (np.ndarray): Numpy array of a batch of images. 48 | input_name (str): Name of the input array 49 | output_name (list(str)): Name of the model outputs 50 | dtype: Tensor data type for Triton 51 | protocol (str): The protocol used to communicate between the Triton 52 | server and TAO Toolkit client. 53 | num_classes (int): The number of classes in the network. 54 | 55 | Yields: 56 | inputs 57 | outputs 58 | model_name (str): Name of the triton model 59 | model_version (int): Version number 60 | """ 61 | if protocol == "grpc": 62 | client = grpcclient 63 | else: 64 | client = httpclient 65 | 66 | # Set the input data 67 | inputs = [client.InferInput(input_name, batched_image_data.shape, dtype)] 68 | inputs[0].set_data_from_numpy(batched_image_data) 69 | 70 | outputs = [ 71 | client.InferRequestedOutput( 72 | out_name, class_count=num_classes 73 | ) for out_name in output_name 74 | ] 75 | 76 | yield inputs, outputs 77 | -------------------------------------------------------------------------------- /workspace/specs/yolo_v4_retrain_resnet18_kitti.txt: -------------------------------------------------------------------------------- 1 | random_seed: 42 2 | yolov4_config { 3 | big_anchor_shape: "[(566.97, 301.71), (526.13, 354.46), (569.45, 339.69)]" 4 | mid_anchor_shape: "[(502.15, 273.98), (507.71, 322.29), (444.95, 374.63)]" 5 | small_anchor_shape: "[(284.35, 384.00), (557.86, 223.83), (441.60, 297.71)]" 6 | box_matching_iou: 0.25 7 | matching_neutral_box_iou: 0.5 8 | arch: "resnet" 9 | nlayers: 18 10 | arch_conv_blocks: 2 11 | loss_loc_weight: 1.0 12 | loss_neg_obj_weights: 1.0 13 | loss_class_weights: 1.0 14 | label_smoothing: 0.0 15 | big_grid_xy_extend: 0.05 16 | mid_grid_xy_extend: 0.1 17 | small_grid_xy_extend: 0.2 18 | freeze_bn: false 19 | #freeze_blocks: [0, 1, 2] 20 | force_relu: false 21 | } 22 | training_config { 23 | batch_size_per_gpu: 8 24 | num_epochs: 15 25 | enable_qat: false 26 | checkpoint_interval: 3 27 | learning_rate { 28 | soft_start_cosine_annealing_schedule { 29 | min_learning_rate: 1e-7 30 | max_learning_rate: 1e-4 31 | soft_start: 0.3 32 | } 33 | } 34 | regularizer { 35 | type: NO_REG 36 | weight: 3e-9 37 | } 38 | optimizer { 39 | adam { 40 | epsilon: 1e-7 41 | beta1: 0.9 42 | beta2: 0.999 43 | amsgrad: false 44 | } 45 | } 46 | pruned_model_path: "EXPERIMENT_DIR/experiment_dir_pruned/yolov4_resnet18_pruned.tlt" 47 | } 48 | eval_config { 49 | average_precision_mode: SAMPLE 50 | batch_size: 8 51 | matching_iou_threshold: 0.5 52
| } 53 | nms_config { 54 | confidence_threshold: 0.001 55 | clustering_iou_threshold: 0.5 56 | top_k: 200 57 | force_on_cpu: true 58 | } 59 | augmentation_config { 60 | hue: 0.1 61 | saturation: 1.5 62 | exposure: 1.5 63 | vertical_flip: 0.5 64 | horizontal_flip: 0.5 65 | jitter: 0.3 66 | output_width: 640 67 | output_height: 384 68 | output_channel: 3 69 | randomize_input_shape_period: 0 70 | mosaic_prob: 0.5 71 | mosaic_min_ratio: 0.2 72 | } 73 | dataset_config { 74 | data_sources: { 75 | tfrecords_path: "/workspace/tao-experiments/data/training/tfrecords/train*" 76 | image_directory_path: "/workspace/tao-experiments/data/training" 77 | } 78 | include_difficult_in_training: true 79 | image_extension: "png" 80 | target_class_mapping { 81 | key: "freshapple" 82 | value: "freshapple" 83 | } 84 | target_class_mapping { 85 | key: "freshbanana" 86 | value: "freshbanana" 87 | } 88 | target_class_mapping { 89 | key: "freshorange" 90 | value: "freshorange" 91 | } 92 | target_class_mapping { 93 | key: "rottenapple" 94 | value: "rottenapple" 95 | } 96 | target_class_mapping { 97 | key: "rottenbanana" 98 | value: "rottenbanana" 99 | } 100 | target_class_mapping { 101 | key: "rottenorange" 102 | value: "rottenorange" 103 | } 104 | validation_data_sources: { 105 | tfrecords_path: "/workspace/tao-experiments/data/val/tfrecords/val*" 106 | image_directory_path: "/workspace/tao-experiments/data/val" 107 | } 108 | } 109 | -------------------------------------------------------------------------------- /workspace/specs/yolo_v4_train_resnet18_kitti.txt: -------------------------------------------------------------------------------- 1 | random_seed: 42 2 | yolov4_config { 3 | big_anchor_shape: "[(566.97, 301.71), (526.13, 354.46), (569.45, 339.69)]" 4 | mid_anchor_shape: "[(502.15, 273.98), (507.71, 322.29), (444.95, 374.63)]" 5 | small_anchor_shape: "[(284.35, 384.00), (557.86, 223.83), (441.60, 297.71)]" 6 | box_matching_iou: 0.25 7 | matching_neutral_box_iou: 0.5 8 | arch: "resnet" 9 | nlayers: 18 10 | arch_conv_blocks: 2 11 | loss_loc_weight: 1.0 12 | loss_neg_obj_weights: 1.0 13 | loss_class_weights: 1.0 14 | label_smoothing: 0.0 15 | big_grid_xy_extend: 0.05 16 | mid_grid_xy_extend: 0.1 17 | small_grid_xy_extend: 0.2 18 | freeze_bn: false 19 | #freeze_blocks: [0, 1, 2] 20 | force_relu: false 21 | } 22 | training_config { 23 | batch_size_per_gpu: 8 24 | num_epochs: 30 25 | enable_qat: false 26 | checkpoint_interval: 3 27 | learning_rate { 28 | soft_start_cosine_annealing_schedule { 29 | min_learning_rate: 1e-7 30 | max_learning_rate: 1e-4 31 | soft_start: 0.3 32 | } 33 | } 34 | regularizer { 35 | type: L1 36 | weight: 3e-5 37 | } 38 | optimizer { 39 | adam { 40 | epsilon: 1e-7 41 | beta1: 0.9 42 | beta2: 0.999 43 | amsgrad: false 44 | } 45 | } 46 | pretrain_model_path: "EXPERIMENT_DIR/pretrained_resnet18/pretrained_object_detection_vresnet18/resnet_18.hdf5" 47 | } 48 | eval_config { 49 | average_precision_mode: SAMPLE 50 | batch_size: 8 51 | matching_iou_threshold: 0.5 52 | } 53 | nms_config { 54 | confidence_threshold: 0.001 55 | clustering_iou_threshold: 0.5 56 | force_on_cpu: true 57 | top_k: 200 58 | } 59 | augmentation_config { 60 | hue: 0.1 61 | saturation: 1.5 62 | exposure: 1.5 63 | vertical_flip: 0.5 64 | horizontal_flip: 0.5 65 | jitter: 0.3 66 | output_width: 640 67 | output_height: 384 68 | output_channel: 3 69 | randomize_input_shape_period: 0 70 | mosaic_prob: 0.5 71 | mosaic_min_ratio: 0.2 72 | } 73 | dataset_config { 74 | data_sources: { 75 | tfrecords_path: 
"/workspace/tao-experiments/data/training/tfrecords/train*" 76 | image_directory_path: "/workspace/tao-experiments/data/training" 77 | } 78 | include_difficult_in_training: true 79 | image_extension: "png" 80 | target_class_mapping { 81 | key: "freshapple" 82 | value: "freshapple" 83 | } 84 | target_class_mapping { 85 | key: "freshbanana" 86 | value: "freshbanana" 87 | } 88 | target_class_mapping { 89 | key: "freshorange" 90 | value: "freshorange" 91 | } 92 | target_class_mapping { 93 | key: "rottenapple" 94 | value: "rottenapple" 95 | } 96 | target_class_mapping { 97 | key: "rottenbanana" 98 | value: "rottenbanana" 99 | } 100 | target_class_mapping { 101 | key: "rottenorange" 102 | value: "rottenorange" 103 | } 104 | validation_data_sources: { 105 | tfrecords_path: "/workspace/tao-experiments/data/val/tfrecords/val*" 106 | image_directory_path: "/workspace/tao-experiments/data/val" 107 | } 108 | } 109 | -------------------------------------------------------------------------------- /workspace/source_code/N4/pgie_yolov4_tao_config.txt: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | # Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. 3 | # 4 | # Permission is hereby granted, free of charge, to any person obtaining a 5 | # copy of this software and associated documentation files (the "Software"), 6 | # to deal in the Software without restriction, including without limitation 7 | # the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 | # and/or sell copies of the Software, and to permit persons to whom the 9 | # Software is furnished to do so, subject to the following conditions: 10 | # 11 | # The above copyright notice and this permission notice shall be included in 12 | # all copies or substantial portions of the Software. 13 | # 14 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 | # THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 19 | # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 20 | # DEALINGS IN THE SOFTWARE. 
21 | ################################################################################ 22 | 23 | [property] 24 | gpu-id=0 25 | net-scale-factor=1.0 26 | offsets=103.939;116.779;123.68 27 | model-color-format=1 28 | ## Path to yolo_v4 labels.txt 29 | labelfile-path=../../yolo_v4/export/labels.txt 30 | ## Path to generated TensorRT engine 31 | #model-engine-file=../../yolo_v4/export/yolov4_resnet18_epoch_015.etlt_b1_gpu0_fp32.engine 32 | ## Path to int8-calib-file 33 | #int8-calib-file=../../yolo_v4/export/cal.bin 34 | ## Path to exported yolo_v4 model.etlt 35 | tlt-encoded-model=../../yolo_v4/export/yolov4_resnet18_epoch_015.etlt 36 | ## Key to decrypt model 37 | tlt-model-key=nvidia_tlt 38 | infer-dims=3;384;640 39 | maintain-aspect-ratio=0 40 | uff-input-order=0 41 | uff-input-blob-name=Input 42 | batch-size=1 43 | ## 0=FP32, 1=INT8, 2=FP16 mode 44 | network-mode=0 45 | num-detected-classes=6 46 | interval=0 47 | gie-unique-id=1 48 | is-classifier=0 49 | output-tensor-meta=0 50 | #network-type=0 51 | cluster-mode=3 52 | output-blob-names=BatchedNMS 53 | parse-bbox-func-name=NvDsInferParseCustomBatchedNMSTLT 54 | custom-lib-path=/opt/nvidia/deepstream/deepstream/lib/libnvds_infercustomparser.so 55 | layer-device-precision=cls/mul:fp32:gpu;box/mul_6:fp32:gpu;box/add:fp32:gpu;box/mul_4:fp32:gpu;box/add_1:fp32:gpu;cls/Reshape_reshape:fp32:gpu;box/Reshape_reshape:fp32:gpu;encoded_detections:fp32:gpu;bg_leaky_conv1024_lrelu:fp32:gpu;sm_bbox_processor/concat_concat:fp32:gpu;sm_bbox_processor/sub:fp32:gpu;sm_bbox_processor/Exp:fp32:gpu;yolo_conv1_4_lrelu:fp32:gpu;yolo_conv1_3_1_lrelu:fp32:gpu;md_leaky_conv512_lrelu:fp32:gpu;sm_bbox_processor/Reshape_reshape:fp32:gpu;conv_sm_object:fp32:gpu;yolo_conv5_1_lrelu:fp32:gpu;concatenate_6:fp32:gpu;yolo_conv3_1_lrelu:fp32:gpu;concatenate_5:fp32:gpu;yolo_neck_1_lrelu:fp32:gpu 56 | 57 | [class-attrs-all] 58 | pre-cluster-threshold=0.6 59 | roi-top-offset=0 60 | roi-bottom-offset=0 61 | detected-min-w=0 62 | detected-min-h=0 63 | detected-max-w=0 64 | detected-max-h=0 65 | -------------------------------------------------------------------------------- /workspace/source_code/N4/config_tracker_NvDCF_perf.yml: -------------------------------------------------------------------------------- 1 | %YAML:1.0 2 | ################################################################################ 3 | # SPDX-FileCopyrightText: Copyright (c) 2019-2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 4 | # SPDX-License-Identifier: Apache-2.0 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 8 | # You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | ################################################################################ 18 | 19 | BaseConfig: 20 | minDetectorConfidence: 0 # If the confidence of a detector bbox is lower than this, then it won't be considered for tracking 21 | 22 | TargetManagement: 23 | enableBboxUnClipping: 1 # In case the bbox is likely to be clipped by image border, unclip bbox 24 | maxTargetsPerStream: 150 # Max number of targets to track per stream. 
Recommended to set >10. Note: this value should account for the targets being tracked in shadow mode as well. Max value depends on the GPU memory capacity 25 | 26 | # [Creation & Termination Policy] 27 | minIouDiff4NewTarget: 0.5 # If the IOU between the newly detected object and any of the existing targets is higher than this threshold, this newly detected object will be discarded. 28 | minTrackerConfidence: 0.2 # If the confidence of an object tracker is lower than this on the fly, then it will be tracked in shadow mode. Valid Range: [0.0, 1.0] 29 | probationAge: 3 # If the target's age exceeds this, the target will be considered to be valid. 30 | maxShadowTrackingAge: 30 # Max length of shadow tracking. If the shadowTrackingAge exceeds this limit, the tracker will be terminated. 31 | earlyTerminationAge: 1 # If the shadowTrackingAge reaches this threshold while in TENTATIVE period, the target will be terminated prematurely. 32 | 33 | TrajectoryManagement: 34 | useUniqueID: 0 # Use 64-bit long Unique ID when assignining tracker ID. Default is [true] 35 | 36 | DataAssociator: 37 | dataAssociatorType: 0 # the type of data associator among { DEFAULT= 0 } 38 | associationMatcherType: 0 # the type of matching algorithm among { GREEDY=0, GLOBAL=1 } 39 | checkClassMatch: 1 # If checked, only the same-class objects are associated with each other. Default: true 40 | 41 | # [Association Metric: Thresholds for valid candidates] 42 | minMatchingScore4Overall: 0.0 # Min total score 43 | minMatchingScore4SizeSimilarity: 0.6 # Min bbox size similarity score 44 | minMatchingScore4Iou: 0.0 # Min IOU score 45 | minMatchingScore4VisualSimilarity: 0.7 # Min visual similarity score 46 | 47 | # [Association Metric: Weights] 48 | matchingScoreWeight4VisualSimilarity: 0.6 # Weight for the visual similarity (in terms of correlation response ratio) 49 | matchingScoreWeight4SizeSimilarity: 0.0 # Weight for the Size-similarity score 50 | matchingScoreWeight4Iou: 0.4 # Weight for the IOU score 51 | 52 | StateEstimator: 53 | stateEstimatorType: 1 # the type of state estimator among { DUMMY=0, SIMPLE=1, REGULAR=2 } 54 | 55 | # [Dynamics Modeling] 56 | processNoiseVar4Loc: 2.0 # Process noise variance for bbox center 57 | processNoiseVar4Size: 1.0 # Process noise variance for bbox size 58 | processNoiseVar4Vel: 0.1 # Process noise variance for velocity 59 | measurementNoiseVar4Detector: 4.0 # Measurement noise variance for detector's detection 60 | measurementNoiseVar4Tracker: 16.0 # Measurement noise variance for tracker's localization 61 | 62 | VisualTracker: 63 | visualTrackerType: 1 # the type of visual tracker among { DUMMY=0, NvDCF=1 } 64 | 65 | # [NvDCF: Feature Extraction] 66 | useColorNames: 1 # Use ColorNames feature 67 | useHog: 0 # Use Histogram-of-Oriented-Gradient (HOG) feature 68 | featureImgSizeLevel: 2 # Size of a feature image. Valid range: {1, 2, 3, 4, 5}, from the smallest to the largest 69 | featureFocusOffsetFactor_y: -0.2 # The offset for the center of hanning window relative to the feature height. The center of hanning window would move by (featureFocusOffsetFactor_y*featureMatSize.height) in vertical direction 70 | 71 | # [NvDCF: Correlation Filter] 72 | filterLr: 0.075 # learning rate for DCF filter in exponential moving average. Valid Range: [0.0, 1.0] 73 | filterChannelWeightsLr: 0.1 # learning rate for the channel weights among feature channels. 
Valid Range: [0.0, 1.0] 74 | gaussianSigma: 0.75 # Standard deviation for Gaussian for desired response when creating DCF filter [pixels] 75 | -------------------------------------------------------------------------------- /workspace/source_code/N3/triton_model.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # Permission is hereby granted, free of charge, to any person obtaining 4 | # a copy of this software and associated documentation files (the 5 | # "Software"), to deal in the Software without restriction, including 6 | # without limitation the rights to use, copy, modify, merge, publish, 7 | # distribute, sublicense, and/or sell copies of the Software, and to 8 | # permit persons to whom the Software is furnished to do so, subject to 9 | # the following conditions: 10 | # 11 | # The above copyright notice and this permission notice shall be 12 | # included in all copies or substantial portions of the Software. 13 | # 14 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 15 | # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 16 | # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 17 | # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 18 | # LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 19 | # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 20 | # WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 21 | 22 | """Triton inference client for TAO Toolkit model.""" 23 | 24 | 25 | import tritonclient.grpc.model_config_pb2 as mc 26 | import numpy as np 27 | 28 | 29 | class TritonModel(object): 30 | """Simple class to run model inference using Triton client.""" 31 | 32 | def __init__(self, max_batch_size, input_names, output_names, 33 | channels, height, width, data_format, triton_dtype): 34 | """Set up a detectnet_v2 triton model instance. 35 | 36 | Args: 37 | max_batch_size(int): The maximum batch size of the TensorRT engine. 38 | input_names (str): List of the input node names 39 | output_names (str): List of the output node names 40 | channels (int): Number of chanels in the input dimensions 41 | height (int): Height of the input 42 | width (int): Width of the input 43 | data_format (str): The input dimension order. This can be "channels_first" 44 | or "channels_last". "channels_first" is in the CHW order, 45 | and "channels_last" is in HWC order. 46 | triton_dtype (proto): Triton input data type. 47 | channel_mode (str): String order of the C dimension of the input. 48 | "RGB" or "BGR" 49 | 50 | Returns: 51 | An instance of the DetectnetModel. 52 | """ 53 | self.max_batch_size = max_batch_size 54 | self.input_names = input_names 55 | self.output_names = output_names 56 | self.c = channels 57 | assert channels in [1, 3], ( 58 | "TAO Toolkit models only support 1 or 3 channel inputs." 59 | ) 60 | self.h = height 61 | self.w = width 62 | self.data_format = data_format 63 | self.triton_dtype = triton_dtype 64 | self.scale = 1 65 | if channels == 3: 66 | self.mean = [0., 0., 0.] 
67 | else: 68 | self.mean = [0] 69 | self.mean = np.asarray(self.mean).astype(np.float32) 70 | if self.data_format == mc.ModelInput.FORMAT_NCHW: 71 | self.mean = self.mean[:, np.newaxis, np.newaxis] 72 | 73 | @staticmethod 74 | def parse_model(model_metadata, model_config): 75 | """Simple class to parse model metadata and model config.""" 76 | raise NotImplementedError("Base class doesn't implement this method.") 77 | 78 | @classmethod 79 | def from_metadata(cls, model_metadata, model_config): 80 | """Parse a model from the metadata config.""" 81 | parsed_outputs = cls.parse_model(model_metadata, model_config) 82 | max_batch_size, input_names, output_names, channels, height, width, \ 83 | data_format, triton_dtype = parsed_outputs 84 | return cls( 85 | max_batch_size, input_names, output_names, 86 | channels, height, width, data_format, 87 | triton_dtype 88 | ) 89 | 90 | def get_config(self): 91 | """Get dictionary config.""" 92 | config_dict = { 93 | "data_format": self.data_format, 94 | "max_batch_size": self.max_batch_size, 95 | "channels": self.c, 96 | "width": self.w, 97 | "height": self.h, 98 | "input_names": self.input_names, 99 | "output_names": self.output_names, 100 | "triton_dtype": self.triton_dtype 101 | } 102 | return config_dict 103 | 104 | def preprocess(self, image): 105 | """Function to preprocess image 106 | 107 | Performs mean subtraction and then normalization. 108 | 109 | Args: 110 | image (np.ndarray): Numpy ndarray of an input batch. 111 | 112 | Returns: 113 | image (np.ndarray): Preprocessed input image. 114 | """ 115 | image = (image - self.mean) * self.scale 116 | return image 117 | -------------------------------------------------------------------------------- /workspace/Start_here.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# End-to-end computer vision bootcamp\n", 8 | "\n", 9 | "## Learning objectives\n", 10 | "\n", 11 | "The goal of this bootcamp is to build a complete end-to-end computer vision pipeline for an object detection application. This material will introduce participants to multiple NVIDIA® SDKs, most notably NVIDIA TAO Toolkit, NVIDIA TensorRT™, NVIDIA Triton™ Inference Server, and NVIDIA DeepStream SDK while giving hands-on experience in data preprocessing, model training, optimization, and deployment at scale.\n", 12 | "\n", 13 | "A key objective of this bootcamp is to show participants how to seamlessly combine these SDKs so that the same development methodology can be applied to similar use cases as well, even outside the domain of computer vision. With a broader idea of the production flow, participants will be able to better evaluate the complexity and progress of their machine learning projects.\n", 14 | "\n", 15 | "The bootcamp assumes familiarity with Deep Learning and Computer Vision fundamentals.\n", 16 | "\n", 17 | "* Language: Python\n", 18 | "* Frameworks: NVIDIA TAO Toolkit, NVIDIA TensorRT, NVIDIA Triton Inference Server, NVIDIA DeepStream SDK, OpenCV\n", 19 | "\n", 20 | "It is not required, although useful, to have more than one GPU for the bootcamp. 
Let's start by testing the GPUs you are running the code on in this bootcamp:" 21 | ] 22 | }, 23 | { 24 | "cell_type": "code", 25 | "execution_count": null, 26 | "metadata": {}, 27 | "outputs": [], 28 | "source": [ 29 | "!nvidia-smi" 30 | ] 31 | }, 32 | { 33 | "cell_type": "markdown", 34 | "metadata": { 35 | "jp-MarkdownHeadingCollapsed": true, 36 | "tags": [] 37 | }, 38 | "source": [ 39 | "## Bootcamp outline\n", 40 | "\n", 41 | "The following contents will be covered during the bootcamp:\n", 42 | "\n", 43 | "- [**Lab 1: Data labeling and preprocessing**](jupyter_notebook/1.Data_labeling_and_preprocessing.ipynb)\n", 44 | "- [**Lab 2: Object detection using TAO YOLOv4**](jupyter_notebook/2.Object_detection_using_TAO_YOLOv4.ipynb)\n", 45 | "- [**Lab 3: Model deployment with Triton Inference Server**](jupyter_notebook/3.Model_deployment_with_Triton_Inference_Server.ipynb)\n", 46 | "- [**Lab 4: Model deployment with DeepStream**](jupyter_notebook/4.Model_deployment_with_DeepStream.ipynb)\n", 47 | "- [**Lab 5: Measure object size using OpenCV**](jupyter_notebook/5.Measure_object_size_using_OpenCV.ipynb)\n", 48 | "- [**Challenge 1: DeepStream SDK**](jupyter_notebook/6.Challenge_DeepStream.ipynb)\n", 49 | "- [**Challenge 2: Triton Inference Server**](jupyter_notebook/7.Challenge_Triton.ipynb)\n", 50 | "\n", 51 | "Note: the challenges are extra modules that you can try after learning the individual modules to test your knowledge." 52 | ] 53 | }, 54 | { 55 | "cell_type": "markdown", 56 | "metadata": { 57 | "tags": [] 58 | }, 59 | "source": [ 60 | "## Bootcamp duration\n", 61 | "The lab material will be presented in a total of 8.5 hours. The link to the material is available for download at the end of the lab.\n", 62 | "\n", 63 | "## Content level\n", 64 | "Beginner, Intermediate.\n", 65 | "\n", 66 | "## Target audience and prerequisites\n", 67 | "The target audience for this lab is researchers/graduate students and developers who are interested in learning about building an end-to-end computer vision pipeline to bring their scientific or industrial application ideas to life.\n", 68 | "\n", 69 | "A basic understanding of Deep Learning and Computer Vision is required.\n", 70 | "\n", 71 | "**Hardware Note:** *all the material was tested and is set to run on a DGX machine equipped with an Ampere A100 GPU. The material was also tested using a workstation equipped with an NVIDIA RTX A3000 GPU with 6GB of VRAM, reducing all the batch sizes to 8 during training. The results may vary when using different hardware and some hyperparameters may not be ideal for fully taking advantage of the graphic card.*" 72 | ] 73 | }, 74 | { 75 | "cell_type": "markdown", 76 | "metadata": {}, 77 | "source": [ 78 | "--- \n", 79 | "\n", 80 | "## Licensing\n", 81 | "\n", 82 | "Copyright © 2022 OpenACC-Standard.org. This material is released by OpenACC-Standard.org, in collaboration with NVIDIA Corporation, under the Creative Commons Attribution 4.0 International (CC BY 4.0). These materials include references to hardware and software developed by other entities; all applicable licensing and copyrights apply." 
83 | ] 84 | } 85 | ], 86 | "metadata": { 87 | "kernelspec": { 88 | "display_name": "Python 3 (ipykernel)", 89 | "language": "python", 90 | "name": "python3" 91 | }, 92 | "language_info": { 93 | "codemirror_mode": { 94 | "name": "ipython", 95 | "version": 3 96 | }, 97 | "file_extension": ".py", 98 | "mimetype": "text/x-python", 99 | "name": "python", 100 | "nbconvert_exporter": "python", 101 | "pygments_lexer": "ipython3", 102 | "version": "3.9.12" 103 | }, 104 | "toc-autonumbering": false 105 | }, 106 | "nbformat": 4, 107 | "nbformat_minor": 4 108 | } 109 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | Contributing 2 | ------------ 3 | 4 | Please use the following guidelines when contributing to this project. 5 | 6 | Before contributing significant changes, please begin a discussion of the desired changes via a GitHub Issue to prevent doing unnecessary or overlapping work. 7 | 8 | ## License 9 | 10 | The preferred license for source code contributed to this project is the Apache License 2.0 (https://www.apache.org/licenses/LICENSE-2.0) and for documentation, including Jupyter notebooks and text documentation, is the Creative Commons Attribution 4.0 International (CC BY 4.0) (https://creativecommons.org/licenses/by/4.0/). Contributions under other, compatible licenses will be considered on a case-by-case basis. 11 | 12 | ## Styling 13 | 14 | Please use the following style guidelines when making contributions. 15 | 16 | ### Source Code 17 | * Two-space indentation, no tabs 18 | * To the extent possible, variable names should be descriptive 19 | * Code should be documentation with detail like what function does and returns making the code readable. The code should also have proper license at the beginning of the file. 20 | * The following file extensions should be used appropriately: 21 | * Python = .py 22 | 23 | ### Jupyter Notebooks & Markdown 24 | * When they appear inline with the text; directive names, clauses, function or subroutine names, variable names, file names, commands and command-line arguments should appear between two backticks. 25 | * Code blocks should begin with three backticks to enable appropriate source formatting and end with three backticks. 26 | * Leave an empty line before and after the codeblock. 27 | Emphasis, including quotes made for emphasis and introduction of new terms should be highlighted between a single pair of asterisks 28 | * A level 1 heading should appear at the top of the notebook as the title of the notebook. 29 | * A horizontal rule should appear between sections that begin with a level 2 heading. 30 | 31 | 32 | ## Contributing Labs/Modules 33 | #### DeepStream Triton Inference Server Integration 34 | * In the existing end-to-end CV repo, only models build from TAO or optimized by TRT can be deployed on DeepStream (streaming video). DeepStream Triton Inference Server Integration enables the use of trained model from desired framework, such as TensorFlow, TensorRT, PyTorch, or ONNX-Runtime, and directly run inferences on streaming video. 35 | * Task 1: Extend end-to-end CV repo with DeepStream Triton Inference Server Integration 36 | * Task 2: Upgrade end-to-end CV repo to TAO Toolkit 4.0.1 and Add AutoML section. AutoML is a TAO Toolkit API service that automatically selects deep learning hyperparameters for a chosen model and dataset. 
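For reference, the following is a minimal sketch of a Python source file laid out according to the styling guidelines above (two-space indentation, descriptive names, a docstring stating what the function does and returns, and a license header at the top). The file name, function, and values are hypothetical and only meant as an illustration; a real contribution should carry the full Apache 2.0 header.

```
# Copyright (c) <year>, <contributor>. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
# (Replace this placeholder with the full license header required for the repo.)

"""Example utility: compute the area of an axis-aligned bounding box."""


def compute_bbox_area(x_min, y_min, x_max, y_max):
  """Compute the area of an axis-aligned bounding box.

  Args:
    x_min, y_min: top-left corner of the box in pixels.
    x_max, y_max: bottom-right corner of the box in pixels.

  Returns:
    The box area in square pixels, or 0 for a degenerate box.
  """
  width = max(0, x_max - x_min)
  height = max(0, y_max - y_min)
  return width * height
```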
37 | #### Body Pose Estimation 38 | * The uses for body pose estimation in the CV domain include 39 | * Tracking customers who pick or drop products in a retail store [real-time inventory] 40 | * Tracking the safety of factory personnel 41 | * E-health monitoring systems 42 | * Task: Create end-to-end body pose estimation material (dataset prep., TAO Train, and DeepStream deployment) 43 | 44 | 45 | ### Directory structure for GitHub 46 | 47 | Before starting to work on a new lab, it is important to follow the recommended git structure shown below to avoid reformatting. 48 | 49 | Each lab will have the following files/directories consisting of training material for the lab. 50 | * jupyter_notebook folder: Consists of jupyter notebooks and their corresponding images. 51 | * source_code folder: Source codes are stored in a separate directory because not all clusters may support jupyter notebooks. During such bootcamps, we should be able to use the source codes directly from this directory. 52 | * presentations: Consists of presentations for the labs (PDF format is preferred). 53 | * Dockerfile and Singularity: Each lab should have both Docker and Singularity recipes. 54 | 55 | The lab may optionally add a custom license in case of any deviation from the top-level directory license (Apache 2.0). 56 | 57 | 58 | ### Git Branching 59 | 60 | Adding a new feature/lab follows a forking workflow, which means feature branch development happens on a forked repo that later gets merged into our original project (GPUHackathons.org) repository. 61 | 62 | ![Git Branching Workflow](workspace/jupyter_notebook/images/git_branching.jpg) 63 | 64 | The 5 main steps depicted in the image above are as follows: 65 | 1. Fork: To create a new lab/feature, the GPUHackathons.org repository must be forked. The fork creates a snapshot of the GPUHackathons.org repository at the time it was forked. Any new feature/lab should be developed from the develop branch of the repository. 66 | 2. Clone: The developer can then clone this new repository to a local machine. 3. Create Feature Branch: Create a new branch with a feature name in which your changes will be made. The recommended naming convention for the feature branch is: ende2end-cv-. The new changes that the developer makes can then be added, committed, and pushed. 67 | 4. Push: After the changes are committed, the developer pushes them to the remote branch. The push command uploads the local changes to the GitHub repository. 68 | 5. Pull: Submit a pull request. Upon receiving the pull request, a Hackathon team reviewer/owner will review the changes and, upon accepting them, merge them into the develop branch of GPUHackathons.org. 69 | 70 | Git Branch details are as follows: 71 | 72 | * master branch: The stable branch. 73 | * origin/master is the main branch where the source code of HEAD always reflects a production-ready state 74 | * Merge requests are possible through: develop branch 75 | * develop branch: branched from master branch 76 | * Must branch from: master branch 77 | * Must merge back into: master branch 78 | * It is the main development branch where the source code of HEAD always reflects a state with the latest delivered development changes for the next release.
80 | * When the source code in the develop branch reaches a stable point and is ready to be released, all of the changes should be merged back into master and then tagged with a release number 81 | * All feature development should happen by forking GPUHackathons.org and branching from the develop branch only. 82 | -------------------------------------------------------------------------------- /workspace/source_code/N3/preprocess_input.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # Permission is hereby granted, free of charge, to any person obtaining 4 | # a copy of this software and associated documentation files (the 5 | # "Software"), to deal in the Software without restriction, including 6 | # without limitation the rights to use, copy, modify, merge, publish, 7 | # distribute, sublicense, and/or sell copies of the Software, and to 8 | # permit persons to whom the Software is furnished to do so, subject to 9 | # the following conditions: 10 | # 11 | # The above copyright notice and this permission notice shall be 12 | # included in all copies or substantial portions of the Software. 13 | # 14 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 15 | # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 16 | # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 17 | # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 18 | # LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 19 | # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 20 | # WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 21 | 22 | """Utilities for ImageNet data preprocessing & prediction decoding.""" 23 | 24 | from __future__ import absolute_import 25 | from __future__ import division 26 | from __future__ import print_function 27 | 28 | import logging 29 | import numpy as np 30 | logger = logging.getLogger(__name__) 31 | 32 | 33 | def _preprocess_numpy_input(x, data_format, mode, color_mode, img_mean, **kwargs): 34 | """Preprocesses a Numpy array encoding a batch of images. 35 | 36 | # Arguments 37 | x: Input array, 3D or 4D. 38 | data_format: Data format of the image array. 39 | mode: One of "caffe", "tf" or "torch". 40 | - caffe: will convert the images from RGB to BGR, 41 | then will zero-center each color channel with 42 | respect to the ImageNet dataset, 43 | without scaling. 44 | - tf: will scale pixels between -1 and 1, 45 | sample-wise. 46 | - torch: will scale pixels between 0 and 1 and then 47 | will normalize each channel with respect to the 48 | ImageNet dataset. 49 | 50 | # Returns 51 | Preprocessed Numpy array. 52 | """ 53 | if not issubclass(x.dtype.type, np.floating): 54 | #x = x.astype(backend.floatx(), copy=False) 55 | x = x.astype(np.float32, copy=False) 56 | 57 | if mode == 'tf': 58 | if img_mean and len(img_mean) > 0: 59 | logger.debug("image_mean is ignored in tf mode.") 60 | x /= 127.5 61 | x -= 1. 62 | return x 63 | 64 | if mode == 'torch': 65 | if img_mean and len(img_mean) > 0: 66 | logger.debug("image_mean is ignored in torch mode.") 67 | x /= 255.
68 | if color_mode == "rgb": 69 | mean = [0.485, 0.456, 0.406] 70 | std = [0.224, 0.224, 0.224] 71 | elif color_mode == "grayscale": 72 | mean = [0.449] 73 | std = [0.224] 74 | else: 75 | raise NotImplementedError("Invalid color mode: {}".format(color_mode)) 76 | else: 77 | if color_mode == "rgb": 78 | if data_format == 'channels_first': 79 | # 'RGB'->'BGR' 80 | if x.ndim == 3: 81 | x = x[::-1, ...] 82 | else: 83 | x = x[:, ::-1, ...] 84 | else: 85 | # 'RGB'->'BGR' 86 | x = x[..., ::-1] 87 | if not img_mean: 88 | mean = [103.939, 116.779, 123.68] 89 | else: 90 | assert len(img_mean) == 3, "image_mean must be a list of 3 values \ 91 | for RGB input." 92 | mean = img_mean 93 | std = None 94 | else: 95 | if not img_mean: 96 | mean = [117.3786] 97 | else: 98 | assert len(img_mean) == 1, "image_mean must be a list of a single value \ 99 | for gray image input." 100 | mean = img_mean 101 | std = None 102 | 103 | # Zero-center by mean pixel 104 | if data_format == 'channels_first': 105 | for idx in range(len(mean)): 106 | if x.ndim == 3: 107 | x[idx, :, :] -= mean[idx] 108 | if std is not None: 109 | x[idx, :, :] /= std[idx] 110 | else: 111 | x[:, idx, :, :] -= mean[idx] 112 | if std is not None: 113 | x[:, idx, :, :] /= std[idx] 114 | else: 115 | for idx in range(len(mean)): 116 | x[..., idx] -= mean[idx] 117 | if std is not None: 118 | x[..., idx] /= std[idx] 119 | return x 120 | 121 | 122 | 123 | def preprocess_input(x, data_format=None, mode='caffe', color_mode="rgb", img_mean=None, **kwargs): 124 | """Preprocesses a tensor or Numpy array encoding a batch of images. 125 | 126 | # Arguments 127 | x: Input Numpy or symbolic tensor, 3D or 4D. 128 | The preprocessed data is written over the input data 129 | if the data types are compatible. To avoid this 130 | behaviour, `numpy.copy(x)` can be used. 131 | data_format: Data format of the image tensor/array. 132 | mode: One of "caffe", "tf" or "torch". 133 | - caffe: will convert the images from RGB to BGR, 134 | then will zero-center each color channel with 135 | respect to the ImageNet dataset, 136 | without scaling. 137 | - tf: will scale pixels between -1 and 1, 138 | sample-wise. 139 | - torch: will scale pixels between 0 and 1 and then 140 | will normalize each channel with respect to the 141 | ImageNet dataset. 142 | 143 | # Returns 144 | Preprocessed tensor or Numpy array. 145 | 146 | # Raises 147 | ValueError: In case of unknown `data_format` argument. 148 | """ 149 | data_format = "channels_first" 150 | 151 | return _preprocess_numpy_input(x, data_format=data_format, 152 | mode=mode, color_mode=color_mode, 153 | img_mean=img_mean, **kwargs) 154 | -------------------------------------------------------------------------------- /workspace/source_code/N3/yolov4_model.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # Permission is hereby granted, free of charge, to any person obtaining 4 | # a copy of this software and associated documentation files (the 5 | # "Software"), to deal in the Software without restriction, including 6 | # without limitation the rights to use, copy, modify, merge, publish, 7 | # distribute, sublicense, and/or sell copies of the Software, and to 8 | # permit persons to whom the Software is furnished to do so, subject to 9 | # the following conditions: 10 | # 11 | # The above copyright notice and this permission notice shall be 12 | # included in all copies or substantial portions of the Software. 
13 | # 14 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 15 | # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 16 | # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 17 | # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 18 | # LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 19 | # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 20 | # WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 21 | 22 | """Triton inference client for TAO Toolkit model.""" 23 | 24 | 25 | import tritonclient.grpc.model_config_pb2 as mc 26 | from triton_model import TritonModel 27 | 28 | CHANNEL_MODES = ["rgb", "bgr", "l"] 29 | 30 | 31 | class YOLOv4Model(TritonModel): 32 | """Simple class to run model inference using Triton client.""" 33 | 34 | def __init__(self, max_batch_size, input_names, output_names, 35 | channels, height, width, data_format, 36 | triton_dtype, channel_mode="RGB"): 37 | """Set up a yolov4 triton model instance. 38 | 39 | Args: 40 | max_batch_size(int): The maximum batch size of the TensorRT engine. 41 | input_names (str): List of the input node names 42 | output_names (str): List of the output node names 43 | channels (int): Number of chanels in the input dimensions 44 | height (int): Height of the input 45 | width (int): Width of the input 46 | data_format (str): The input dimension order. This can be "channels_first" 47 | or "channels_last". "channels_first" is in the CHW order, 48 | and "channels_last" is in HWC order. 49 | triton_dtype (proto): Triton input data type. 50 | channel_mode (str): String order of the C dimension of the input. 51 | "RGB" or "BGR" 52 | 53 | Returns: 54 | An instance of the YOLOv4Model. 55 | """ 56 | super().__init__(max_batch_size, input_names, output_names, 57 | channels, height, width, data_format, 58 | triton_dtype) 59 | self.scale = 1.0 60 | 61 | @staticmethod 62 | def parse_model(model_metadata, model_config): 63 | """Parse model metadata and model config from the triton server.""" 64 | if len(model_metadata.inputs) != 1: 65 | raise Exception("expecting 1 input, got {}".format( 66 | len(model_metadata.inputs))) 67 | 68 | if len(model_metadata.outputs) != 4: 69 | raise Exception("expecting 4 output, got {}".format( 70 | len(model_metadata.outputs))) 71 | 72 | if len(model_config.input) != 1: 73 | raise Exception( 74 | "expecting 1 input in model configuration, got {}".format( 75 | len(model_config.input))) 76 | 77 | if len(model_config.output) != 4: 78 | raise Exception( 79 | "expecting 2 input in model configuration, got {}".format( 80 | len(model_config.input))) 81 | 82 | input_metadata = model_metadata.inputs[0] 83 | input_config = model_config.input[0] 84 | output_metadata = model_metadata.outputs 85 | 86 | 87 | for _, data in enumerate(output_metadata): 88 | if _ == 0 : 89 | if data.datatype != "INT32": 90 | raise Exception("expecting output datatype to be INT32, model '" + 91 | data.name + "' output type is " + 92 | data.datatype) 93 | if _ != 0 : 94 | if data.datatype != "FP32": 95 | raise Exception("expecting output datatype to be FP32, model '" + 96 | data.name + "' output type is " + 97 | data.datatype) 98 | 99 | # Model input must have 3 dims, either CHW or HWC (not counting 100 | # the batch dimension), either CHW or HWC 101 | input_batch_dim = (model_config.max_batch_size > 0) 102 | expected_input_dims = 3 + (1 if input_batch_dim else 0) 103 | if len(input_metadata.shape) != expected_input_dims: 104 | raise Exception( 105 
| "expecting input to have {} dimensions, model '{}' input has {}". 106 | format(expected_input_dims, model_metadata.name, 107 | len(input_metadata.shape))) 108 | 109 | if type(input_config.format) == str: 110 | FORMAT_ENUM_TO_INT = dict(mc.ModelInput.Format.items()) 111 | input_config.format = FORMAT_ENUM_TO_INT[input_config.format] 112 | 113 | if ((input_config.format != mc.ModelInput.FORMAT_NCHW) and 114 | (input_config.format != mc.ModelInput.FORMAT_NHWC)): 115 | raise Exception("unexpected input format " + 116 | mc.ModelInput.Format.Name(input_config.format) + 117 | ", expecting " + 118 | mc.ModelInput.Format.Name(mc.ModelInput.FORMAT_NCHW) + 119 | " or " + 120 | mc.ModelInput.Format.Name(mc.ModelInput.FORMAT_NHWC)) 121 | 122 | if input_config.format == mc.ModelInput.FORMAT_NHWC: 123 | h = input_metadata.shape[1 if input_batch_dim else 0] 124 | w = input_metadata.shape[2 if input_batch_dim else 1] 125 | c = input_metadata.shape[3 if input_batch_dim else 2] 126 | else: 127 | c = input_metadata.shape[1 if input_batch_dim else 0] 128 | h = input_metadata.shape[2 if input_batch_dim else 1] 129 | w = input_metadata.shape[3 if input_batch_dim else 2] 130 | 131 | print(model_config.max_batch_size, input_metadata.name, 132 | [data.name for data in output_metadata], c, h, w, input_config.format, 133 | input_metadata.datatype) 134 | 135 | return (model_config.max_batch_size, input_metadata.name, 136 | [data.name for data in output_metadata], c, h, w, input_config.format, 137 | input_metadata.datatype) 138 | -------------------------------------------------------------------------------- /workspace/source_code/N3/yolov4_postprocessor.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # Permission is hereby granted, free of charge, to any person obtaining 4 | # a copy of this software and associated documentation files (the 5 | # "Software"), to deal in the Software without restriction, including 6 | # without limitation the rights to use, copy, modify, merge, publish, 7 | # distribute, sublicense, and/or sell copies of the Software, and to 8 | # permit persons to whom the Software is furnished to do so, subject to 9 | # the following conditions: 10 | # 11 | # The above copyright notice and this permission notice shall be 12 | # included in all copies or substantial portions of the Software. 13 | # 14 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 15 | # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 16 | # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 17 | # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 18 | # LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 19 | # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 20 | # WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 21 | 22 | """Simple class to run post processing of Triton Inference outputs.""" 23 | 24 | import os 25 | import numpy as np 26 | from PIL import Image, ImageDraw 27 | 28 | from postprocessor import Postprocessor 29 | 30 | 31 | def trt_output_process_fn(y_encoded): 32 | "function to process TRT model output." 
33 | keep_k, boxes, scores, cls_id = y_encoded 34 | result = [] 35 | for idx, k in enumerate(keep_k.reshape(-1)): 36 | mul = np.array([640, 37 | 384, 38 | 640, 39 | 384]) 40 | loc = boxes[idx].reshape(-1, 4)[:k] * mul 41 | cid = cls_id[idx].reshape(-1, 1)[:k] 42 | conf = scores[idx].reshape(-1, 1)[:k] 43 | result.append(np.concatenate((cid, conf, loc), axis=-1)) 44 | return result 45 | 46 | 47 | class YOLOv4Postprocessor(Postprocessor): 48 | """Class to run post processing of Triton Tensors.""" 49 | 50 | def __init__(self, batch_size, frames, output_path, data_format): 51 | """Initialize a post processor class for a yolov4 model. 52 | 53 | Args: 54 | batch_size (int): Number of images in the batch. 55 | frames (list): List of images. 56 | output_path (str): Unix path to the output rendered images and labels. 57 | data_format (str): Order of the input model dimensions. 58 | "channels_first": CHW order. 59 | "channels_last": HWC order. 60 | """ 61 | super().__init__(batch_size, frames, output_path, data_format) 62 | self.output_names = ["BatchedNMS", 63 | "BatchedNMS_1", 64 | "BatchedNMS_2", 65 | "BatchedNMS_3"] 66 | self.threshold = 0.6 67 | self.keep_aspect_ratio = True 68 | self.class_mapping = {0: 'freshapple', 1: 'freshbanana', 2: 'freshorange', 3: 'rottenapple', 4: 'rottenbanana', 5: 'rottenorange'} 69 | 70 | def _get_bbox_and_kitti_label_single_img( 71 | self, img, img_ratio, y_decoded, 72 | is_draw_img, is_kitti_export 73 | ): 74 | """helper function to draw bbox on original img and get kitti label on single image. 75 | 76 | Note: img will be modified in-place. 77 | """ 78 | kitti_txt = "" 79 | draw = ImageDraw.Draw(img) 80 | color_list = ['Black', 'Red', 'Blue', 'Gold', 'Purple', 'Green'] 81 | for i in y_decoded: 82 | if float(i[1]) < self.threshold: 83 | continue 84 | 85 | if self.keep_aspect_ratio: 86 | i[2:6] *= img_ratio 87 | else: 88 | orig_w, orig_h = img.size 89 | ratio_w = float(orig_w) / self.model_input_width 90 | ratio_h = float(orig_h) / self.model_input_height 91 | i[2] *= ratio_w 92 | i[3] *= ratio_h 93 | i[4] *= ratio_w 94 | i[5] *= ratio_h 95 | 96 | if is_kitti_export: 97 | kitti_txt += self.class_mapping[int(i[0])] + ' 0 0 0 ' + \ 98 | ' '.join([str(x) for x in i[2:6]])+' 0 0 0 0 0 0 0 ' + str(i[1])+'\n' 99 | 100 | if is_draw_img: 101 | draw.rectangle( 102 | ((i[2], i[3]), (i[4], i[5])), 103 | outline=color_list[int(i[0]) % len(color_list)] 104 | ) 105 | # txt pad 106 | draw.rectangle(((i[2], i[3]), (i[2] + 100, i[3]+10)), 107 | fill=color_list[int(i[0]) % len(color_list)]) 108 | 109 | draw.text((i[2], i[3]), "{0}: {1:.2f}".format(self.class_mapping[int(i[0])], i[1])) 110 | 111 | 112 | return img, kitti_txt 113 | 114 | 115 | def apply(self, results, this_id, render=True, batching=True): 116 | """Apply the post processor to the outputs to the yolov4 outputs.""" 117 | 118 | #output_array = {} 119 | output_array = [] 120 | 121 | for output_name in self.output_names: 122 | #print(results.as_numpy(output_name)) 123 | output_array.append(results.as_numpy(output_name)) 124 | 125 | for image_idx in range(self.batch_size): 126 | y_pred = [i[image_idx] for i in output_array] 127 | y_pred_decoded = trt_output_process_fn(y_pred) 128 | 129 | current_idx = (int(this_id) - 1) * self.batch_size + image_idx 130 | if current_idx >= len(self.frames): 131 | break 132 | current_frame = self.frames[current_idx] 133 | filename = os.path.basename(current_frame._image_path) 134 | 135 | img = Image.open(current_frame._image_path) 136 | orig_w, orig_h = img.size 137 | ratio = 
min(current_frame.w/float(orig_w), current_frame.h/float(orig_h)) 138 | new_w = int(round(orig_w*ratio)) 139 | ratio = float(orig_w)/new_w 140 | 141 | output_label_file = os.path.join( 142 | self.output_path, "infer_labels", 143 | "{}.txt".format(os.path.splitext(filename)[0]) 144 | ) 145 | output_image_file = os.path.join( 146 | self.output_path, "infer_images", 147 | "{}.png".format(os.path.splitext(filename)[0]) 148 | ) 149 | if not os.path.exists(os.path.dirname(output_label_file)): 150 | os.makedirs(os.path.dirname(output_label_file)) 151 | if not os.path.exists(os.path.dirname(output_image_file)): 152 | os.makedirs(os.path.dirname(output_image_file)) 153 | 154 | img, kitti_txt = self._get_bbox_and_kitti_label_single_img(img, ratio, y_pred_decoded[0], output_image_file, output_label_file) 155 | 156 | img.save(output_image_file) 157 | 158 | open(output_label_file, 'w').write(kitti_txt) 159 | -------------------------------------------------------------------------------- /workspace/source_code/N3/frame.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # Permission is hereby granted, free of charge, to any person obtaining 4 | # a copy of this software and associated documentation files (the 5 | # "Software"), to deal in the Software without restriction, including 6 | # without limitation the rights to use, copy, modify, merge, publish, 7 | # distribute, sublicense, and/or sell copies of the Software, and to 8 | # permit persons to whom the Software is furnished to do so, subject to 9 | # the following conditions: 10 | # 11 | # The above copyright notice and this permission notice shall be 12 | # included in all copies or substantial portions of the Software. 13 | # 14 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 15 | # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 16 | # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 17 | # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 18 | # LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 19 | # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 20 | # WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 21 | 22 | import os 23 | 24 | from PIL import Image 25 | import numpy as np 26 | 27 | import tritonclient.grpc.model_config_pb2 as mc 28 | from preprocess_input import preprocess_input 29 | 30 | class Frame(object): 31 | """Data structure to contain an image.""" 32 | 33 | def __init__(self, image_path, data_format, dtype, target_shape): 34 | """Instantiate a frame object.""" 35 | self._image_path = image_path 36 | if data_format not in [mc.ModelInput.FORMAT_NCHW, mc.ModelInput.FORMAT_NHWC]: 37 | raise NotImplementedError( 38 | "Data format not in the supported data format: {}".format(data_format) 39 | ) 40 | self.data_format = data_format 41 | self.height = None 42 | self.width = None 43 | self.dtype = dtype 44 | assert len(target_shape) == 3, ( 45 | "3 dimensions are required for input definitions. Got {}".format(len(target_shape)) 46 | ) 47 | if self.data_format == mc.ModelInput.FORMAT_NCHW: 48 | self.c, self.h, self.w = target_shape 49 | else: 50 | self.h, self.w, self.c = target_shape 51 | assert self.c in [1, 3], ( 52 | "Number of channels should be 1 or 3. 
Got {}".format(self.c)) 53 | self.target_shape = target_shape 54 | 55 | self.model_img_mode = 'RGB' if self.c == 3 else 'L' 56 | self.keep_aspect_ratio = True 57 | self.img_mean = [103.939, 116.779, 123.68] 58 | 59 | def load_image(self): 60 | """Load the image defined.""" 61 | if not os.path.exists(self._image_path): 62 | raise NotFoundError("Cannot find image at {}".format(self._image_path)) 63 | image = Image.open(self._image_path) 64 | self.width, self.height = image.size 65 | 66 | if self.c == 1: 67 | image = image.convert("L") 68 | else: 69 | image = image.convert("RGB") 70 | return image 71 | 72 | def as_numpy(self, image): 73 | """Return a numpy array.""" 74 | image = image.resize((self.w, self.h), Image.ANTIALIAS) 75 | nparray = np.asarray(image).astype(self.dtype) 76 | if nparray.ndim == 2: 77 | nparray = nparray[:, :, np.newaxis] 78 | if self.data_format == mc.ModelInput.FORMAT_NCHW: 79 | nparray = np.transpose(nparray, (2, 0, 1)) 80 | return nparray 81 | 82 | def _load_img(self): 83 | """load an image and returns the original image and a numpy array for model to consume. 84 | 85 | Args: 86 | img_path (str): path to an image 87 | Returns: 88 | img (PIL.Image): PIL image of original image. 89 | ratio (float): resize ratio of original image over processed image 90 | inference_input (array): numpy array for processed image 91 | """ 92 | img = Image.open(self._image_path) 93 | orig_w, orig_h = img.size 94 | ratio = min(self.w/float(orig_w), self.h/float(orig_h)) 95 | 96 | # do not change aspect ratio 97 | new_w = int(round(orig_w*ratio)) 98 | new_h = int(round(orig_h*ratio)) 99 | 100 | if self.keep_aspect_ratio: 101 | im = img.resize((new_w, new_h), Image.ANTIALIAS) 102 | else: 103 | im = img.resize((self.w, self.h), Image.ANTIALIAS) 104 | 105 | if im.mode in ('RGBA', 'LA') or \ 106 | (im.mode == 'P' and 'transparency' in im.info) and \ 107 | self.model_img_mode == 'L' : 108 | 109 | # Need to convert to RGBA if LA format due to a bug in PIL 110 | im = im.convert('RGBA') 111 | inf_img = Image.new("RGBA", (self.w, self.h)) 112 | inf_img.paste(im, (0, 0)) 113 | inf_img = inf_img.convert(self.model_img_mode) 114 | else: 115 | inf_img = Image.new( 116 | self.model_img_mode, 117 | (self.w, self.h) 118 | ) 119 | inf_img.paste(im, (0, 0)) 120 | 121 | inf_img = np.array(inf_img).astype(np.float32) 122 | if self.model_img_mode == 'L': 123 | inf_img = np.expand_dims(inf_img, axis=2) 124 | inference_input = inf_img.transpose(2, 0, 1) - 117.3786 125 | else: 126 | inference_input = preprocess_input(inf_img.transpose(2, 0, 1), 127 | img_mean=self.img_mean) 128 | 129 | return inference_input 130 | 131 | def _load_img_maskrcnn(self): 132 | """load an image and returns the original image and a numpy array for model to consume. 133 | 134 | Args: 135 | img_path (str): path to an image 136 | Returns: 137 | img (PIL.Image): PIL image of original image. 
138 | ratio (float): resize ratio of original image over processed image 139 | inference_input (array): numpy array for processed image 140 | """ 141 | 142 | img = Image.open(self._image_path) 143 | orig_w, orig_h = img.size 144 | ratio = min(self.w/float(orig_w), self.h/float(orig_h)) 145 | 146 | # do not change aspect ratio 147 | new_w = int(round(orig_w*ratio)) 148 | new_h = int(round(orig_h*ratio)) 149 | 150 | if self.keep_aspect_ratio: 151 | im = img.resize((new_w, new_h), Image.ANTIALIAS) 152 | else: 153 | im = img.resize((self.w, self.h), Image.ANTIALIAS) 154 | 155 | if im.mode in ('RGBA', 'LA') or \ 156 | (im.mode == 'P' and 'transparency' in im.info) and \ 157 | self.model_img_mode == 'L' : 158 | 159 | # Need to convert to RGBA if LA format due to a bug in PIL 160 | im = im.convert('RGBA') 161 | inf_img = Image.new("RGBA", (self.w, self.h)) 162 | inf_img.paste(im, (0, 0)) 163 | inf_img = inf_img.convert(self.model_img_mode) 164 | else: 165 | inf_img = Image.new( 166 | self.model_img_mode, 167 | (self.w, self.h) 168 | ) 169 | inf_img.paste(im, (0, 0)) 170 | 171 | inf_img = np.array(inf_img).astype(np.float32) 172 | if self.model_img_mode == 'L': 173 | inf_img = np.expand_dims(inf_img, axis=2) 174 | inference_input = inf_img.transpose(2, 0, 1) - 117.3786 175 | else: 176 | inference_input = preprocess_input(inf_img.transpose(2, 0, 1), mode="torch") 177 | 178 | return inference_input 179 | 180 | def _load_img_multitask_classification(self): 181 | """load an image and returns the original image and a numpy array for model to consume. 182 | 183 | Args: 184 | img_path (str): path to an image 185 | Returns: 186 | img (PIL.Image): PIL image of original image. 187 | inference_input (array): numpy array for processed image 188 | """ 189 | 190 | img = Image.open(self._image_path) 191 | image = img.resize((self.w, self.h), Image.ANTIALIAS).convert('RGB') 192 | inference_input = preprocess_input(np.array(image).astype(np.float32).transpose(2, 0, 1)) 193 | 194 | return inference_input 195 | -------------------------------------------------------------------------------- /Deployment_Guide.md: -------------------------------------------------------------------------------- 1 | # End-to-End Computer Vision Bootcamp 2 | 3 | The **End-to-End Computer Vision Bootcamp** is designed from a real-world perspective and follows the data processing, development, and deployment pipeline paradigm using a variety of tools. Through hands-on exercises, attendees will learn the fundamentals of preprocessing custom images, speeding the development process using transfer learning for model training, and deployment of trained models for fast and scalable AI in production. 4 | 5 | 6 | ## Deploying the Labs 7 | 8 | ### Prerequisites 9 | 10 | To run this tutorial you will need a Laptop/Workstation/DGX machine with NVIDIA GPU. 11 | 12 | - Install the latest [Docker](https://docs.docker.com/engine/install/) or [Singularity](https://sylabs.io/docs/). 13 | - Once you have installed **docker**, follow the [post-installation steps](https://docs.docker.com/engine/install/linux-postinstall/) to ensure that docker can be run without `sudo`. 14 | 15 | - Get an NGC account and API key: 16 | 17 | - Go to the [NGC](https://ngc.nvidia.com/) website and click on `Register for NGC`. 18 | - Click on the `Continue` button where `NVIDIA Account (Use existing or create a new NVIDIA account)` is written. 19 | - Fill in the required information and register, then proceed to log in with your new account credentials. 
20 | - In the top right corner, click on your username and select `Setup` in the dropdown menu. 21 | - Proceed and click on the `Get API Key` button. 22 | - Next, you will find a `Generate API Key` button in the upper right corner. After clicking on this button, a dialog box should appear and you have to click on the `Confirm` button. 23 | - Finally, copy the generated API key and username and save them somewhere on your local system. 24 | 25 | ### Tested environment 26 | 27 | All Labs were tested and is set to run on a DGX machine equipped with an Ampere A100 GPU. It was also tested using a workstation equipped with an NVIDIA RTX A3000 GPU with 6GB of VRAM, reducing all the batch sizes to 8 during training. 28 | The results may vary when using different hardware and some hyperparameters may not be ideal for fully taking advantage of the graphic card. 29 | 30 | 31 | ### Deploying with container 32 | 33 | This material can be deployed with either Docker or Singularity container, refer to the respective sections for the instructions. 34 | 35 | #### Running Docker Container 36 | 37 | ##### Lab 1 & 2 38 | 39 | **Install dependencies** 40 | 41 | 1. Create a new `conda` environment using `miniconda`: 42 | 43 | - Install `Miniconda` by following the [official instructions](https://conda.io/projects/conda/en/latest/user-guide/install/). 44 | - Once you have installed `miniconda`, create a new environment by setting the Python version to 3.6: 45 | 46 | `conda create -n launcher python=3.6` 47 | 48 | - Activate the `conda` environment that you have just created: 49 | 50 | `conda activate launcher` 51 | 52 | - When you are done with your session, you may deactivate your `conda` environment using the `deactivate` command: 53 | 54 | `conda deactivate` 55 | 56 | 57 | 2. Install the TAO Launcher Python package called `nvidia-tao` into the conda launcher environment: 58 | 59 | `conda activate launcher` 60 | 61 | `pip3 install nvidia-tao` 62 | 63 | 3. Invoke the entrypoints using the this command `tao -h`. You should see the following output: 64 | ``` 65 | usage: tao 66 | {list,stop,info,augment,bpnet,classification,detectnet_v2,dssd,emotionnet,faster_rcnn,fpenet,gazenet,gesturenet, 67 | heartratenet,intent_slot_classification,lprnet,mask_rcnn,punctuation_and_capitalization,question_answering, 68 | retinanet,speech_to_text,ssd,text_classification,converter,token_classification,unet,yolo_v3,yolo_v4,yolo_v4_tiny} 69 | ... 70 | 71 | Launcher for TAO 72 | 73 | optional arguments: 74 | -h, --help show this help message and exit 75 | 76 | tasks: 77 | {list,stop,info,augment,bpnet,classification,detectnet_v2,dssd,emotionnet,faster_rcnn,fpenet,gazenet,gesturenet,heartratenet 78 | ,intent_slot_classification,lprnet,mask_rcnn,punctuation_and_capitalization,question_answering,retinanet,speech_to_text, 79 | ssd,text_classification,converter,token_classification,unet,yolo_v3,yolo_v4,yolo_v4_tiny} 80 | ``` 81 | 82 | For more info, visit the [TAO Toolkit documentation](https://docs.nvidia.com/tao/tao-toolkit/text/tao_toolkit_quick_start_guide.html). 83 | 84 | 4. 
Install other dependencies needed to run the lab: 85 | ``` 86 | pip install jupyterlab \ 87 | matplotlib \ 88 | fiftyone \ 89 | attrdict \ 90 | tqdm \ 91 | gdown \ 92 | nvidia-pyindex \ 93 | tritonclient[all] 94 | ``` 95 | 96 | **Run the Labs** 97 | 98 | Activate the conda launcher environment: `conda activate launcher` 99 | 100 | You are to run the first two notebooks `1.Data_labeling_and_preprocessing.ipynb` and `2.Object_detection_using_TAO_YOLOv4.ipynb` in the `launcher` environment. 101 | 102 | Launch the jupyter lab with: 103 | 104 | `jupyter-lab --no-browser --allow-root --ip=0.0.0.0 --port=8888 --NotebookApp.token="" --notebook-dir=~/End-to-End-Computer-Vision/workspace` 105 | 106 | Remember to set the `--notebook-dir` to the location where the `project folder` where this material is located. 107 | 108 | Then, open jupyter lab in the browser at http://localhost:8888 and start working on the lab by clicking on the `Start_here.ipynb` notebook. 109 | 110 | When you are done with `1.Data_labeling_and_preprocessing.ipynb` and `2.Object_detection_using_TAO_YOLOv4.ipynb`, move to the next section. 111 | 112 | ##### Lab 3 113 | 114 | To start the Triton Inference Server instance, you will need to run a container along with the `launcher` virtual environment. This is to emulate the client-server mechanism but on the same system. To start the server, `open a new terminal` and launch the command: 115 | ``` 116 | docker run \ 117 | --gpus=1 --rm \ 118 | -p 8000:8000 -p 8001:8001 -p 8002:8002 \ 119 | -v ~/End-to-End-Computer-Vision/workspace/models:/models \ 120 | nvcr.io/nvidia/tritonserver:22.05-py3 \ 121 | tritonserver \ 122 | --model-repository=/models \ 123 | --exit-on-error=false \ 124 | --model-control-mode=poll \ 125 | --repository-poll-secs 30 126 | ``` 127 | In order to work properly in this lab, the triton server version should match the TAO Toolkit version that was installed (visible by running `tao info`). Containers with the same `yy.mm` tag avoid version mismatches and conflicts that may prevent you from running and deploying your models. The path to the local model repository needs to be set as well in order to be mapped inside the container. 128 | 129 | After starting Triton Server, you will see an output on the terminal showing `the server starting up and loading models`. This implies Triton is ready to accept inference requests. 130 | ``` 131 | +----------------------+---------+--------+ 132 | | Model | Version | Status | 133 | +----------------------+---------+--------+ 134 | | | | READY | 135 | | .. | . | .. | 136 | | .. | . | .. | 137 | +----------------------+---------+--------+ 138 | ... 139 | ... 140 | ... 141 | I1002 21:58:57.891440 62 grpc_server.cc:3914] Started GRPCInferenceService at 0.0.0.0:8001 142 | I1002 21:58:57.893177 62 http_server.cc:2717] Started HTTPService at 0.0.0.0:8000 143 | I1002 21:58:57.935518 62 http_server.cc:2736] Started Metrics Service at 0.0.0.0:8002 144 | ``` 145 | 146 | Now you can go back to your browser with jupyter lab open and run `3.Model_deployment_with_Triton_Inference_Server.ipynb`. 147 | 148 | When you are done with the notebook, shut down jupyter lab by selecting `File > Shut Down` as well as the Triton Docker container of the server by pressing `ctrl + c` in the logs terminal. 149 | 150 | 151 | ##### Lab 4 & 5 152 | 153 | To run the DeepStream content, build a Docker container by following these steps: 154 | 155 | - Open a terminal window, navigate to the directory where `Dockerfile_deepstream` is located (e.g. 
`cd ~/End-to-End-Computer-Vision`) 156 | - Run `sudo docker build -f Dockerfile_deepstream --network=host -t : .`, for instance: `sudo docker build -f Dockerfile_deepstream --network=host -t deepstream:1.0 .` 157 | - Next, execute the command: `sudo docker run --rm -it --gpus=all -v ~/End-to-End-Computer-Vision/workspace:/opt/nvidia/deepstream/deepstream-6.1/workspace --network=host -p 8888:8888 deepstream:1.0` 158 | 159 | flags: 160 | - `--rm` will delete the container when finished. 161 | - `-it` means run in interactive mode. 162 | - `--gpus` option makes GPUs accessible inside the container. 163 | - `-v` is used to mount host directories in the container filesystem. 164 | - `--network=host` will share the host’s network stack to the container. 165 | - `-p` flag explicitly maps a single port or range of ports. 166 | 167 | When you are inside the container, launch jupyter lab: 168 | `jupyter-lab --no-browser --allow-root --ip=0.0.0.0 --port=8888 --NotebookApp.token="" --notebook-dir=/opt/nvidia/deepstream/deepstream-6.1/workspace`. 169 | 170 | Open the browser at `http://localhost:8888` and start working on `4.Model_deployment_with_DeepStream.ipynb` notebook. Then, move to `5.Measure_object_size_using_OpenCV.ipynb` and complete the material. 171 | 172 | As soon as you are done with that, shut down jupyter lab by selecting `File > Shut Down` and the container by typing `exit` or pressing `ctrl d` in the terminal window. 173 | 174 | Congratulations, you've successfully built and deployed an end-to-end computer vision pipeline! 175 | 176 | 177 | #### Running Singularity Container 178 | 179 | ###### Lab 1 & 2 180 | 181 | To build the TAO Toolkit Singularity container, run: `singularity build --fakeroot --sandbox tao_e2ecv.simg Singularity_tao` 182 | 183 | Run the container with: `singularity run --fakeroot --nv -B ~/End-to-End-Computer-Vision/workspace:/workspace/tao-experiments tao_e2ecv.simg jupyter-lab --no-browser --allow-root --ip=0.0.0.0 --port=8888 --NotebookApp.token="" --notebook-dir=/workspace/tao-experiments` 184 | 185 | The `-B` flag mounts local directories in the container filesystem and ensures changes are stored locally in the project folder. Open jupyter lab in browser: http://localhost:8888 186 | 187 | You may now start working on the lab by clicking on the `Start_here.ipynb` notebook. 188 | 189 | When you are done with `1.Data_labeling_and_preprocessing.ipynb` and `2.Object_detection_using_TAO_YOLOv4.ipynb`, shut down jupyter lab by selecting `File > Shut Down` in the top left corner, then shut down the Singularity container by typing `exit` or pressing `ctrl + d` in the terminal window. 
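Whether the Triton server for Lab 3 is started with Docker (above) or Singularity (below), it can be helpful to sanity-check from the client environment that the server is reachable before opening the notebook. The following is a minimal sketch using the `tritonclient` package installed earlier; the model name `yolov4_tao` is only a placeholder for whatever model directory you actually placed in the model repository, and the URLs follow the cluster note further below (e.g. use `dgx05:8000` instead of `localhost:8000` when the server runs on a compute node).

```
# Quick reachability check against a running Triton server.
import tritonclient.http as httpclient

client = httpclient.InferenceServerClient(url="localhost:8000")

print("Server live: ", client.is_server_live())
print("Server ready:", client.is_server_ready())

# Placeholder name -- replace with the directory name of your exported model.
print("Model ready: ", client.is_model_ready("yolov4_tao"))
```

Equivalently, `curl -v localhost:8000/v2/health/ready` should return `200 OK` once the server has finished loading the models.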
190 | 
191 | 
192 | ##### Lab 3
193 | 
194 | To download the Triton Inference Server Singularity container for the Server, run: `singularity pull tritonserver:22.05-py3.sif docker://nvcr.io/nvidia/tritonserver:22.05-py3`
195 | 
196 | To build the Triton Inference Server Singularity container for the Client, run: `singularity build --fakeroot --sandbox triton_client_e2ecv.simg Singularity_triton`
197 | 
198 | To start the Triton Inference Server container, run:
199 | ```
200 | singularity run \
201 | --nv \
202 | -B ~/End-to-End-Computer-Vision/workspace/models:/models \
203 | /mnt/shared/bootcamps/tritonserver:22.05-py3.sif \
204 | tritonserver \
205 | --model-repository=/models \
206 | --exit-on-error=false \
207 | --model-control-mode=poll \
208 | --repository-poll-secs 30 \
209 | --http-port 8000 \
210 | --grpc-port 8001 \
211 | --metrics-port 8002
212 | ```
213 | 
214 | You may now start the Triton Client container with: `singularity run --fakeroot --nv -B ~/End-to-End-Computer-Vision/workspace:/workspace triton_client_e2ecv.simg jupyter-lab --no-browser --allow-root --ip=0.0.0.0 --port=8888 --NotebookApp.token="" --notebook-dir=/workspace`
215 | 
216 | Then, open jupyter lab in the browser at http://localhost:8888 and continue the lab by running `3.Model_deployment_with_Triton_Inference_Server.ipynb`.
217 | 
218 | **Note**
219 | 
220 | In a cluster environment, the `Triton Inference Server` container should be launched on the compute node (e.g. dgx05), while the `Triton Client` container should be run on the login node (CPU). Therefore, within the notebook, the `url` variable should be modified as follows:
221 | 
222 | 
223 | ```
224 | assuming you are on dgx05, replace
225 | 
226 | url = "localhost:8000" with url = "dgx05:8000"
227 | 
228 | url = "localhost:8001" with url = "dgx05:8001"
229 | ```
230 | 
231 | As soon as you are done with that, shut down jupyter lab by selecting `File > Shut Down` and the Client container by typing `exit` or pressing `ctrl + d` in the terminal window.
232 | 
233 | 
234 | ##### Lab 4 & 5
235 | 
236 | To build the DeepStream Singularity container, run: `sudo singularity build --sandbox deepstream_e2ecv.simg Singularity_deepstream`
237 | 
238 | Run the DeepStream container with: `singularity run --fakeroot --nv -B ~/End-to-End-Computer-Vision/workspace:/opt/nvidia/deepstream/deepstream-6.1/workspace /mnt/shared/bootcamps/deepstream_e2ecv.simg jupyter-lab --no-browser --allow-root --ip=0.0.0.0 --port=8888 --NotebookApp.token="" --notebook-dir=/opt/nvidia/deepstream/deepstream-6.1/workspace`
239 | 
240 | Open jupyter lab in the browser at http://localhost:8888 and complete the material by running `4.Model_deployment_with_DeepStream.ipynb` and `5.Measure_object_size_using_OpenCV.ipynb`.
241 | 
242 | Congratulations, you've successfully built and deployed an end-to-end computer vision pipeline!
243 | 
244 | 
245 | 
246 | ## Known issues
247 | 
248 | ### TAO
249 | 
250 | a. When installing the TAO Toolkit Launcher to your host machine’s native python3, as opposed to the recommended route of using a virtual environment, you may get an error saying that the `tao` binary wasn’t found. This is because the path to the `tao` binary installed by pip wasn’t added to the `PATH` environment variable on your local machine. In this case, please run the following command:
251 | 
252 | `export PATH=$PATH:~/.local/bin`
253 | 
254 | b. When training, you may see an error message stating:
255 | ```
256 | Resource exhausted: OOM when allocating tensor...
257 | ERROR: Ran out of GPU memory, please lower the batch size, use a smaller input resolution, use a smaller backbone, or enable model parallelism for supported TLT architectures (see TLT documentation).
258 | ```
259 | As the error says, you ran out of GPU memory. Try lowering the batch size to reduce the memory footprint.
260 | 
261 | ### NGC
262 | 
263 | You may see an error message stating:
264 | 
265 | `ngc: command not found ...`
266 | 
267 | You can resolve this by setting the path to ngc within the conda launcher environment as:
268 | 
269 | `echo "export PATH=\"\$PATH:$(pwd)/ngc-cli\"" >> ~/.bash_profile && source ~/.bash_profile`
270 | 
271 | ### Triton Inference Server
272 | 
273 | You may see in the server logs an error message similar to:
274 | 
275 | ```
276 | E0930 06:24:12.416803 1 logging.cc:43] 1: [stdArchiveReader.cpp::StdArchiveReader::40] Error Code 1: Serialization (Serialization assertion stdVersionRead == serializationVersion failed.Version tag does not match. Note: Current Version: 213, Serialized Engine Version: 205)
277 | E0930 06:24:12.423693 1 logging.cc:43] 4: [runtime.cpp::deserializeCudaEngine::50] Error Code 4: Internal Error (Engine deserialization failed.)
278 | ```
279 | 
280 | The Server container is using a different version of TensorRT than the one the engine was generated with, so the Server is unable to load the model. Make sure to use containers with the same `yy.mm` tag when pulling from NGC, as this ensures there are no version mismatches. You can verify the version of TAO by running the `tao info` command and then pull the matching `nvcr.io/nvidia/tritonserver:yy.mm-py3` Server container to solve the issue.
281 | 
282 | ### DeepStream
283 | 
284 | When running the pipeline, you may see an error similar to:
285 | 
286 | ```
287 | ERROR: [TRT]: 4: [runtime.cpp::deserializeCudaEngine::50] Error Code 4: Internal Error (Engine deserialization failed.)
288 | ERROR: ../nvdsinfer/nvdsinfer_model_builder.cpp:1528 Deserialize engine failed from file: /opt/nvidia/deepstream/deepstream-6.1/workspace/yolo_v4/export/trt.engine
289 | ```
290 | The DeepStream container uses a different version of TensorRT than the one the engine was generated with, so it is unable to use the TensorRT engine for inference. Set the `tlt-encoded-model` path in the configuration file so that, if engine deserialization fails, DeepStream will attempt to rebuild the engine internally.
291 | 
--------------------------------------------------------------------------------
/workspace/jupyter_notebook/1.Data_labeling_and_preprocessing.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "

Home Page

\n", 8 | "\n", 9 | "
\n", 10 | " \n", 11 | " 1\n", 12 | " 2\n", 13 | " 3\n", 14 | " 4\n", 15 | " 5\n", 16 | " 6\n", 17 | " 7\n", 18 | " \n", 19 | " Next Notebook\n", 20 | "
" 21 | ] 22 | }, 23 | { 24 | "cell_type": "markdown", 25 | "metadata": {}, 26 | "source": [ 27 | "# Data labeling and preprocessing\n", 28 | "\n", 29 | "***" 30 | ] 31 | }, 32 | { 33 | "cell_type": "markdown", 34 | "metadata": {}, 35 | "source": [ 36 | "**The goal of this notebook is to make you understand how to:**\n", 37 | "\n", 38 | "- Label data for object detection applications\n", 39 | "- Convert a dataset into KITTI format\n", 40 | "\n", 41 | "**Contents of this notebook:**\n", 42 | "\n", 43 | "- [Custom data labeling](#Custom-data-labeling)\n", 44 | " - [Labeling with Label Studio](#Labeling-with-Label-Studio)\n", 45 | " - [Labeling with Yolo Mark](#Labeling-with-Yolo-Mark)\n", 46 | "- [Download data for the lab](#Download-data-for-the-lab)\n", 47 | "- [Conversion to KITTI format](#Conversion-to-KITTI-format)\n", 48 | " - [Load the dataset](#Load-the-dataset)\n", 49 | " - [Export to KITTI](#Export-to-KITTI)" 50 | ] 51 | }, 52 | { 53 | "cell_type": "markdown", 54 | "metadata": {}, 55 | "source": [ 56 | "## Custom data labeling\n", 57 | "\n", 58 | "Training a deep learning model for an object detection task requires a meaningful amount of annotated data. A dataset for a specific domain application may not be available often or if it is, chances are it may not be labeled or adequate in size. In this notebook, we show how to annotate a custom dataset with bounding boxes and convert it into KITTI file format, useful to expand the number of samples with offline data augmentation or to train a model with transfer learning.\n", 59 | "\n", 60 | "\n", 61 | "\n", 62 | "We present two tools for data labeling operations:\n", 63 | "- Label Studio\n", 64 | "- Yolo Mark\n", 65 | "\n", 66 | "We recommend using Label Studio because of the more intuitive user interface and a better overall labeling experience." 67 | ] 68 | }, 69 | { 70 | "cell_type": "markdown", 71 | "metadata": {}, 72 | "source": [ 73 | "## Download data for the lab\n", 74 | "\n", 75 | "In this lab, we will provide you with a labeled version of a dataset containing three types of fruit - `apples`, `bananas`, and `oranges` - each fresh or rotten, for a total of six classes. The dataset was labeled using Label Studio, as explained above. The project folder has been renamed to `label-studio`. Running the following cell will make the data available in the `/workspace/data` directory." 76 | ] 77 | }, 78 | { 79 | "cell_type": "code", 80 | "execution_count": null, 81 | "metadata": {}, 82 | "outputs": [], 83 | "source": [ 84 | "!python3 ../source_code/dataset.py" 85 | ] 86 | }, 87 | { 88 | "cell_type": "markdown", 89 | "metadata": { 90 | "tags": [] 91 | }, 92 | "source": [ 93 | "\n", 94 | "### Labeling with Label Studio\n", 95 | "\n", 96 | "[Label Studio](https://labelstud.io/) is an open-source, flexible, quickly installable data labeling tool with a very convenient user interface. The tool natively comes with a Python module available to install via the pip package manager, but can also be installed in alternative ways, all available [here](https://labelstud.io/guide/install.html), so feel free to pick the one you are most comfortable with.\n", 97 | "\n", 98 | "To get started with the Python module, open a terminal window in your preferred environment (ideally, create a fresh virtual one) and run the command `pip install -U label-studio`. Once installed, start the server with the command `label-studio`. 
This will automatically open a user interface on the default web browser on port 8080, accessible at `http://localhost:8080` if you are working on your local machine, unless another port is specified.\n", 99 | "\n", 100 | "To proceed, follow these steps and visual explanations:\n", 101 | "- Sign up with an email address and create a password (that these credentials are stored locally on the Label Studio server and can be whatever you prefer).\n", 102 | "\n", 103 | "\n", 104 | "- Create a new project.\n", 105 | "\n", 106 | "\n", 107 | "- Give it a title and optionally a brief description.\n", 108 | "\n", 109 | "\n", 110 | "- Drag and drop images to upload.\n", 111 | "\n", 112 | "\n", 113 | "- Select an object detection task with bounding boxes.\n", 114 | "\n", 115 | "\n", 116 | "- Set the class names.\n", 117 | "" 118 | ] 119 | }, 120 | { 121 | "cell_type": "markdown", 122 | "metadata": {}, 123 | "source": [ 124 | "If you plan on tagging a significant amount of data, you will likely need to separate it into multiple chunks to avoid hitting the per-project memory limit.\n", 125 | "\n", 126 | "Once the previous steps are completed, you can start with the labeling process. From the project menu, click on `Label All Tasks` at the top.\n", 127 | "\n", 128 | "\n", 129 | "\n", 130 | "Then, for every image, do the following operations:\n", 131 | "- Select an appropriate class.\n", 132 | "- Draw all the bounding boxes for that class.\n", 133 | "- Repeat for other classes.\n", 134 | "- Click `Submit`.\n", 135 | "\n", 136 | "\n", 137 | "\n", 138 | "This will automatically load the next image until there are no images left. While labeling, you can stop at any time and when you resume, you will continue exactly where you left off.\n", 139 | "\n", 140 | "" 141 | ] 142 | }, 143 | { 144 | "cell_type": "markdown", 145 | "metadata": {}, 146 | "source": [ 147 | "As soon as you have completed the labeling activity, either because you have run out of images or because you are satisfied with how many you have, you can go back to the home page of the project, apply filters to the annotations, and export them by clicking on `Export`. Make sure to scroll down and select the YOLO format when you do so.\n", 148 | "\n", 149 | "\n", 150 | "\n", 151 | "For more in-depth information and an additional visual explanation of the previous steps, explore this [dedicated tutorial](https://labelstud.io/blog/Quickly-Create-Datasets-for-Training-YOLO-Object-Detection.html) on how to label images for YOLO applications on the Label Studio blog." 
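After unzipping the export, it can be worth a quick programmatic sanity check that every label file has a matching image and that all class indices fall within `classes.txt`. Below is a small sketch using only the Python standard library; the folder names follow the default export layout described next, and the archive name is just the example shown there, so adjust both to your own export:

```python
import os

# Path to the unzipped Label Studio export (example name, replace with yours)
export_dir = "project-1-at-2022-09-20-15-20-f6c05363"

with open(os.path.join(export_dir, "classes.txt")) as f:
    classes = [line.strip() for line in f if line.strip()]
print(f"{len(classes)} classes: {classes}")

images = {os.path.splitext(name)[0] for name in os.listdir(os.path.join(export_dir, "images"))}
labels_dir = os.path.join(export_dir, "labels")

missing_images, bad_rows = [], 0
for label_file in os.listdir(labels_dir):
    if os.path.splitext(label_file)[0] not in images:
        missing_images.append(label_file)
    with open(os.path.join(labels_dir, label_file)) as f:
        for row in f:
            if row.strip() and int(row.split()[0]) >= len(classes):
                bad_rows += 1

print(f"{len(missing_images)} label files without a matching image")
print(f"{bad_rows} annotation rows with an out-of-range class index")
```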
152 | ] 153 | }, 154 | { 155 | "cell_type": "markdown", 156 | "metadata": {}, 157 | "source": [ 158 | "The exported data has a similar structure to this one by default, after unzipping the downloaded file:\n", 159 | "```\n", 160 | "project-1-at-2022-09-20-15-20-f6c05363.zip\n", 161 | " notes.json\n", 162 | " classes.txt\n", 163 | " labels\n", 164 | " image_filename1.txt\n", 165 | " image_filename2.txt\n", 166 | " image_filename3.txt\n", 167 | " ...\n", 168 | " images\n", 169 | " image_filename1.\n", 170 | " image_filename2.\n", 171 | " image_filename3.\n", 172 | " ...\n", 173 | "```\n", 174 | "\n", 175 | "\n", 176 | "The TXT files in the `labels` folder are space-delimited files where each row corresponds to an object in the image with the same name in the `images` folder, in the standard YOLO format:\n", 177 | "```\n", 178 | " \n", 179 | "```\n", 180 | "\n", 181 | "\n", 182 | "where `` is the zero-based integer index of the object class label from `classes.txt`, the bounding box coordinates are expressed as relative coordinates in `[0, 1] x [0, 1]`, and `` is an optional detection confidence in `[0, 1]`, left blank by Label Studio." 183 | ] 184 | }, 185 | { 186 | "cell_type": "markdown", 187 | "metadata": {}, 188 | "source": [ 189 | "### Labeling with Yolo Mark\n", 190 | "\n", 191 | "Another popular data labeling tool is [Yolo Mark](https://github.com/AlexeyAB/Yolo_mark), a Windows and Linux GUI for marking bounded boxes of objects in images for training Yolo. Its use is not as straightforward as Label Studio, as it needs to be compiled from source and does not come with a Python module, but is still as an option to consider for a project.\n", 192 | "\n", 193 | "In order to use Yolo Mark, [download](https://github.com/AlexeyAB/Yolo_mark) the repository from GitHub and follow the instructions in the README file to get the executable program, depending on your operating system. Note that a working installation of [OpenCV](https://opencv.org/) is required to run the program successfully. If you are a Windows user you might consider a tool like [MS Visual Studio](https://visualstudio.microsoft.com/vs/) to compile the project, while for Linux users, you will just need to type the commands `cmake .` and then `make` after moving into the project directory.\n", 194 | "\n", 195 | "At this point, to use the tool to label your custom images, place them in the `x64/Release/data/img` directory, change the number of classes in `x64/Release/data/obj.data` as well as the class names in `x64/Release/data/obj.names`, and run `x64/Release/yolo_mark.cmd` on Windows or `./linux_mark.sh` on Linux to start labeling.\n", 196 | "\n", 197 | "\n", 198 | "\n", 199 | "The resulting YOLO dataset in `x64/Release/data` will have the following structure:\n", 200 | "```\n", 201 | "data\n", 202 | " obj.data\n", 203 | " obj.names\n", 204 | " train.txt\n", 205 | " img\n", 206 | " image_filename1.\n", 207 | " image_filename1.txt\n", 208 | " image_filename2.\n", 209 | " image_filename2.txt\n", 210 | " image_filename3.\n", 211 | " image_filename3.txt\n", 212 | " ...\n", 213 | "``` \n", 214 | "with images and corresponding labels in the same folder, `obj.names` with the class names, and a `train.txt` file with the paths to the labeled images. The format of the TXT annotation files in the `img` folder is the same YOLO format as described before." 
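Since Label Studio and Yolo Mark both emit the same YOLO format, it may help to see how a single annotation row maps to the pixel-corner convention used by KITTI in the next section. Here is a quick sketch in plain Python; the image size and the annotation row are made-up example values:

```python
# Convert one YOLO-format row to pixel corner coordinates
# [x-top-left, y-top-left, x-bottom-right, y-bottom-right].
# The image size and row below are hypothetical examples.
img_width, img_height = 640, 480
row = "2 0.5153 0.4281 0.2500 0.3750"  # <class index> <x-center> <y-center> <width> <height>, all relative

class_idx, xc, yc, w, h = row.split()
xc, yc, w, h = (float(v) for v in (xc, yc, w, h))

x_min = (xc - w / 2) * img_width
y_min = (yc - h / 2) * img_height
x_max = (xc + w / 2) * img_width
y_max = (yc + h / 2) * img_height

print(f"class {class_idx}: ({x_min:.1f}, {y_min:.1f}) to ({x_max:.1f}, {y_max:.1f})")
```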
215 | ] 216 | }, 217 | { 218 | "cell_type": "markdown", 219 | "metadata": { 220 | "tags": [] 221 | }, 222 | "source": [ 223 | "## Conversion to KITTI format\n", 224 | "\n", 225 | "Regardless of whether Label Studio or Yolo Mark was used, or a dataset already labeled in YOLO format was provided, conversion to KITTI format is required to experiment with the NVIDIA® TAO Toolkit in the next notebook. The KITTI format not only allows you to unleash the power of transfer learning and pre-trained models available within the TAO Toolkit but also is used to perform offline data augmentation and dramatically increase the size of the dataset.\n", 226 | "\n", 227 | "The KITTI format organizes the data directories of images and corresponding labels into a structure similar to Label Studio, namely:\n", 228 | "```\n", 229 | "dataset_dir\n", 230 | " data\n", 231 | " image_filename1.\n", 232 | " image_filename2.\n", 233 | " image_filename3.\n", 234 | " ...\n", 235 | " labels\n", 236 | " image_filename1.txt\n", 237 | " image_filename2.txt\n", 238 | " image_filename3.txt\n", 239 | " ...\n", 240 | "``` \n", 241 | "The main difference is that in the KITTI format the labels TXT files are space-delimited files where each row corresponds to an object and **the bounding box is stored using 15 (and optional 16th confidence) columns**. The meaning of each of the 15 required columns is described [here](https://docs.nvidia.com/tao/tao-toolkit/text/data_annotation_format.html#label-files). In particular, the first item is the object label and from the fifth to the eighth position we have the bounding box coordinates expressed in pixels **[x-top-left, y-top-left, x-bottom-right, y-bottom-right]**. Note that this is different from the YOLO format since we now use corners to identify the box and it is not resizing invariant.\n", 242 | "\n", 243 | "\n", 244 | "\n", 245 | "To perform the conversion between dataset formats, we will use [FiftyOne](https://voxel51.com/docs/fiftyone/), an open-source Python tool for handling computer vision datasets. FiftyOne allows loading a YOLO dataset and exporting it as KITTI in a few lines of code.\n", 246 | "\n", 247 | "### Load the dataset\n", 248 | "\n", 249 | "The generic `Dataset.from_dir()` method (documentation available [here](https://voxel51.com/docs/fiftyone/api/fiftyone.core.dataset.html#fiftyone.core.dataset.Dataset.from_dir)) loads a dataset from disk and depending on the format, additional parameters can be passed to customize the data import. 
When dealing with a YOLO data format like in our case, these parameters are inherited from the [YOLOv4DatasetImporter](https://voxel51.com/docs/fiftyone/api/fiftyone.utils.yolo.html#fiftyone.utils.yolo.YOLOv4DatasetImporter) class and a customized import would require the following arguments:\n", 250 | "- `dataset_dir`: the dataset directory.\n", 251 | "- `dataset_type`: the `fiftyone.types.dataset_types.Dataset` type of the dataset.\n", 252 | "- `data_path`: to enable explicit control over the location of the media.\n", 253 | "- `labels_path`: to enable explicit control over the location of the labels.\n", 254 | "- `images_path`: to enable explicit control over the location of the image listing file.\n", 255 | "- `objects_path`: to enable explicit control over the location of the object names file.\n", 256 | "\n", 257 | "If your data stored on disk is not in YOLO format but in one of the [many common formats](https://voxel51.com/docs/fiftyone/user_guide/dataset_creation/datasets.html#supported-import-formats) supported natively by FiftyOne, then you can automatically load your data with minimal code changes in terms of additional parameters.\n", 258 | "\n", 259 | "To install the FiftyOne Python module, run `pip install fiftyone` in your preferred environment (ideally, a virtual one). In this lab, we have already installed it for you.\n", 260 | "\n", 261 | "Let's now load a YOLO dataset generated with Label Studio into FiftyOne. In this case, we have an object names file but we don't have an image listing file, so we just ignore the `images_path` argument and let FiftyOne list the data directory for us." 262 | ] 263 | }, 264 | { 265 | "cell_type": "code", 266 | "execution_count": null, 267 | "metadata": {}, 268 | "outputs": [], 269 | "source": [ 270 | "import fiftyone as fo\n", 271 | "\n", 272 | "dataset_dir = \"../data/label-studio/\"\n", 273 | "data_path = \"images/\"\n", 274 | "labels_path = \"labels/\"\n", 275 | "objects_path = \"classes.txt\"\n", 276 | "\n", 277 | "# Create the dataset\n", 278 | "dataset = fo.Dataset.from_dir(\n", 279 | " dataset_dir=dataset_dir,\n", 280 | " data_path=data_path,\n", 281 | " labels_path=labels_path,\n", 282 | " objects_path=objects_path,\n", 283 | " dataset_type=fo.types.YOLOv4Dataset\n", 284 | ")\n", 285 | "\n", 286 | "# View summary info about the dataset\n", 287 | "print(dataset)\n", 288 | "\n", 289 | "# Print the first few samples in the dataset\n", 290 | "print(dataset.head(2))" 291 | ] 292 | }, 293 | { 294 | "cell_type": "markdown", 295 | "metadata": {}, 296 | "source": [ 297 | "Instead, if we were trying to load a dataset generated with Yolo Mark into FiftyOne, saved into a folder named `yolo-mark` that isn't available for the lab, images and labels would now be in the same folder and we would have both an object names file and an image listing file. However, the `train.txt` image listing file contains paths from the executable file directory and not from the dataset home directory, so FiftyOne will not find the images unless we substitute all paths with relative paths in the form `img/image_filename.`. 
We can do that with some simple code that generates a new `images.txt` file with the right paths.\n", 298 | "```python\n", 299 | "# Read the file\n", 300 | "with open(\"../data/yolo-mark/train.txt\", \"r\") as file :\n", 301 | " filedata = file.read()\n", 302 | " \n", 303 | "# Replace the target string\n", 304 | "# On Linux\n", 305 | "filedata = filedata.replace(\"x64/Release/data/img/\", \"img/\")\n", 306 | "# On Windows\n", 307 | "#filedata = filedata.replace(\"data/img/\", \"img/\")\n", 308 | "\n", 309 | "# Write the file out again\n", 310 | "with open(\"../data/yolo-mark/images.txt\", \"w\") as file:\n", 311 | " file.write(filedata)\n", 312 | "``` \n", 313 | "\n", 314 | "Alternatively, we can again ignore the `images_path` argument and let FiftyOne list all the data directory for us." 315 | ] 316 | }, 317 | { 318 | "cell_type": "code", 319 | "execution_count": null, 320 | "metadata": {}, 321 | "outputs": [], 322 | "source": [ 323 | "# If you use a dataset labeled with Yolo Mark, you will need a yolo-mark folder to run the code below to load it into FiftyOne\n", 324 | "\n", 325 | "# dataset_dir = \"../data/yolo-mark/\"\n", 326 | "# data_path = \"img/\"\n", 327 | "# images_path = \"images.txt\"\n", 328 | "# objects_path = \"obj.names\"\n", 329 | "\n", 330 | "# Create the dataset\n", 331 | "# dataset = fo.Dataset.from_dir(\n", 332 | "# dataset_dir=dataset_dir,\n", 333 | "# data_path=data_path,\n", 334 | "# images_path=images_path,\n", 335 | "# objects_path=objects_path,\n", 336 | "# dataset_type=fo.types.YOLOv4Dataset\n", 337 | "# )\n", 338 | "\n", 339 | "# View summary info about the dataset\n", 340 | "# print(dataset)\n", 341 | "\n", 342 | "# Print the first few samples in the dataset\n", 343 | "# print(dataset.head(2))" 344 | ] 345 | }, 346 | { 347 | "cell_type": "markdown", 348 | "metadata": {}, 349 | "source": [ 350 | "### Export to KITTI\n", 351 | "\n", 352 | "Once the dataset is loaded into FiftyOne, conversion to KITTI format is immediate with an export command. The `Dataset.export()` method (documentation available [here](https://voxel51.com/docs/fiftyone/api/fiftyone.core.dataset.html#fiftyone.core.dataset.Dataset.export)) writes the samples to disk and a customized export to KITTI format would require the following arguments:\n", 353 | "- `export_dir`: the dataset export directory.\n", 354 | "- `dataset_type`: the `fiftyone.types.dataset_types.Dataset` type of the dataset.\n", 355 | "- `data_path`: to enable explicit control over the location of the exported media.\n", 356 | "- `labels_path`: to enable explicit control over the location of the exported labels.\n", 357 | "\n", 358 | "Providing only `export_dir` and `dataset_type` would result in an export of the content to a directory following the default layout for the specified format." 359 | ] 360 | }, 361 | { 362 | "cell_type": "code", 363 | "execution_count": null, 364 | "metadata": {}, 365 | "outputs": [], 366 | "source": [ 367 | "export_dir = \"../data/training/\"\n", 368 | "data_path = \"image_2/\"\n", 369 | "labels_path = \"label_2/\"\n", 370 | "\n", 371 | "# Export the dataset\n", 372 | "dataset.export(\n", 373 | " export_dir=export_dir,\n", 374 | " data_path=data_path,\n", 375 | " labels_path=labels_path,\n", 376 | " dataset_type=fo.types.KITTIDetectionDataset\n", 377 | ")" 378 | ] 379 | }, 380 | { 381 | "cell_type": "markdown", 382 | "metadata": {}, 383 | "source": [ 384 | "We can now view some images of our dataset before moving on to the next notebook." 
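Before visualizing, you can optionally spot-check one of the exported KITTI label files to confirm the 15-column layout and the pixel-corner boxes. A small sketch, assuming the export paths used above and simply taking the first label file found:

```python
import os

label_dir = "../data/training/label_2"
label_file = sorted(os.listdir(label_dir))[0]  # spot-check the first exported label file
print(label_file)

with open(os.path.join(label_dir, label_file)) as f:
    for row in f:
        cols = row.split()
        # Column 0 is the class name; columns 5-8 (indices 4:8) are
        # [x-top-left, y-top-left, x-bottom-right, y-bottom-right] in pixels.
        name, bbox = cols[0], [float(v) for v in cols[4:8]]
        print(f"{len(cols)} columns | {name}: {bbox}")
```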
385 | ] 386 | }, 387 | { 388 | "cell_type": "code", 389 | "execution_count": null, 390 | "metadata": {}, 391 | "outputs": [], 392 | "source": [ 393 | "# Simple grid visualizer\n", 394 | "import matplotlib.pyplot as plt\n", 395 | "import os\n", 396 | "from math import ceil\n", 397 | "valid_image_ext = ['.jpg', '.png', '.jpeg', '.ppm']\n", 398 | "\n", 399 | "def visualize_images(img_path, num_cols=4, num_images=10):\n", 400 | " num_rows = int(ceil(float(num_images) / float(num_cols)))\n", 401 | " f, axarr = plt.subplots(num_rows, num_cols, figsize=[80,30])\n", 402 | " f.tight_layout()\n", 403 | " a = [os.path.join(img_path, image) for image in os.listdir(img_path) \n", 404 | " if os.path.splitext(image)[1].lower() in valid_image_ext]\n", 405 | " for idx, img_path in enumerate(a[:num_images]):\n", 406 | " col_id = idx % num_cols\n", 407 | " row_id = idx // num_cols\n", 408 | " img = plt.imread(img_path)\n", 409 | " axarr[row_id, col_id].imshow(img) " 410 | ] 411 | }, 412 | { 413 | "cell_type": "code", 414 | "execution_count": null, 415 | "metadata": {}, 416 | "outputs": [], 417 | "source": [ 418 | "# Visualizing the sample images\n", 419 | "IMG_PATH = '../data/training/image_2'\n", 420 | "COLS = 3 # number of columns in the visualizer grid\n", 421 | "IMAGES = 9 # number of images to visualize\n", 422 | "\n", 423 | "visualize_images(IMG_PATH, num_cols=COLS, num_images=IMAGES)" 424 | ] 425 | }, 426 | { 427 | "cell_type": "markdown", 428 | "metadata": {}, 429 | "source": [ 430 | "In this notebook, we have seen how to label a raw dataset and export it into KITTI format. Next, we will train an object detection model using the TAO Toolkit. Please go to the next notebook by clicking on the `Next Notebook` button below." 431 | ] 432 | }, 433 | { 434 | "cell_type": "markdown", 435 | "metadata": {}, 436 | "source": [ 437 | "***\n", 438 | "\n", 439 | "## Licensing\n", 440 | "\n", 441 | "Copyright © 2022 OpenACC-Standard.org. This material is released by OpenACC-Standard.org, in collaboration with NVIDIA Corporation, under the Creative Commons Attribution 4.0 International (CC BY 4.0). These materials include references to hardware and software developed by other entities; all applicable licensing and copyrights apply." 442 | ] 443 | }, 444 | { 445 | "cell_type": "markdown", 446 | "metadata": {}, 447 | "source": [ 448 | "
\n", 449 | "
\n", 450 | " \n", 451 | " 1\n", 452 | " 2\n", 453 | " 3\n", 454 | " 4\n", 455 | " 5\n", 456 | " 6\n", 457 | " 7\n", 458 | " \n", 459 | " Next Notebook\n", 460 | "
\n", 461 | "\n", 462 | "
\n", 463 | "

Home Page

" 464 | ] 465 | } 466 | ], 467 | "metadata": { 468 | "kernelspec": { 469 | "display_name": "Python 3", 470 | "language": "python", 471 | "name": "python3" 472 | }, 473 | "language_info": { 474 | "codemirror_mode": { 475 | "name": "ipython", 476 | "version": 3 477 | }, 478 | "file_extension": ".py", 479 | "mimetype": "text/x-python", 480 | "name": "python", 481 | "nbconvert_exporter": "python", 482 | "pygments_lexer": "ipython3", 483 | "version": "3.8.8" 484 | }, 485 | "toc-autonumbering": false, 486 | "toc-showcode": false, 487 | "toc-showmarkdowntxt": false 488 | }, 489 | "nbformat": 4, 490 | "nbformat_minor": 4 491 | } 492 | -------------------------------------------------------------------------------- /workspace/jupyter_notebook/6.Challenge_DeepStream.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "

Home Page

\n", 8 | "\n", 9 | "
\n", 10 | " Previous Notebook\n", 11 | " \n", 12 | " 1\n", 13 | " 2\n", 14 | " 3\n", 15 | " 4\n", 16 | " 5\n", 17 | " 6\n", 18 | " 7\n", 19 | " \n", 20 | " Next Notebook\n", 21 | "
" 22 | ] 23 | }, 24 | { 25 | "cell_type": "markdown", 26 | "metadata": {}, 27 | "source": [ 28 | "# Exercise: model deployment with DeepStream\n", 29 | "\n", 30 | "***" 31 | ] 32 | }, 33 | { 34 | "cell_type": "markdown", 35 | "metadata": {}, 36 | "source": [ 37 | "In this notebook, you will review the concepts learned in [4.Model_deployment_with_DeepStream.ipynb](4.Model_deployment_with_DeepStream.ipynb) while trying to deploy your NVIDIA® TAO Toolkit model to DeepStream SDK using Python bindings.\n", 38 | "\n", 39 | "As an exercise, you are asked to re-implement the same 6-class object detection pipeline with a tracker that has been analyzed in the tutorial notebook. Here are the illustrations of the pipeline: remember that the secondary classifiers (highlighted in gray) are not to be implemented.\n", 40 | "\n", 41 | "\n", 42 | "\n", 43 | "Let us get started with the notebook. You will have to fill in the `COMPLETE THIS SECTION` parts of the code present in the notebook to complete the pipeline. Feel free to refer to the previous notebooks for the commands but make sure to grasp the most important underlying concepts.\n", 44 | "\n", 45 | "## Building the pipeline" 46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": null, 51 | "metadata": {}, 52 | "outputs": [], 53 | "source": [ 54 | "# Import required libraries\n", 55 | "import sys\n", 56 | "sys.path.append(\"../source_code/N4\")\n", 57 | "from bus_call import bus_call\n", 58 | "import gi\n", 59 | "gi.require_version('Gst', '1.0')\n", 60 | "from gi.repository import GLib, Gst\n", 61 | "import configparser\n", 62 | "import pyds\n", 63 | "import time\n", 64 | "\n", 65 | "############# ~~~~~~~ COMPLETE THIS SECTION ~~~~~~~ #############\n", 66 | "# Define class labels\n", 67 | "PGIE_CLASS_ID_FRESHAPPLE = 0\n", 68 | "#\n", 69 | "#\n", 70 | "#\n", 71 | "#\n", 72 | "#\n", 73 | "###################### ~~~~~~~ END ~~~~~~~ ######################\n", 74 | "\n", 75 | "# Define input/output video files\n", 76 | "INPUT_VIDEO_NAME = \"../source_code/N4/apples.h264\" # Source: https://depositphotos.com\n", 77 | "OUTPUT_VIDEO_NAME = \"../source_code/challenge_deepstream/ds_out.mp4\"\n", 78 | "\n", 79 | "import os\n", 80 | "if not os.path.exists(\"../source_code/challenge_deepstream\"):\n", 81 | " !mkdir ../source_code/challenge_deepstream" 82 | ] 83 | }, 84 | { 85 | "cell_type": "markdown", 86 | "metadata": {}, 87 | "source": [ 88 | "First, we define a function `make_elm_or_print_err()` to create our elements and report any errors if the creation fails. Elements are created using the `Gst.ElementFactory.make()` function as part of Gstreamer library." 89 | ] 90 | }, 91 | { 92 | "cell_type": "code", 93 | "execution_count": null, 94 | "metadata": {}, 95 | "outputs": [], 96 | "source": [ 97 | "# Make element or print error and any other detail\n", 98 | "def make_elm_or_print_err(factoryname, name, printedname, detail=\"\"):\n", 99 | " print(\"Creating\", printedname)\n", 100 | " elm = Gst.ElementFactory.make(factoryname, name)\n", 101 | " if not elm:\n", 102 | " sys.stderr.write(\"Unable to create \" + printedname + \" \\n\")\n", 103 | " if detail:\n", 104 | " sys.stderr.write(detail)\n", 105 | " return elm" 106 | ] 107 | }, 108 | { 109 | "cell_type": "markdown", 110 | "metadata": {}, 111 | "source": [ 112 | "Next, we initialize GStreamer and create an empty pipeline." 
113 | ] 114 | }, 115 | { 116 | "cell_type": "code", 117 | "execution_count": null, 118 | "metadata": {}, 119 | "outputs": [], 120 | "source": [ 121 | "# Standard GStreamer initialization\n", 122 | "Gst.init(None)\n", 123 | "\n", 124 | "# Create gstreamer elements\n", 125 | "# Create Pipeline element that will form a connection of other elements\n", 126 | "print(\"Creating Pipeline \\n\")\n", 127 | "pipeline = Gst.Pipeline()\n", 128 | "\n", 129 | "if not pipeline:\n", 130 | " sys.stderr.write(\" Unable to create Pipeline \\n\")" 131 | ] 132 | }, 133 | { 134 | "cell_type": "markdown", 135 | "metadata": {}, 136 | "source": [ 137 | "Then, we create the elements that are required for our pipeline." 138 | ] 139 | }, 140 | { 141 | "cell_type": "code", 142 | "execution_count": null, 143 | "metadata": {}, 144 | "outputs": [], 145 | "source": [ 146 | "########### Create elements required for the Pipeline ###########\n", 147 | "# Source element for reading from the file\n", 148 | "source = make_elm_or_print_err(\"filesrc\", \"file-source\", \"Source\")\n", 149 | "# Since the data format in the input file is elementary h264 stream, we need a h264parser\n", 150 | "h264parser = make_elm_or_print_err(\"h264parse\", \"h264-parser\", \"h264 parse\")\n", 151 | "# Use nvdec_h264 for hardware accelerated decode on GPU\n", 152 | "decoder = make_elm_or_print_err(\"nvv4l2decoder\", \"nvv4l2-decoder\", \"Nvv4l2 Decoder\")\n", 153 | "# Create nvstreammux instance to form batches from one or more sources\n", 154 | "streammux = make_elm_or_print_err(\"nvstreammux\", \"Stream-muxer\", \"NvStreamMux\")\n", 155 | "# Use nvinfer to run inferencing on decoder's output, behavior of inferencing is set through config file\n", 156 | "pgie = make_elm_or_print_err(\"nvinfer\", \"primary-inference\", \"pgie\")\n", 157 | "# Use nvtracker to give objects unique-ids\n", 158 | "tracker = make_elm_or_print_err(\"nvtracker\", \"tracker\", \"tracker\")\n", 159 | "# Use convertor to convert from NV12 to RGBA as required by nvosd\n", 160 | "nvvidconv = make_elm_or_print_err(\"nvvideoconvert\", \"convertor\", \"nvvidconv\")\n", 161 | "# Create OSD to draw on the converted RGBA buffer\n", 162 | "nvosd = make_elm_or_print_err(\"nvdsosd\", \"onscreendisplay\", \"nvosd\")\n", 163 | "# Finally encode and save the osd output\n", 164 | "queue = make_elm_or_print_err(\"queue\", \"queue\", \"Queue\")\n", 165 | "# Use convertor to convert from NV12 to RGBA as required by nvosd\n", 166 | "nvvidconv2 = make_elm_or_print_err(\"nvvideoconvert\", \"convertor2\", \"nvvidconv2\")\n", 167 | "# Place an encoder instead of OSD to save as video file\n", 168 | "encoder = make_elm_or_print_err(\"avenc_mpeg4\", \"encoder\", \"Encoder\")\n", 169 | "# Parse output from Encoder\n", 170 | "codeparser = make_elm_or_print_err(\"mpeg4videoparse\", \"mpeg4-parser\", \"Code Parser\")\n", 171 | "# Create a container\n", 172 | "container = make_elm_or_print_err(\"qtmux\", \"qtmux\", \"Container\")\n", 173 | "# Create Sink for storing the output\n", 174 | "sink = make_elm_or_print_err(\"filesink\", \"filesink\", \"Sink\")" 175 | ] 176 | }, 177 | { 178 | "cell_type": "markdown", 179 | "metadata": {}, 180 | "source": [ 181 | "Now that we have created the elements, we can proceed to set various properties for our pipeline.\n", 182 | "\n", 183 | "## Understanding the configuration files\n", 184 | "\n", 185 | "We'll resuse the `pgie` configuration file that was examined in the previous notebook. 
If you haven't already set your API key in the configuration file [here](../source_code/N4/pgie_yolov4_tao_config.txt) in the field `tlt-model-key`, please go ahead and do so, then save the file with `ctrl s`. Not setting the key makes it impossible to decrypt the model and successfully run this notebook." 186 | ] 187 | }, 188 | { 189 | "cell_type": "code", 190 | "execution_count": null, 191 | "metadata": {}, 192 | "outputs": [], 193 | "source": [ 194 | "print(\"Please replace the tlt-model-key variable with your key.\")" 195 | ] 196 | }, 197 | { 198 | "cell_type": "code", 199 | "execution_count": null, 200 | "metadata": {}, 201 | "outputs": [], 202 | "source": [ 203 | "!cat ../source_code/N4/pgie_yolov4_tao_config.txt" 204 | ] 205 | }, 206 | { 207 | "cell_type": "markdown", 208 | "metadata": {}, 209 | "source": [ 210 | "We can also visualize the configuration file for our nvtracker (tracking plugin) named `dstest2_tracker_config.txt`. The configuration file is parsed and properties are then set for the tracker." 211 | ] 212 | }, 213 | { 214 | "cell_type": "code", 215 | "execution_count": null, 216 | "metadata": {}, 217 | "outputs": [], 218 | "source": [ 219 | "!cat ../source_code/N4/dstest2_tracker_config.txt" 220 | ] 221 | }, 222 | { 223 | "cell_type": "markdown", 224 | "metadata": {}, 225 | "source": [ 226 | "In the next cell, we set the properties for the elements of our pipeline, including but not limited to the contents of the two configuration files." 227 | ] 228 | }, 229 | { 230 | "cell_type": "code", 231 | "execution_count": null, 232 | "metadata": {}, 233 | "outputs": [], 234 | "source": [ 235 | "############ Set properties for the Elements ############\n", 236 | "print(\"Playing file \", INPUT_VIDEO_NAME)\n", 237 | "# Set Input File Name \n", 238 | "source.set_property(\"location\", INPUT_VIDEO_NAME)\n", 239 | "# Set Input Width, Height and Batch Size \n", 240 | "streammux.set_property(\"width\", 1920)\n", 241 | "streammux.set_property(\"height\", 1080)\n", 242 | "streammux.set_property(\"batch-size\", 1)\n", 243 | "# Timeout in microseconds to wait after the first buffer is available \n", 244 | "# to push the batch even if a complete batch is not formed.\n", 245 | "streammux.set_property(\"batched-push-timeout\", 4000000)\n", 246 | "# Set Congifuration file for nvinfer \n", 247 | "pgie.set_property(\"config-file-path\", \"../source_code/N4/pgie_yolov4_tao_config.txt\")\n", 248 | "#Set properties of tracker from tracker_config\n", 249 | "config = configparser.ConfigParser()\n", 250 | "config.read(\"../source_code/N4/dstest2_tracker_config.txt\")\n", 251 | "config.sections()\n", 252 | "for key in config['tracker']:\n", 253 | " if key == 'tracker-width' :\n", 254 | " tracker_width = config.getint('tracker', key)\n", 255 | " tracker.set_property('tracker-width', tracker_width)\n", 256 | " if key == 'tracker-height' :\n", 257 | " tracker_height = config.getint('tracker', key)\n", 258 | " tracker.set_property('tracker-height', tracker_height)\n", 259 | " if key == 'gpu-id' :\n", 260 | " tracker_gpu_id = config.getint('tracker', key)\n", 261 | " tracker.set_property('gpu_id', tracker_gpu_id)\n", 262 | " if key == 'll-lib-file' :\n", 263 | " tracker_ll_lib_file = config.get('tracker', key)\n", 264 | " tracker.set_property('ll-lib-file', tracker_ll_lib_file)\n", 265 | " if key == 'll-config-file' :\n", 266 | " tracker_ll_config_file = config.get('tracker', key)\n", 267 | " tracker.set_property('ll-config-file', tracker_ll_config_file)\n", 268 | " if key == 'enable-batch-process' :\n", 
269 | " tracker_enable_batch_process = config.getint('tracker', key)\n", 270 | " tracker.set_property('enable_batch_process', tracker_enable_batch_process)\n", 271 | "# Set Encoder bitrate for output video\n", 272 | "encoder.set_property(\"bitrate\", 2000000)\n", 273 | "# Set Output file name and disable sync and async\n", 274 | "sink.set_property(\"location\", OUTPUT_VIDEO_NAME)\n", 275 | "sink.set_property(\"sync\", 0)\n", 276 | "sink.set_property(\"async\", 0)" 277 | ] 278 | }, 279 | { 280 | "cell_type": "markdown", 281 | "metadata": {}, 282 | "source": [ 283 | "We now link all the elements in the order we prefer and create Gstreamer bus to feed all messages through it." 284 | ] 285 | }, 286 | { 287 | "cell_type": "code", 288 | "execution_count": null, 289 | "metadata": {}, 290 | "outputs": [], 291 | "source": [ 292 | "########## Add and Link Elements in the Pipeline ##########\n", 293 | "\n", 294 | "print(\"Adding elements to Pipeline \\n\")\n", 295 | "\n", 296 | "############# ~~~~~~~ COMPLETE THIS SECTION ~~~~~~~ #############\n", 297 | "# Adding elements to the pipeline\n", 298 | "pipeline.add(source)\n", 299 | "pipeline.add(h264parser)\n", 300 | "#\n", 301 | "#\n", 302 | "#\n", 303 | "#\n", 304 | "#\n", 305 | "#\n", 306 | "#\n", 307 | "#\n", 308 | "#\n", 309 | "#\n", 310 | "#\n", 311 | "#\n", 312 | "###################### ~~~~~~~ END ~~~~~~~ ######################\n", 313 | "\n", 314 | "# We now link the elements together \n", 315 | "# file-source -> h264-parser -> nvh264-decoder -> nvinfer -> nvvidconv ->\n", 316 | "# queue -> nvvidconv2 -> encoder -> parser -> container -> sink -> output-file\n", 317 | "print(\"Linking elements in the Pipeline \\n\")\n", 318 | "source.link(h264parser)\n", 319 | "h264parser.link(decoder)\n", 320 | "\n", 321 | "##### Creating Sink pad and source pads and linking them together \n", 322 | "\n", 323 | "# Create Sinkpad to Streammux \n", 324 | "sinkpad = streammux.get_request_pad(\"sink_0\")\n", 325 | "if not sinkpad:\n", 326 | " sys.stderr.write(\" Unable to get the sink pad of streammux \\n\")\n", 327 | "# Create source pad from Decoder \n", 328 | "srcpad = decoder.get_static_pad(\"src\")\n", 329 | "if not srcpad:\n", 330 | " sys.stderr.write(\" Unable to get source pad of decoder \\n\")\n", 331 | "\n", 332 | "############# ~~~~~~~ COMPLETE THIS SECTION ~~~~~~~ #############\n", 333 | "# Link the elements\n", 334 | "srcpad.link(sinkpad)\n", 335 | "streammux.link(pgie)\n", 336 | "#\n", 337 | "#\n", 338 | "#\n", 339 | "#\n", 340 | "#\n", 341 | "#\n", 342 | "#\n", 343 | "#\n", 344 | "#\n", 345 | "###################### ~~~~~~~ END ~~~~~~~ ######################" 346 | ] 347 | }, 348 | { 349 | "cell_type": "markdown", 350 | "metadata": {}, 351 | "source": [ 352 | "Now we create an event loop and feed GStreamer bus messages to it." 353 | ] 354 | }, 355 | { 356 | "cell_type": "code", 357 | "execution_count": null, 358 | "metadata": {}, 359 | "outputs": [], 360 | "source": [ 361 | "loop = GLib.MainLoop()\n", 362 | "bus = pipeline.get_bus()\n", 363 | "bus.add_signal_watch()\n", 364 | "bus.connect (\"message\", bus_call, loop)" 365 | ] 366 | }, 367 | { 368 | "cell_type": "markdown", 369 | "metadata": {}, 370 | "source": [ 371 | "## Working with the metadata \n", 372 | "\n", 373 | "Our pipeline now carries the metadata forward but does nothing with it up to this moment. 
As mentioned in the above pipeline diagram, we will now create a callback function to display relevant data on the frame once it is called and create a sink pad in the `nvosd` element to call the function." 374 | ] 375 | }, 376 | { 377 | "cell_type": "code", 378 | "execution_count": null, 379 | "metadata": {}, 380 | "outputs": [], 381 | "source": [ 382 | "############## Working with the Metadata ################\n", 383 | "\n", 384 | "def osd_sink_pad_buffer_probe(pad, info, u_data):\n", 385 | " \n", 386 | " ############# ~~~~~~~ COMPLETE THIS SECTION ~~~~~~~ #############\n", 387 | " # Intiallizing object counter with 0\n", 388 | " obj_counter = {\n", 389 | " PGIE_CLASS_ID_FRESHAPPLE:0,\n", 390 | " #\n", 391 | " #\n", 392 | " #\n", 393 | " #\n", 394 | " #\n", 395 | " }\n", 396 | " ###################### ~~~~~~~ END ~~~~~~~ ######################\n", 397 | " \n", 398 | " # Colors of the bounding boxes in RGBA\n", 399 | " obj_colors = {\n", 400 | " PGIE_CLASS_ID_FRESHAPPLE:(1.0, 0.0, 0.0, 0.0),\n", 401 | " PGIE_CLASS_ID_FRESHBANANA:(0.0, 1.0, 0.0, 0.0),\n", 402 | " PGIE_CLASS_ID_FRESHORANGE:(0.0, 0.0, 1.0, 0.0),\n", 403 | " PGIE_CLASS_ID_ROTTENAPPLE:(0.0, 1.0, 1.0, 0.0),\n", 404 | " PGIE_CLASS_ID_ROTTENBANANA:(1.0, 0.0, 1.0, 0.0),\n", 405 | " PGIE_CLASS_ID_ROTTENORANGE:(1.0, 1.0, 0.0, 0.0)\n", 406 | " }\n", 407 | " # Set frame_number & rectangles to draw as 0 \n", 408 | " frame_number=0\n", 409 | " num_rects=0\n", 410 | " \n", 411 | " gst_buffer = info.get_buffer()\n", 412 | " if not gst_buffer:\n", 413 | " print(\"Unable to get GstBuffer \")\n", 414 | " return\n", 415 | "\n", 416 | " # Retrieve batch metadata from the gst_buffer\n", 417 | " # Note that pyds.gst_buffer_get_nvds_batch_meta() expects the\n", 418 | " # C address of gst_buffer as input, which is obtained with hash(gst_buffer)\n", 419 | " batch_meta = pyds.gst_buffer_get_nvds_batch_meta(hash(gst_buffer))\n", 420 | " l_frame = batch_meta.frame_meta_list\n", 421 | " \n", 422 | " while l_frame is not None:\n", 423 | " try:\n", 424 | " # Note that l_frame.data needs a cast to pyds.NvDsFrameMeta\n", 425 | " frame_meta = pyds.NvDsFrameMeta.cast(l_frame.data)\n", 426 | " except StopIteration:\n", 427 | " break\n", 428 | " \n", 429 | " # Get frame number, number of rectangles to draw and object metadata\n", 430 | " frame_number=frame_meta.frame_num\n", 431 | " num_rects = frame_meta.num_obj_meta\n", 432 | " l_obj=frame_meta.obj_meta_list\n", 433 | " \n", 434 | " while l_obj is not None:\n", 435 | " try:\n", 436 | " # Casting l_obj.data to pyds.NvDsObjectMeta\n", 437 | " obj_meta=pyds.NvDsObjectMeta.cast(l_obj.data)\n", 438 | " except StopIteration:\n", 439 | " break\n", 440 | " # Increment object class by 1 and set box border color \n", 441 | " obj_counter[obj_meta.class_id] += 1\n", 442 | " r, g, b, a = obj_colors[obj_meta.class_id]\n", 443 | " obj_meta.rect_params.border_color.set(r, g, b, a)\n", 444 | " try: \n", 445 | " l_obj=l_obj.next\n", 446 | " except StopIteration:\n", 447 | " break\n", 448 | " ################## Setting Metadata Display configruation ############### \n", 449 | " # Acquiring a display meta object\n", 450 | " display_meta=pyds.nvds_acquire_display_meta_from_pool(batch_meta)\n", 451 | " display_meta.num_labels = 1\n", 452 | " py_nvosd_text_params = display_meta.text_params[0]\n", 453 | " # Setting display text to be shown on screen\n", 454 | " py_nvosd_text_params.display_text = \"Frame Number={} Number of Objects={} Freshapple_count={} Freshbanana_count={} \" \\\n", 455 | " \"Freshorange_count={} 
Rottenapple_count={} Rottenbanana_count={} Rottenorange_count={}\".format(frame_number, num_rects, \n", 456 | " obj_counter[PGIE_CLASS_ID_FRESHAPPLE], obj_counter[PGIE_CLASS_ID_FRESHBANANA], obj_counter[PGIE_CLASS_ID_FRESHORANGE], \n", 457 | " obj_counter[PGIE_CLASS_ID_ROTTENAPPLE], obj_counter[PGIE_CLASS_ID_ROTTENBANANA], obj_counter[PGIE_CLASS_ID_ROTTENORANGE])\n", 458 | " \n", 459 | " # Now set the offsets where the string should appear\n", 460 | " py_nvosd_text_params.x_offset = 10\n", 461 | " py_nvosd_text_params.y_offset = 12\n", 462 | " # Font, font-color and font-size\n", 463 | " py_nvosd_text_params.font_params.font_name = \"Serif\"\n", 464 | " py_nvosd_text_params.font_params.font_size = 14\n", 465 | " # Set(red, green, blue, alpha); Set to White\n", 466 | " py_nvosd_text_params.font_params.font_color.set(1.0, 1.0, 1.0, 1.0)\n", 467 | " # Text background color\n", 468 | " py_nvosd_text_params.set_bg_clr = 1\n", 469 | " # Set(red, green, blue, alpha); set to Black\n", 470 | " py_nvosd_text_params.text_bg_clr.set(0.0, 0.0, 0.0, 1.0)\n", 471 | " # Using pyds.get_string() to get display_text as string to print in notebook\n", 472 | " print(pyds.get_string(py_nvosd_text_params.display_text))\n", 473 | " pyds.nvds_add_display_meta_to_frame(frame_meta, display_meta)\n", 474 | " \n", 475 | " ############################################################################\n", 476 | " \n", 477 | " try:\n", 478 | " l_frame=l_frame.next\n", 479 | " except StopIteration:\n", 480 | " break\n", 481 | " return Gst.PadProbeReturn.OK" 482 | ] 483 | }, 484 | { 485 | "cell_type": "markdown", 486 | "metadata": {}, 487 | "source": [ 488 | "Here we add the probe to get informed of the meta data generated. We add probe to the sink pad of the osd element, since by that time, the buffer would have got all the metadata." 489 | ] 490 | }, 491 | { 492 | "cell_type": "code", 493 | "execution_count": null, 494 | "metadata": {}, 495 | "outputs": [], 496 | "source": [ 497 | "osdsinkpad = nvosd.get_static_pad(\"sink\")\n", 498 | "if not osdsinkpad:\n", 499 | " sys.stderr.write(\" Unable to get sink pad of nvosd \\n\")\n", 500 | " \n", 501 | "osdsinkpad.add_probe(Gst.PadProbeType.BUFFER, osd_sink_pad_buffer_probe, 0)" 502 | ] 503 | }, 504 | { 505 | "cell_type": "markdown", 506 | "metadata": {}, 507 | "source": [ 508 | "## Run the pipeline\n", 509 | "\n", 510 | "Now with everything defined, we can start the playback and listen to the events." 511 | ] 512 | }, 513 | { 514 | "cell_type": "code", 515 | "execution_count": null, 516 | "metadata": {}, 517 | "outputs": [], 518 | "source": [ 519 | "# start play back and listen to events\n", 520 | "print(\"Starting pipeline \\n\")\n", 521 | "start_time = time.time()\n", 522 | "pipeline.set_state(Gst.State.PLAYING)\n", 523 | "try:\n", 524 | " loop.run()\n", 525 | "except:\n", 526 | " pass\n", 527 | "# cleanup\n", 528 | "pipeline.set_state(Gst.State.NULL)\n", 529 | "print(\"--- %s seconds ---\" % (time.time() - start_time))" 530 | ] 531 | }, 532 | { 533 | "cell_type": "markdown", 534 | "metadata": {}, 535 | "source": [ 536 | "With the next cell, we convert the video profile to be compatible with Jupyter notebook." 
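(If the conversion below produces no output, first confirm that the pipeline actually wrote a non-empty file; a quick check, assuming the `OUTPUT_VIDEO_NAME` path defined at the top of the notebook:)

```python
import os

out_path = "../source_code/challenge_deepstream/ds_out.mp4"  # OUTPUT_VIDEO_NAME from the first cell
if os.path.isfile(out_path) and os.path.getsize(out_path) > 0:
    print(f"{out_path}: {os.path.getsize(out_path) / 1e6:.1f} MB")
else:
    print("No output written - re-check the 'COMPLETE THIS SECTION' blocks where elements are added and linked.")
```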
537 | ] 538 | }, 539 | { 540 | "cell_type": "code", 541 | "execution_count": null, 542 | "metadata": {}, 543 | "outputs": [], 544 | "source": [ 545 | "!ffmpeg -loglevel panic -y -an -i ../source_code/challenge_deepstream/ds_out.mp4 -vcodec libx264 -pix_fmt yuv420p -profile:v baseline -level 3 ../source_code/challenge_deepstream/output.mp4" 546 | ] 547 | }, 548 | { 549 | "cell_type": "code", 550 | "execution_count": null, 551 | "metadata": {}, 552 | "outputs": [], 553 | "source": [ 554 | "# Display the output\n", 555 | "from IPython.display import HTML\n", 556 | "HTML(\"\"\"\n", 557 | "