├── CONTRIBUTING.md ├── Deployment_Guide.md ├── Dockerfile_deepstream ├── LICENSE ├── README.md ├── Singularity_deepstream ├── Singularity_tao ├── Singularity_triton └── workspace ├── Start_here.ipynb ├── jupyter_notebook ├── 1.Data_labeling_and_preprocessing.ipynb ├── 2.Object_detection_using_TAO_YOLOv4.ipynb ├── 3.Model_deployment_with_Triton_Inference_Server.ipynb ├── 4.Model_deployment_with_DeepStream.ipynb ├── 5.Measure_object_size_using_OpenCV.ipynb ├── 6.Challenge_DeepStream.ipynb ├── 7.Challenge_Triton.ipynb └── images │ ├── architecture.jpg │ ├── augmenting.png │ ├── deepstream_overview.jpg │ ├── deepstream_python_bindings.png │ ├── ds_overview.png │ ├── ds_workflow.png │ ├── dstream_deploy_options.png │ ├── git_branching.jpg │ ├── label_studio_1.png │ ├── label_studio_10.png │ ├── label_studio_11.png │ ├── label_studio_2.png │ ├── label_studio_3.png │ ├── label_studio_4.png │ ├── label_studio_5.png │ ├── label_studio_6.png │ ├── label_studio_7.png │ ├── label_studio_8.png │ ├── label_studio_9.png │ ├── ngc_key.png │ ├── ngc_setup_key.png │ ├── nvinfer.png │ ├── nvosd.png │ ├── nvtracker.png │ ├── nvvidconv.png │ ├── pads.png │ ├── pipeline.png │ ├── prep_pipeline.png │ ├── pruned_vs_unpruned.png │ ├── res_mario.jpg │ ├── tao_cv_qat_workflow.png │ ├── tao_deepstream.jpeg │ ├── tao_tf_user_interaction.png │ ├── tao_toolkit.jpeg │ ├── test2.png │ ├── triton_inference_server.jpg │ ├── yolo_kitti.png │ ├── yolo_label.png │ └── yolo_mark.png ├── source_code ├── N2 │ └── generate_val_dataset.py ├── N3 │ ├── frame.py │ ├── postprocessor.py │ ├── preprocess_input.py │ ├── triton_model.py │ ├── user_data.py │ ├── utils.py │ ├── yolov4_model.py │ └── yolov4_postprocessor.py ├── N4 │ ├── bus_call.py │ ├── config_tracker_NvDCF_perf.yml │ ├── dstest2_tracker_config.txt │ └── pgie_yolov4_tao_config.txt ├── N5 │ └── calc_object_size.py └── dataset.py └── specs ├── default_spec.txt ├── yolo_v4_retrain_resnet18_kitti.txt ├── yolo_v4_retrain_resnet18_kitti_seq.txt ├── yolo_v4_tfrecords_kitti_train.txt ├── yolo_v4_tfrecords_kitti_val.txt ├── yolo_v4_train_resnet18_kitti.txt └── yolo_v4_train_resnet18_kitti_seq.txt /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | Contributing 2 | ------------ 3 | 4 | Please use the following guidelines when contributing to this project. 5 | 6 | Before contributing significant changes, please begin a discussion of the desired changes via a GitHub Issue to prevent doing unnecessary or overlapping work. 7 | 8 | ## License 9 | 10 | The preferred license for source code contributed to this project is the Apache License 2.0 (https://www.apache.org/licenses/LICENSE-2.0) and for documentation, including Jupyter notebooks and text documentation, is the Creative Commons Attribution 4.0 International (CC BY 4.0) (https://creativecommons.org/licenses/by/4.0/). Contributions under other, compatible licenses will be considered on a case-by-case basis. 11 | 12 | ## Styling 13 | 14 | Please use the following style guidelines when making contributions. 15 | 16 | ### Source Code 17 | * Two-space indentation, no tabs 18 | * To the extent possible, variable names should be descriptive 19 | * Code should be documentation with detail like what function does and returns making the code readable. The code should also have proper license at the beginning of the file. 
20 | * The following file extensions should be used appropriately:
21 | * Python = .py
22 | 
23 | ### Jupyter Notebooks & Markdown
24 | * When they appear inline with the text, directive names, clauses, function or subroutine names, variable names, file names, commands, and command-line arguments should appear between two backticks.
25 | * Code blocks should begin and end with three backticks to enable appropriate source formatting.
26 | * Leave an empty line before and after the code block.
27 | * Emphasis, including quotes made for emphasis and the introduction of new terms, should be highlighted between a single pair of asterisks.
28 | * A level 1 heading should appear at the top of the notebook as the title of the notebook.
29 | * A horizontal rule should appear between sections that begin with a level 2 heading.
30 | 
31 | 
32 | ## Contributing Labs/Modules
33 | #### DeepStream Triton Inference Server Integration
34 | * In the existing end-to-end CV repo, only models built with TAO or optimized by TensorRT can be deployed on DeepStream (streaming video). DeepStream Triton Inference Server Integration enables the use of trained models from a desired framework, such as TensorFlow, TensorRT, PyTorch, or ONNX Runtime, and runs inference directly on streaming video.
35 | * Task 1: Extend the end-to-end CV repo with DeepStream Triton Inference Server Integration
36 | * Task 2: Upgrade the end-to-end CV repo to TAO Toolkit 4.0.1 and add an AutoML section. AutoML is a TAO Toolkit API service that automatically selects deep learning hyperparameters for a chosen model and dataset.
37 | #### Body Pose Estimation
38 | * Use cases for body pose estimation in the CV domain include:
39 |   * Tracking customers who pick up or put down products in a retail store [real-time inventory]
40 |   * Tracking the safety of factory personnel
41 |   * E-health monitoring systems
42 | * Task: Create end-to-end body pose estimation material (dataset preparation, TAO training, and DeepStream deployment)
43 | 
44 | 
45 | ### Directory structure for GitHub
46 | 
47 | Before starting to work on a new lab, it is important to follow the recommended git structure shown below to avoid reformatting.
48 | 
49 | Each lab will have the following files/directories containing the training material for the lab:
50 | * jupyter_notebook folder: contains the Jupyter notebooks and their corresponding images.
51 | * source_code folder: source code is stored in a separate directory because not all clusters support Jupyter notebooks. During such bootcamps, we should be able to use the source code directly from this directory.
52 | * presentations: contains the presentations for the labs (PDF format is preferred).
53 | * Dockerfile and Singularity: each lab should have both Docker and Singularity recipes.
54 | 
55 | The lab may optionally add a custom license in case of any deviation from the top-level directory license (Apache 2.0).
56 | 
57 | 
58 | ### Git Branching
59 | 
60 | Adding a new feature/lab follows a forking workflow: feature branch development happens on a forked repo that is later merged into the original project (GPUHackathons.org) repository.
61 | 
62 | ![Git Branching Workflow](workspace/jupyter_notebook/images/git_branching.jpg)
63 | 
64 | The 5 main steps depicted in the image above are as follows:
65 | 1. Fork: To create a new lab/feature, the GPUHackathons.org repository must be forked. Forking creates a snapshot of the GPUHackathons.org repository at the time it was forked.
Any new feature/lab that will be developed should be based on the develop branch of the repository.
66 | 2. Clone: The developer can then clone this new repository to a local machine.
67 | 3. Create Feature Branch: Create a new branch with a feature name in which your changes will be made. The recommended naming convention for the feature branch is: ende2end-cv-. The new changes the developer makes can then be added, committed, and pushed.
68 | 4. Push: After the changes are committed, the developer pushes them to the remote branch. The push command uploads the local changes to the GitHub repository.
69 | 5. Pull: Submit a pull request. Upon receiving the pull request, a Hackathon team reviewer/owner will review the changes and, upon accepting them, merge them into the develop branch of GPUHackathons.org.
70 | 
71 | Git branch details are as follows:
72 | 
73 | * master branch: the stable branch.
74 |   * origin/master is the main branch where the source code of HEAD always reflects a production-ready state
75 |   * Merge requests are possible through: the develop branch
76 | * develop branch: branched from the master branch
77 |   * Must branch from: master branch
78 |   * Must merge back into: master branch
79 |   * It is the main development branch where the source code of HEAD always reflects a state with the latest delivered development changes for the next release.
80 |   * When the source code in the develop branch reaches a stable point and is ready to be released, all of the changes should be merged back into master and tagged with a release number
81 | * All feature development should happen by forking GPUHackathons.org and branching from the develop branch only.
82 | 
--------------------------------------------------------------------------------
/Deployment_Guide.md:
--------------------------------------------------------------------------------
1 | # End-to-End Computer Vision Bootcamp
2 | 
3 | The **End-to-End Computer Vision Bootcamp** is designed from a real-world perspective and follows the data processing, development, and deployment pipeline paradigm using a variety of tools. Through hands-on exercises, attendees will learn the fundamentals of preprocessing custom images, speeding the development process using transfer learning for model training, and deployment of trained models for fast and scalable AI in production.
4 | 
5 | 
6 | ## Deploying the Labs
7 | 
8 | ### Prerequisites
9 | 
10 | To run this tutorial, you will need a laptop, workstation, or DGX machine with an NVIDIA GPU.
11 | 
12 | - Install the latest [Docker](https://docs.docker.com/engine/install/) or [Singularity](https://sylabs.io/docs/).
13 | - Once you have installed **Docker**, follow the [post-installation steps](https://docs.docker.com/engine/install/linux-postinstall/) to ensure that Docker can be run without `sudo`.
14 | 
15 | - Get an NGC account and API key:
16 | 
17 | - Go to the [NGC](https://ngc.nvidia.com/) website and click on `Register for NGC`.
18 | - Click on the `Continue` button where `NVIDIA Account (Use existing or create a new NVIDIA account)` is written.
19 | - Fill in the required information and register, then proceed to log in with your new account credentials.
20 | - In the top right corner, click on your username and select `Setup` in the dropdown menu.
21 | - Proceed and click on the `Get API Key` button.
22 | - Next, you will find a `Generate API Key` button in the upper right corner. After clicking on this button, a dialog box should appear and you have to click on the `Confirm` button.
23 | - Finally, copy the generated API key and username and save them somewhere on your local system. 24 | 25 | ### Tested environment 26 | 27 | All Labs were tested and is set to run on a DGX machine equipped with an Ampere A100 GPU. It was also tested using a workstation equipped with an NVIDIA RTX A3000 GPU with 6GB of VRAM, reducing all the batch sizes to 8 during training. 28 | The results may vary when using different hardware and some hyperparameters may not be ideal for fully taking advantage of the graphic card. 29 | 30 | 31 | ### Deploying with container 32 | 33 | This material can be deployed with either Docker or Singularity container, refer to the respective sections for the instructions. 34 | 35 | #### Running Docker Container 36 | 37 | ##### Lab 1 & 2 38 | 39 | **Install dependencies** 40 | 41 | 1. Create a new `conda` environment using `miniconda`: 42 | 43 | - Install `Miniconda` by following the [official instructions](https://conda.io/projects/conda/en/latest/user-guide/install/). 44 | - Once you have installed `miniconda`, create a new environment by setting the Python version to 3.6: 45 | 46 | `conda create -n launcher python=3.6` 47 | 48 | - Activate the `conda` environment that you have just created: 49 | 50 | `conda activate launcher` 51 | 52 | - When you are done with your session, you may deactivate your `conda` environment using the `deactivate` command: 53 | 54 | `conda deactivate` 55 | 56 | 57 | 2. Install the TAO Launcher Python package called `nvidia-tao` into the conda launcher environment: 58 | 59 | `conda activate launcher` 60 | 61 | `pip3 install nvidia-tao` 62 | 63 | 3. Invoke the entrypoints using the this command `tao -h`. You should see the following output: 64 | ``` 65 | usage: tao 66 | {list,stop,info,augment,bpnet,classification,detectnet_v2,dssd,emotionnet,faster_rcnn,fpenet,gazenet,gesturenet, 67 | heartratenet,intent_slot_classification,lprnet,mask_rcnn,punctuation_and_capitalization,question_answering, 68 | retinanet,speech_to_text,ssd,text_classification,converter,token_classification,unet,yolo_v3,yolo_v4,yolo_v4_tiny} 69 | ... 70 | 71 | Launcher for TAO 72 | 73 | optional arguments: 74 | -h, --help show this help message and exit 75 | 76 | tasks: 77 | {list,stop,info,augment,bpnet,classification,detectnet_v2,dssd,emotionnet,faster_rcnn,fpenet,gazenet,gesturenet,heartratenet 78 | ,intent_slot_classification,lprnet,mask_rcnn,punctuation_and_capitalization,question_answering,retinanet,speech_to_text, 79 | ssd,text_classification,converter,token_classification,unet,yolo_v3,yolo_v4,yolo_v4_tiny} 80 | ``` 81 | 82 | For more info, visit the [TAO Toolkit documentation](https://docs.nvidia.com/tao/tao-toolkit/text/tao_toolkit_quick_start_guide.html). 83 | 84 | 4. Install other dependencies needed to run the lab: 85 | ``` 86 | pip install jupyterlab \ 87 | matplotlib \ 88 | fiftyone \ 89 | attrdict \ 90 | tqdm \ 91 | gdown \ 92 | nvidia-pyindex \ 93 | tritonclient[all] 94 | ``` 95 | 96 | **Run the Labs** 97 | 98 | Activate the conda launcher environment: `conda activate launcher` 99 | 100 | You are to run the first two notebooks `1.Data_labeling_and_preprocessing.ipynb` and `2.Object_detection_using_TAO_YOLOv4.ipynb` in the `launcher` environment. 
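Before launching JupyterLab, you can optionally run a quick sanity check to confirm the environment is ready. The snippet below is a minimal sketch, assuming the `launcher` conda environment is active and that the GPU driver and the packages installed above are present; the exact output will vary with your setup.

```
# Check that the GPU is visible to the driver
nvidia-smi

# Confirm the TAO launcher entrypoint is on the PATH
tao info

# Confirm the client-side Python packages installed above import cleanly
python -c "import fiftyone, tritonclient.http; print('client packages OK')"
```

If `tao` is not found, see the TAO entry under the Known issues section at the end of this guide.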
101 | 
102 | Launch the jupyter lab with:
103 | 
104 | `jupyter-lab --no-browser --allow-root --ip=0.0.0.0 --port=8888 --NotebookApp.token="" --notebook-dir=~/End-to-End-Computer-Vision/workspace`
105 | 
106 | Remember to set `--notebook-dir` to the location of the project folder where this material is located.
107 | 
108 | Then, open jupyter lab in the browser at http://localhost:8888 and start working on the lab by clicking on the `Start_here.ipynb` notebook.
109 | 
110 | When you are done with `1.Data_labeling_and_preprocessing.ipynb` and `2.Object_detection_using_TAO_YOLOv4.ipynb`, move to the next section.
111 | 
112 | ##### Lab 3
113 | 
114 | To start the Triton Inference Server instance, you will need to run a container alongside the `launcher` virtual environment. This emulates the client-server mechanism on the same system. To start the server, open a new terminal and launch the command:
115 | ```
116 | docker run \
117 |     --gpus=1 --rm \
118 |     -p 8000:8000 -p 8001:8001 -p 8002:8002 \
119 |     -v ~/End-to-End-Computer-Vision/workspace/models:/models \
120 |     nvcr.io/nvidia/tritonserver:22.05-py3 \
121 |     tritonserver \
122 |     --model-repository=/models \
123 |     --exit-on-error=false \
124 |     --model-control-mode=poll \
125 |     --repository-poll-secs 30
126 | ```
127 | For this lab to work properly, the Triton server version should match the TAO Toolkit version that was installed (visible by running `tao info`). Containers with the same `yy.mm` tag avoid version mismatches and conflicts that may prevent you from running and deploying your models. The path to the local model repository also needs to be set so that it is mapped inside the container.
128 | 
129 | After starting Triton Server, you will see output on the terminal showing the server starting up and loading models. Once the models are reported as READY, Triton can accept inference requests.
130 | ```
131 | +----------------------+---------+--------+
132 | | Model                | Version | Status |
133 | +----------------------+---------+--------+
134 | |                      |         | READY  |
135 | | ..                   | .       | ..     |
136 | | ..                   | .       | ..     |
137 | +----------------------+---------+--------+
138 | ...
139 | ...
140 | ...
141 | I1002 21:58:57.891440 62 grpc_server.cc:3914] Started GRPCInferenceService at 0.0.0.0:8001
142 | I1002 21:58:57.893177 62 http_server.cc:2717] Started HTTPService at 0.0.0.0:8000
143 | I1002 21:58:57.935518 62 http_server.cc:2736] Started Metrics Service at 0.0.0.0:8002
144 | ```
145 | 
146 | Now you can go back to your browser with jupyter lab open and run `3.Model_deployment_with_Triton_Inference_Server.ipynb`.
147 | 
148 | When you are done with the notebook, shut down jupyter lab by selecting `File > Shut Down`, and stop the Triton server container by pressing `ctrl + c` in the terminal showing the server logs.
149 | 
150 | 
151 | ##### Lab 4 & 5
152 | 
153 | To run the DeepStream content, build a Docker container by following these steps:
154 | 
155 | - Open a terminal window and navigate to the directory where `Dockerfile_deepstream` is located (e.g. `cd ~/End-to-End-Computer-Vision`)
156 | - Run `sudo docker build -f Dockerfile_deepstream --network=host -t <image_name>:<tag> .`, for instance: `sudo docker build -f Dockerfile_deepstream --network=host -t deepstream:1.0 .`
157 | - Next, execute the command: `sudo docker run --rm -it --gpus=all -v ~/End-to-End-Computer-Vision/workspace:/opt/nvidia/deepstream/deepstream-6.1/workspace --network=host -p 8888:8888 deepstream:1.0`
158 | 
159 | Flags:
160 | - `--rm` will delete the container when finished.
161 | - `-it` means run in interactive mode. 162 | - `--gpus` option makes GPUs accessible inside the container. 163 | - `-v` is used to mount host directories in the container filesystem. 164 | - `--network=host` will share the host’s network stack to the container. 165 | - `-p` flag explicitly maps a single port or range of ports. 166 | 167 | When you are inside the container, launch jupyter lab: 168 | `jupyter-lab --no-browser --allow-root --ip=0.0.0.0 --port=8888 --NotebookApp.token="" --notebook-dir=/opt/nvidia/deepstream/deepstream-6.1/workspace`. 169 | 170 | Open the browser at `http://localhost:8888` and start working on `4.Model_deployment_with_DeepStream.ipynb` notebook. Then, move to `5.Measure_object_size_using_OpenCV.ipynb` and complete the material. 171 | 172 | As soon as you are done with that, shut down jupyter lab by selecting `File > Shut Down` and the container by typing `exit` or pressing `ctrl d` in the terminal window. 173 | 174 | Congratulations, you've successfully built and deployed an end-to-end computer vision pipeline! 175 | 176 | 177 | #### Running Singularity Container 178 | 179 | ###### Lab 1 & 2 180 | 181 | To build the TAO Toolkit Singularity container, run: `singularity build --fakeroot --sandbox tao_e2ecv.simg Singularity_tao` 182 | 183 | Run the container with: `singularity run --fakeroot --nv -B ~/End-to-End-Computer-Vision/workspace:/workspace/tao-experiments tao_e2ecv.simg jupyter-lab --no-browser --allow-root --ip=0.0.0.0 --port=8888 --NotebookApp.token="" --notebook-dir=/workspace/tao-experiments` 184 | 185 | The `-B` flag mounts local directories in the container filesystem and ensures changes are stored locally in the project folder. Open jupyter lab in browser: http://localhost:8888 186 | 187 | You may now start working on the lab by clicking on the `Start_here.ipynb` notebook. 188 | 189 | When you are done with `1.Data_labeling_and_preprocessing.ipynb` and `2.Object_detection_using_TAO_YOLOv4.ipynb`, shut down jupyter lab by selecting `File > Shut Down` in the top left corner, then shut down the Singularity container by typing `exit` or pressing `ctrl + d` in the terminal window. 190 | 191 | 192 | ###### Lab 3 193 | 194 | To download the Triton Inference Server Singularity container for the Server run: `singularity pull tritonserver:22.05-py3.sif docker://nvcr.io/nvidia/tritonserver:22.05-py3` 195 | 196 | To build the Triton Inference Server Singularity container for the Client, run: `singularity build --fakeroot --sandbox triton_client_e2ecv.simg Singularity_triton` 197 | 198 | To activate the Triton Inference Server container, run: 199 | ``` 200 | singularity run \ 201 | --nv \ 202 | -B ~/End-to-End-Computer-Vision/workspace/models:/models \ 203 | /mnt/shared/bootcamps/tritonserver:22.05-py3.sif \ 204 | tritonserver \ 205 | --model-repository=/models \ 206 | --exit-on-error=false \ 207 | --model-control-mode=poll \ 208 | --repository-poll-secs 30 \ 209 | --http-port 8000 \ 210 | --grpc-port 8001 \ 211 | --metrics-port 8002 212 | ``` 213 | 214 | You may now activate the Triton Client container with: `singularity run --fakeroot --nv -B ~/End-to-End-Computer-Vision/workspace:/workspace triton_client_e2ecv.simg jupyter-lab --no-browser --allow-root --ip=0.0.0.0 --port=8888 --NotebookApp.token="" --notebook-dir=/workspace` 215 | 216 | Then, open jupyter lab in browser: http://localhost:8888 and continue the lab by running `3.Model_deployment_with_Triton_Inference_Server.ipynb`. 
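Before running the notebook, you may want to confirm that the Triton server started earlier is reachable from where the client runs. The check below is a minimal sketch, assuming the server uses the HTTP port 8000 set in the server command above and that `curl` is available; Triton answers this readiness endpoint with `200 OK` once the server and its models are ready.

```
# Query Triton's readiness endpoint (replace localhost with the compute
# node name, e.g. dgx05, if the server runs on a different machine)
curl -v localhost:8000/v2/health/ready
```

A `200 OK` response means the server is ready; a connection error usually means the server container is not running or its ports are not reachable from the node you are on.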
217 | 
218 | **Note**
219 | 
220 | In a cluster environment, the `Triton Inference Server` container should be launched on the compute node (e.g., dgx05) while the `Triton Client` container should be run on the login node (CPU). Therefore, within the notebook, the url variable should be modified as follows:
221 | 
222 | 
223 | ```
224 | assuming you are on dgx05, replace
225 | 
226 | url = "localhost:8000" with url = "dgx05:8000"
227 | 
228 | url = "localhost:8001" with url = "dgx05:8001"
229 | ```
230 | 
231 | As soon as you are done with that, shut down jupyter lab by selecting `File > Shut Down` and the Client container by typing `exit` or pressing `ctrl + d` in the terminal window.
232 | 
233 | 
234 | ###### Lab 4 & 5
235 | 
236 | To build the DeepStream Singularity container, run: `sudo singularity build --sandbox deepstream_e2ecv.simg Singularity_deepstream`
237 | 
238 | Run the DeepStream container with: `singularity run --fakeroot --nv -B ~/End-to-End-Computer-Vision/workspace:/opt/nvidia/deepstream/deepstream-6.1/workspace /mnt/shared/bootcamps/deepstream_e2ecv.simg jupyter-lab --no-browser --allow-root --ip=0.0.0.0 --port=8888 --NotebookApp.token="" --notebook-dir=/opt/nvidia/deepstream/deepstream-6.1/workspace`
239 | 
240 | Open jupyter lab in the browser at http://localhost:8888 and complete the material by running `4.Model_deployment_with_DeepStream.ipynb` and `5.Measure_object_size_using_OpenCV.ipynb`.
241 | 
242 | Congratulations, you've successfully built and deployed an end-to-end computer vision pipeline!
243 | 
244 | 
245 | 
246 | ## Known issues
247 | 
248 | ### TAO
249 | 
250 | a. When installing the TAO Toolkit Launcher to your host machine’s native python3, as opposed to the recommended route of using a virtual environment, you may get an error saying that the `tao` binary wasn’t found. This is because the path to the `tao` binary installed by pip wasn’t added to the `PATH` environment variable on your local machine. In this case, please run the following command:
251 | 
252 | `export PATH=$PATH:~/.local/bin`
253 | 
254 | b. When training, you may see an error message stating:
255 | ```
256 | Resource exhausted: OOM when allocating tensor...
257 | ERROR: Ran out of GPU memory, please lower the batch size, use a smaller input resolution, use a smaller backbone, or enable model parallelism for supported TLT architectures (see TLT documentation).
258 | ```
259 | As the error says, you ran out of GPU memory. Try reducing the batch size to lower the memory footprint.
260 | 
261 | ### NGC
262 | 
263 | You may see an error message stating:
264 | 
265 | `ngc: command not found ...`
266 | 
267 | You can resolve this by setting the path to ngc within the conda launcher environment as follows:
268 | 
269 | `echo "export PATH=\"\$PATH:$(pwd)/ngc-cli\"" >> ~/.bash_profile && source ~/.bash_profile`
270 | 
271 | ### Triton Inference Server
272 | 
273 | You may see in the server logs an error message similar to:
274 | 
275 | ```
276 | E0930 06:24:12.416803 1 logging.cc:43] 1: [stdArchiveReader.cpp::StdArchiveReader::40] Error Code 1: Serialization (Serialization assertion stdVersionRead == serializationVersion failed.Version tag does not match. Note: Current Version: 213, Serialized Engine Version: 205)
277 | E0930 06:24:12.423693 1 logging.cc:43] 4: [runtime.cpp::deserializeCudaEngine::50] Error Code 4: Internal Error (Engine deserialization failed.)
278 | ``` 279 | 280 | The Server container is using a different version of TensorRT than the one the engine was generated with, so the Server is unable to load the model. Make sure to use containers with the same `` tag when pulling from NGC as this ensures there are no version mismatches. You can verify the version of TAO by running the `tao info` command and then pull the appropriate `nvcr.io/nvidia/tritonserver:yy.mm-py3` Server container to solve the issue. 281 | 282 | ### DeepStream 283 | 284 | You can see when running the pipeline an error similar to: 285 | 286 | ``` 287 | ERROR: [TRT]: 4: [runtime.cpp::deserializeCudaEngine::50] Error Code 4: Internal Error (Engine deserialization failed.) 288 | ERROR: ../nvdsinfer/nvdsinfer_model_builder.cpp:1528 Deserialize engine failed from file: /opt/nvidia/deepstream/deepstream-6.1/workspace/yolo_v4/export/trt.engine 289 | ``` 290 | The DeepStream container uses a different version of TensorRT than the one the engine was generated with, so it is unable to use the TensorRT engine for inference. Please set the `tlt-encoded-model` path in the configuration file so that if the engine deserialization fails, DeepStream will attempt to rebuild the engine internally. 291 | -------------------------------------------------------------------------------- /Dockerfile_deepstream: -------------------------------------------------------------------------------- 1 | # Select base image 2 | FROM nvcr.io/nvidia/deepstream:6.1.1-devel 3 | 4 | # Install additional packages 5 | WORKDIR /opt/nvidia/deepstream/deepstream 6 | RUN ./user_additional_install.sh 7 | 8 | # Install required dependencies 9 | RUN apt install ffmpeg python3-gi python3-dev python3-gst-1.0 python-gi-dev git python-dev \ 10 | python3 python3-pip python3.8-dev cmake g++ build-essential libglib2.0-dev \ 11 | libglib2.0-dev-bin libgstreamer1.0-dev libtool m4 autoconf automake libgirepository1.0-dev libcairo2-dev -y 12 | 13 | # Initialization of submodules 14 | WORKDIR /opt/nvidia/deepstream/deepstream/sources 15 | RUN git clone https://github.com/NVIDIA-AI-IOT/deepstream_python_apps.git 16 | WORKDIR /opt/nvidia/deepstream/deepstream/sources/deepstream_python_apps 17 | RUN git submodule update --init 18 | 19 | # Installing Gst-python 20 | RUN apt-get install -y apt-transport-https ca-certificates -y 21 | RUN update-ca-certificates 22 | WORKDIR /opt/nvidia/deepstream/deepstream/sources/deepstream_python_apps/3rdparty/gst-python 23 | RUN ./autogen.sh 24 | RUN make 25 | RUN make install 26 | 27 | # Compiling python bindings 28 | WORKDIR /opt/nvidia/deepstream/deepstream/sources/deepstream_python_apps/bindings 29 | RUN mkdir build 30 | WORKDIR /opt/nvidia/deepstream/deepstream/sources/deepstream_python_apps/bindings/build 31 | RUN cmake .. -DPYTHON_MAJOR_VERSION=3 -DPYTHON_MINOR_VERSION=8 32 | RUN make 33 | 34 | # Installing python bindings 35 | RUN pip3 install ./pyds-1.1.4-py3-none*.whl 36 | 37 | # Install jupyterlab and packages 38 | WORKDIR /opt/nvidia/deepstream/deepstream-6.1 39 | RUN pip3 install jupyterlab \ 40 | ipywidgets \ 41 | matplotlib \ 42 | scipy \ 43 | imutils \ 44 | opencv-python 45 | 46 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 
8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. 
Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 
179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "{}" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright 2019 NVIDIA Corporation 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | 203 | ======================================================================= 204 | 205 | Creative Commons Attribution 4.0 International Public License 206 | 207 | By exercising the Licensed Rights (defined below), You accept and agree 208 | to be bound by the terms and conditions of this Creative Commons 209 | Attribution 4.0 International Public License ("Public License"). To the 210 | extent this Public License may be interpreted as a contract, You are 211 | granted the Licensed Rights in consideration of Your acceptance of 212 | these terms and conditions, and the Licensor grants You such rights in 213 | consideration of benefits the Licensor receives from making the 214 | Licensed Material available under these terms and conditions. 215 | 216 | 217 | Section 1 -- Definitions. 218 | 219 | a. Adapted Material means material subject to Copyright and Similar 220 | Rights that is derived from or based upon the Licensed Material 221 | and in which the Licensed Material is translated, altered, 222 | arranged, transformed, or otherwise modified in a manner requiring 223 | permission under the Copyright and Similar Rights held by the 224 | Licensor. For purposes of this Public License, where the Licensed 225 | Material is a musical work, performance, or sound recording, 226 | Adapted Material is always produced where the Licensed Material is 227 | synched in timed relation with a moving image. 228 | 229 | b. Adapter's License means the license You apply to Your Copyright 230 | and Similar Rights in Your contributions to Adapted Material in 231 | accordance with the terms and conditions of this Public License. 232 | 233 | c. Copyright and Similar Rights means copyright and/or similar rights 234 | closely related to copyright including, without limitation, 235 | performance, broadcast, sound recording, and Sui Generis Database 236 | Rights, without regard to how the rights are labeled or 237 | categorized. For purposes of this Public License, the rights 238 | specified in Section 2(b)(1)-(2) are not Copyright and Similar 239 | Rights. 240 | 241 | d. Effective Technological Measures means those measures that, in the 242 | absence of proper authority, may not be circumvented under laws 243 | fulfilling obligations under Article 11 of the WIPO Copyright 244 | Treaty adopted on December 20, 1996, and/or similar international 245 | agreements. 
246 | 247 | e. Exceptions and Limitations means fair use, fair dealing, and/or 248 | any other exception or limitation to Copyright and Similar Rights 249 | that applies to Your use of the Licensed Material. 250 | 251 | f. Licensed Material means the artistic or literary work, database, 252 | or other material to which the Licensor applied this Public 253 | License. 254 | 255 | g. Licensed Rights means the rights granted to You subject to the 256 | terms and conditions of this Public License, which are limited to 257 | all Copyright and Similar Rights that apply to Your use of the 258 | Licensed Material and that the Licensor has authority to license. 259 | 260 | h. Licensor means the individual(s) or entity(ies) granting rights 261 | under this Public License. 262 | 263 | i. Share means to provide material to the public by any means or 264 | process that requires permission under the Licensed Rights, such 265 | as reproduction, public display, public performance, distribution, 266 | dissemination, communication, or importation, and to make material 267 | available to the public including in ways that members of the 268 | public may access the material from a place and at a time 269 | individually chosen by them. 270 | 271 | j. Sui Generis Database Rights means rights other than copyright 272 | resulting from Directive 96/9/EC of the European Parliament and of 273 | the Council of 11 March 1996 on the legal protection of databases, 274 | as amended and/or succeeded, as well as other essentially 275 | equivalent rights anywhere in the world. 276 | 277 | k. You means the individual or entity exercising the Licensed Rights 278 | under this Public License. Your has a corresponding meaning. 279 | 280 | 281 | Section 2 -- Scope. 282 | 283 | a. License grant. 284 | 285 | 1. Subject to the terms and conditions of this Public License, 286 | the Licensor hereby grants You a worldwide, royalty-free, 287 | non-sublicensable, non-exclusive, irrevocable license to 288 | exercise the Licensed Rights in the Licensed Material to: 289 | 290 | a. reproduce and Share the Licensed Material, in whole or 291 | in part; and 292 | 293 | b. produce, reproduce, and Share Adapted Material. 294 | 295 | 2. Exceptions and Limitations. For the avoidance of doubt, where 296 | Exceptions and Limitations apply to Your use, this Public 297 | License does not apply, and You do not need to comply with 298 | its terms and conditions. 299 | 300 | 3. Term. The term of this Public License is specified in Section 301 | 6(a). 302 | 303 | 4. Media and formats; technical modifications allowed. The 304 | Licensor authorizes You to exercise the Licensed Rights in 305 | all media and formats whether now known or hereafter created, 306 | and to make technical modifications necessary to do so. The 307 | Licensor waives and/or agrees not to assert any right or 308 | authority to forbid You from making technical modifications 309 | necessary to exercise the Licensed Rights, including 310 | technical modifications necessary to circumvent Effective 311 | Technological Measures. For purposes of this Public License, 312 | simply making modifications authorized by this Section 2(a) 313 | (4) never produces Adapted Material. 314 | 315 | 5. Downstream recipients. 316 | 317 | a. Offer from the Licensor -- Licensed Material. Every 318 | recipient of the Licensed Material automatically 319 | receives an offer from the Licensor to exercise the 320 | Licensed Rights under the terms and conditions of this 321 | Public License. 322 | 323 | b. 
No downstream restrictions. You may not offer or impose 324 | any additional or different terms or conditions on, or 325 | apply any Effective Technological Measures to, the 326 | Licensed Material if doing so restricts exercise of the 327 | Licensed Rights by any recipient of the Licensed 328 | Material. 329 | 330 | 6. No endorsement. Nothing in this Public License constitutes or 331 | may be construed as permission to assert or imply that You 332 | are, or that Your use of the Licensed Material is, connected 333 | with, or sponsored, endorsed, or granted official status by, 334 | the Licensor or others designated to receive attribution as 335 | provided in Section 3(a)(1)(A)(i). 336 | 337 | b. Other rights. 338 | 339 | 1. Moral rights, such as the right of integrity, are not 340 | licensed under this Public License, nor are publicity, 341 | privacy, and/or other similar personality rights; however, to 342 | the extent possible, the Licensor waives and/or agrees not to 343 | assert any such rights held by the Licensor to the limited 344 | extent necessary to allow You to exercise the Licensed 345 | Rights, but not otherwise. 346 | 347 | 2. Patent and trademark rights are not licensed under this 348 | Public License. 349 | 350 | 3. To the extent possible, the Licensor waives any right to 351 | collect royalties from You for the exercise of the Licensed 352 | Rights, whether directly or through a collecting society 353 | under any voluntary or waivable statutory or compulsory 354 | licensing scheme. In all other cases the Licensor expressly 355 | reserves any right to collect such royalties. 356 | 357 | 358 | Section 3 -- License Conditions. 359 | 360 | Your exercise of the Licensed Rights is expressly made subject to the 361 | following conditions. 362 | 363 | a. Attribution. 364 | 365 | 1. If You Share the Licensed Material (including in modified 366 | form), You must: 367 | 368 | a. retain the following if it is supplied by the Licensor 369 | with the Licensed Material: 370 | 371 | i. identification of the creator(s) of the Licensed 372 | Material and any others designated to receive 373 | attribution, in any reasonable manner requested by 374 | the Licensor (including by pseudonym if 375 | designated); 376 | 377 | ii. a copyright notice; 378 | 379 | iii. a notice that refers to this Public License; 380 | 381 | iv. a notice that refers to the disclaimer of 382 | warranties; 383 | 384 | v. a URI or hyperlink to the Licensed Material to the 385 | extent reasonably practicable; 386 | 387 | b. indicate if You modified the Licensed Material and 388 | retain an indication of any previous modifications; and 389 | 390 | c. indicate the Licensed Material is licensed under this 391 | Public License, and include the text of, or the URI or 392 | hyperlink to, this Public License. 393 | 394 | 2. You may satisfy the conditions in Section 3(a)(1) in any 395 | reasonable manner based on the medium, means, and context in 396 | which You Share the Licensed Material. For example, it may be 397 | reasonable to satisfy the conditions by providing a URI or 398 | hyperlink to a resource that includes the required 399 | information. 400 | 401 | 3. If requested by the Licensor, You must remove any of the 402 | information required by Section 3(a)(1)(A) to the extent 403 | reasonably practicable. 404 | 405 | 4. If You Share Adapted Material You produce, the Adapter's 406 | License You apply must not prevent recipients of the Adapted 407 | Material from complying with this Public License. 
408 | 409 | 410 | Section 4 -- Sui Generis Database Rights. 411 | 412 | Where the Licensed Rights include Sui Generis Database Rights that 413 | apply to Your use of the Licensed Material: 414 | 415 | a. for the avoidance of doubt, Section 2(a)(1) grants You the right 416 | to extract, reuse, reproduce, and Share all or a substantial 417 | portion of the contents of the database; 418 | 419 | b. if You include all or a substantial portion of the database 420 | contents in a database in which You have Sui Generis Database 421 | Rights, then the database in which You have Sui Generis Database 422 | Rights (but not its individual contents) is Adapted Material; and 423 | 424 | c. You must comply with the conditions in Section 3(a) if You Share 425 | all or a substantial portion of the contents of the database. 426 | 427 | For the avoidance of doubt, this Section 4 supplements and does not 428 | replace Your obligations under this Public License where the Licensed 429 | Rights include other Copyright and Similar Rights. 430 | 431 | 432 | Section 5 -- Disclaimer of Warranties and Limitation of Liability. 433 | 434 | a. UNLESS OTHERWISE SEPARATELY UNDERTAKEN BY THE LICENSOR, TO THE 435 | EXTENT POSSIBLE, THE LICENSOR OFFERS THE LICENSED MATERIAL AS-IS 436 | AND AS-AVAILABLE, AND MAKES NO REPRESENTATIONS OR WARRANTIES OF 437 | ANY KIND CONCERNING THE LICENSED MATERIAL, WHETHER EXPRESS, 438 | IMPLIED, STATUTORY, OR OTHER. THIS INCLUDES, WITHOUT LIMITATION, 439 | WARRANTIES OF TITLE, MERCHANTABILITY, FITNESS FOR A PARTICULAR 440 | PURPOSE, NON-INFRINGEMENT, ABSENCE OF LATENT OR OTHER DEFECTS, 441 | ACCURACY, OR THE PRESENCE OR ABSENCE OF ERRORS, WHETHER OR NOT 442 | KNOWN OR DISCOVERABLE. WHERE DISCLAIMERS OF WARRANTIES ARE NOT 443 | ALLOWED IN FULL OR IN PART, THIS DISCLAIMER MAY NOT APPLY TO YOU. 444 | 445 | b. TO THE EXTENT POSSIBLE, IN NO EVENT WILL THE LICENSOR BE LIABLE 446 | TO YOU ON ANY LEGAL THEORY (INCLUDING, WITHOUT LIMITATION, 447 | NEGLIGENCE) OR OTHERWISE FOR ANY DIRECT, SPECIAL, INDIRECT, 448 | INCIDENTAL, CONSEQUENTIAL, PUNITIVE, EXEMPLARY, OR OTHER LOSSES, 449 | COSTS, EXPENSES, OR DAMAGES ARISING OUT OF THIS PUBLIC LICENSE OR 450 | USE OF THE LICENSED MATERIAL, EVEN IF THE LICENSOR HAS BEEN 451 | ADVISED OF THE POSSIBILITY OF SUCH LOSSES, COSTS, EXPENSES, OR 452 | DAMAGES. WHERE A LIMITATION OF LIABILITY IS NOT ALLOWED IN FULL OR 453 | IN PART, THIS LIMITATION MAY NOT APPLY TO YOU. 454 | 455 | c. The disclaimer of warranties and limitation of liability provided 456 | above shall be interpreted in a manner that, to the extent 457 | possible, most closely approximates an absolute disclaimer and 458 | waiver of all liability. 459 | 460 | 461 | Section 6 -- Term and Termination. 462 | 463 | a. This Public License applies for the term of the Copyright and 464 | Similar Rights licensed here. However, if You fail to comply with 465 | this Public License, then Your rights under this Public License 466 | terminate automatically. 467 | 468 | b. Where Your right to use the Licensed Material has terminated under 469 | Section 6(a), it reinstates: 470 | 471 | 1. automatically as of the date the violation is cured, provided 472 | it is cured within 30 days of Your discovery of the 473 | violation; or 474 | 475 | 2. upon express reinstatement by the Licensor. 476 | 477 | For the avoidance of doubt, this Section 6(b) does not affect any 478 | right the Licensor may have to seek remedies for Your violations 479 | of this Public License. 480 | 481 | c. 
For the avoidance of doubt, the Licensor may also offer the 482 | Licensed Material under separate terms or conditions or stop 483 | distributing the Licensed Material at any time; however, doing so 484 | will not terminate this Public License. 485 | 486 | d. Sections 1, 5, 6, 7, and 8 survive termination of this Public 487 | License. 488 | 489 | 490 | Section 7 -- Other Terms and Conditions. 491 | 492 | a. The Licensor shall not be bound by any additional or different 493 | terms or conditions communicated by You unless expressly agreed. 494 | 495 | b. Any arrangements, understandings, or agreements regarding the 496 | Licensed Material not stated herein are separate from and 497 | independent of the terms and conditions of this Public License. 498 | 499 | 500 | Section 8 -- Interpretation. 501 | 502 | a. For the avoidance of doubt, this Public License does not, and 503 | shall not be interpreted to, reduce, limit, restrict, or impose 504 | conditions on any use of the Licensed Material that could lawfully 505 | be made without permission under this Public License. 506 | 507 | b. To the extent possible, if any provision of this Public License is 508 | deemed unenforceable, it shall be automatically reformed to the 509 | minimum extent necessary to make it enforceable. If the provision 510 | cannot be reformed, it shall be severed from this Public License 511 | without affecting the enforceability of the remaining terms and 512 | conditions. 513 | 514 | c. No term or condition of this Public License will be waived and no 515 | failure to comply consented to unless expressly agreed to by the 516 | Licensor. 517 | 518 | d. Nothing in this Public License constitutes or may be interpreted 519 | as a limitation upon, or waiver of, any privileges and immunities 520 | that apply to the Licensor or You, including from the legal 521 | processes of any jurisdiction or authority. 522 | 523 | 524 | ======================================================================= 525 | 526 | Creative Commons is not a party to its public 527 | licenses. Notwithstanding, Creative Commons may elect to apply one of 528 | its public licenses to material it publishes and in those instances 529 | will be considered the “Licensor.” The text of the Creative Commons 530 | public licenses is dedicated to the public domain under the CC0 Public 531 | Domain Dedication. Except for the limited purpose of indicating that 532 | material is shared under a Creative Commons public license or as 533 | otherwise permitted by the Creative Commons policies published at 534 | creativecommons.org/policies, Creative Commons does not authorize the 535 | use of the trademark "Creative Commons" or any other trademark or logo 536 | of Creative Commons without its prior written consent including, 537 | without limitation, in connection with any unauthorized modifications 538 | to any of its public licenses or any other arrangements, 539 | understandings, or agreements concerning use of licensed material. For 540 | the avoidance of doubt, this paragraph does not form part of the 541 | public licenses. 542 | 543 | Creative Commons may be contacted at creativecommons.org. 
544 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # End-to-End Computer Vision Bootcamp 2 | 3 | The **End-to-End Computer Vision Bootcamp** is designed from a real-world perspective and follows the data processing, development, and deployment pipeline paradigm using a variety of tools. Through hands-on exercises, attendees will learn the fundamentals of preprocessing custom images, speeding the development process using transfer learning for model training, and deployment of trained models for fast and scalable AI in production. 4 | 5 | ## Bootcamp Content 6 | 7 | The content is structured in five modules with an additional introductory notebook and two challenge notebooks: 8 | 9 | - Welcome to **end-to-end computer vision** bootcamp 10 | - Lab 1: Data labeling and preprocessing 11 | - Lab 2: Object detection using TAO YOLOv4 12 | - Lab 3: Model deployment with Triton Inference Server 13 | - Lab 4: Model deployment with DeepStream 14 | - Lab 5: Measure object size using OpenCV 15 | - Challenge 1: DeepStream SDK 16 | - Challenge 2: Triton Inference Server 17 | 18 | ## Tools and Frameworks 19 | 20 | The tools and frameworks used in the bootcamp are as follows: 21 | 22 | - [NVIDIA® TAO Toolkit](https://developer.nvidia.com/tao-toolkit) 23 | - [NVIDIA DeepStream SDK](https://developer.nvidia.com/deepstream-sdk) 24 | - [NVIDIA Triton™ Inference Server](https://www.nvidia.com/en-us/ai-data-science/products/triton-inference-server/) 25 | - [NVIDIA TensorRT™](https://developer.nvidia.com/tensorrt) 26 | - [OpenCV](https://opencv.org/) 27 | - [Label Studio](https://labelstud.io/) 28 | 29 | 30 | ## Bootcamp Duration 31 | 32 | The total bootcamp material would take approximately 8.5 hours. It is recommended to divide the teaching of the material into two days, covering the first two notebooks (Lab 1 and Lab 2) in one session and the rest in the next session. 33 | 34 | 35 | ## Bootcamp Prerequisites 36 | 37 | A basic understanding of Deep Learning, Python programming, and familiarity with NVIDIA® NGC™ is required. 38 | 39 | 40 | ## Deploying the Bootcamp materials: 41 | 42 | To deploy the Labs, please refer to the Deployment guide presented [here](https://github.com/openhackathons-org/End-to-End-Computer-Vision/blob/main/Deployment_Guide.md) 43 | 44 | 45 | ## Attribution 46 | This material originates from the OpenHackathons Github repository. Check out additional materials [here](https://github.com/openhackathons-org) 47 | 48 | Don't forget to check out additional [Open Hackathons Resources](https://www.openhackathons.org/s/technical-resources) and join our [OpenACC and Hackathons Slack Channel](https://www.openacc.org/community#slack) to share your experience and get more help from the community. 49 | 50 | 51 | ## Licensing 52 | 53 | Copyright © 2023 OpenACC-Standard.org. This material is released by OpenACC-Standard.org, in collaboration with NVIDIA Corporation, under the Creative Commons Attribution 4.0 International (CC BY 4.0). These materials may include references to hardware and software developed by other entities; all applicable licensing and copyrights apply. 
54 | 55 | -------------------------------------------------------------------------------- /Singularity_deepstream: -------------------------------------------------------------------------------- 1 | # Select base image 2 | Bootstrap: docker 3 | From: nvcr.io/nvidia/deepstream:6.1.1-devel 4 | 5 | %environment 6 | export XDG_RUNTIME_DIR= 7 | 8 | %post 9 | # Install additional packages 10 | cd /opt/nvidia/deepstream/deepstream 11 | ./user_additional_install.sh 12 | 13 | # Install required dependencies 14 | apt install ffmpeg python3-gi python3-dev python3-gst-1.0 python-gi-dev git python-dev \ 15 | python3 python3-pip python3.8-dev cmake g++ build-essential libglib2.0-dev \ 16 | libglib2.0-dev-bin libgstreamer1.0-dev libtool m4 autoconf automake libgirepository1.0-dev libcairo2-dev -y 17 | 18 | # Initialization of submodules 19 | cd /opt/nvidia/deepstream/deepstream/sources 20 | git clone https://github.com/NVIDIA-AI-IOT/deepstream_python_apps.git 21 | cd /opt/nvidia/deepstream/deepstream/sources/deepstream_python_apps 22 | git checkout v1.1.4 23 | git submodule update --init 24 | 25 | # Installing Gst-python 26 | apt-get install -y apt-transport-https ca-certificates -y 27 | update-ca-certificates 28 | cd /opt/nvidia/deepstream/deepstream/sources/deepstream_python_apps/3rdparty/gst-python 29 | ./autogen.sh 30 | make 31 | make install 32 | 33 | # Compiling python bindings 34 | cd /opt/nvidia/deepstream/deepstream/sources/deepstream_python_apps/bindings 35 | mkdir build 36 | cd /opt/nvidia/deepstream/deepstream/sources/deepstream_python_apps/bindings/build 37 | cmake .. -DPYTHON_MAJOR_VERSION=3 -DPYTHON_MINOR_VERSION=8 38 | make 39 | 40 | # Installing python bindings 41 | pip3 install ./pyds-1.1.4-py3-none*.whl 42 | 43 | # Install jupyterlab and packages 44 | pip3 install jupyterlab \ 45 | ipywidgets \ 46 | matplotlib \ 47 | scipy \ 48 | imutils \ 49 | opencv-python 50 | 51 | %runscript 52 | "$@" 53 | 54 | %labels 55 | Author Massimiliano, Tosin 56 | 57 | -------------------------------------------------------------------------------- /Singularity_tao: -------------------------------------------------------------------------------- 1 | Bootstrap: docker 2 | From: nvcr.io/nvidia/tao/tao-toolkit-tf:v3.22.05-tf1.15.5-py3 3 | 4 | %environment 5 | export XDG_RUNTIME_DIR= 6 | export PATH="$PATH:/usr/local/bin" 7 | export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/usr/local/lib" 8 | 9 | %post 10 | # Install jupyterlab and packages 11 | pip3 install jupyterlab \ 12 | ipywidgets \ 13 | matplotlib \ 14 | opencv-python-headless==4.5.4.60 \ 15 | fiftyone \ 16 | gdown 17 | 18 | %runscript 19 | "$@" 20 | 21 | %labels 22 | Author Massimiliano, Tosin 23 | 24 | -------------------------------------------------------------------------------- /Singularity_triton: -------------------------------------------------------------------------------- 1 | Bootstrap: docker 2 | From: nvcr.io/nvidia/tritonserver:22.05-py3-sdk 3 | 4 | %environment 5 | export XDG_RUNTIME_DIR= 6 | 7 | %post 8 | apt-get update -y 9 | apt install ffmpeg -y 10 | 11 | # Install jupyterlab and packages 12 | pip3 install jupyterlab \ 13 | ipywidgets \ 14 | attrdict \ 15 | tqdm \ 16 | matplotlib \ 17 | protobuf==3.20.* 18 | 19 | %runscript 20 | "$@" 21 | 22 | %labels 23 | Author Massimiliano, Tosin 24 | 25 | -------------------------------------------------------------------------------- /workspace/Start_here.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | 
"metadata": {}, 6 | "source": [ 7 | "# End-to-end computer vision bootcamp\n", 8 | "\n", 9 | "## Learning objectives\n", 10 | "\n", 11 | "The goal of this bootcamp is to build a complete end-to-end computer vision pipeline for an object detection application. This material will introduce participants to multiple NVIDIA® SDKs, most notably NVIDIA TAO Toolkit, NVIDIA TensorRT™, NVIDIA Triton™ Inference Server, and NVIDIA DeepStream SDK while giving hands-on experience in data preprocessing, model training, optimization, and deployment at scale.\n", 12 | "\n", 13 | "A key objective of this bootcamp is to show participants how to seamlessly combine these SDKs so that the same development methodology can be applied to similar use cases as well, even outside the domain of computer vision. With a broader idea of the production flow, participants will be able to better evaluate the complexity and progress of their machine learning projects.\n", 14 | "\n", 15 | "The bootcamp assumes familiarity with Deep Learning and Computer Vision fundamentals.\n", 16 | "\n", 17 | "* Language: Python\n", 18 | "* Frameworks: NVIDIA TAO Toolkit, NVIDIA TensorRT, NVIDIA Triton Inference Server, NVIDIA DeepStream SDK, OpenCV\n", 19 | "\n", 20 | "It is not required, although useful, to have more than one GPU for the bootcamp. Let's start by testing the GPUs you are running the code on in this bootcamp:" 21 | ] 22 | }, 23 | { 24 | "cell_type": "code", 25 | "execution_count": null, 26 | "metadata": {}, 27 | "outputs": [], 28 | "source": [ 29 | "!nvidia-smi" 30 | ] 31 | }, 32 | { 33 | "cell_type": "markdown", 34 | "metadata": { 35 | "jp-MarkdownHeadingCollapsed": true, 36 | "tags": [] 37 | }, 38 | "source": [ 39 | "## Bootcamp outline\n", 40 | "\n", 41 | "The following contents will be covered during the bootcamp:\n", 42 | "\n", 43 | "- [**Lab 1: Data labeling and preprocessing**](jupyter_notebook/1.Data_labeling_and_preprocessing.ipynb)\n", 44 | "- [**Lab 2: Object detection using TAO YOLOv4**](jupyter_notebook/2.Object_detection_using_TAO_YOLOv4.ipynb)\n", 45 | "- [**Lab 3: Model deployment with Triton Inference Server**](jupyter_notebook/3.Model_deployment_with_Triton_Inference_Server.ipynb)\n", 46 | "- [**Lab 4: Model deployment with DeepStream**](jupyter_notebook/4.Model_deployment_with_DeepStream.ipynb)\n", 47 | "- [**Lab 5: Measure object size using OpenCV**](jupyter_notebook/5.Measure_object_size_using_OpenCV.ipynb)\n", 48 | "- [**Challenge 1: DeepStream SDK**](jupyter_notebook/6.Challenge_DeepStream.ipynb)\n", 49 | "- [**Challenge 2: Triton Inference Server**](jupyter_notebook/7.Challenge_Triton.ipynb)\n", 50 | "\n", 51 | "Note: the challenges are extra modules that you can try after learning the individual modules to test your knowledge." 52 | ] 53 | }, 54 | { 55 | "cell_type": "markdown", 56 | "metadata": { 57 | "tags": [] 58 | }, 59 | "source": [ 60 | "## Bootcamp duration\n", 61 | "The lab material will be presented in a total of 8.5 hours. 
The link to the material is available for download at the end of the lab.\n", 62 | "\n", 63 | "## Content level\n", 64 | "Beginner, Intermediate.\n", 65 | "\n", 66 | "## Target audience and prerequisites\n", 67 | "The target audience for this lab is researchers/graduate students and developers who are interested in learning about building an end-to-end computer vision pipeline to bring their scientific or industrial application ideas to life.\n", 68 | "\n", 69 | "A basic understanding of Deep Learning and Computer Vision is required.\n", 70 | "\n", 71 | "**Hardware Note:** *all the material was tested and is set to run on a DGX machine equipped with an Ampere A100 GPU. The material was also tested using a workstation equipped with an NVIDIA RTX A3000 GPU with 6GB of VRAM, reducing all the batch sizes to 8 during training. The results may vary when using different hardware and some hyperparameters may not be ideal for fully taking advantage of the graphic card.*" 72 | ] 73 | }, 74 | { 75 | "cell_type": "markdown", 76 | "metadata": {}, 77 | "source": [ 78 | "--- \n", 79 | "\n", 80 | "## Licensing\n", 81 | "\n", 82 | "Copyright © 2022 OpenACC-Standard.org. This material is released by OpenACC-Standard.org, in collaboration with NVIDIA Corporation, under the Creative Commons Attribution 4.0 International (CC BY 4.0). These materials include references to hardware and software developed by other entities; all applicable licensing and copyrights apply." 83 | ] 84 | } 85 | ], 86 | "metadata": { 87 | "kernelspec": { 88 | "display_name": "Python 3 (ipykernel)", 89 | "language": "python", 90 | "name": "python3" 91 | }, 92 | "language_info": { 93 | "codemirror_mode": { 94 | "name": "ipython", 95 | "version": 3 96 | }, 97 | "file_extension": ".py", 98 | "mimetype": "text/x-python", 99 | "name": "python", 100 | "nbconvert_exporter": "python", 101 | "pygments_lexer": "ipython3", 102 | "version": "3.9.12" 103 | }, 104 | "toc-autonumbering": false 105 | }, 106 | "nbformat": 4, 107 | "nbformat_minor": 4 108 | } 109 | -------------------------------------------------------------------------------- /workspace/jupyter_notebook/1.Data_labeling_and_preprocessing.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "

Home Page

\n", 8 | "\n", 9 | "
\n", 10 | " \n", 11 | " 1\n", 12 | " 2\n", 13 | " 3\n", 14 | " 4\n", 15 | " 5\n", 16 | " 6\n", 17 | " 7\n", 18 | " \n", 19 | " Next Notebook\n", 20 | "
" 21 | ] 22 | }, 23 | { 24 | "cell_type": "markdown", 25 | "metadata": {}, 26 | "source": [ 27 | "# Data labeling and preprocessing\n", 28 | "\n", 29 | "***" 30 | ] 31 | }, 32 | { 33 | "cell_type": "markdown", 34 | "metadata": {}, 35 | "source": [ 36 | "**The goal of this notebook is to make you understand how to:**\n", 37 | "\n", 38 | "- Label data for object detection applications\n", 39 | "- Convert a dataset into KITTI format\n", 40 | "\n", 41 | "**Contents of this notebook:**\n", 42 | "\n", 43 | "- [Custom data labeling](#Custom-data-labeling)\n", 44 | " - [Labeling with Label Studio](#Labeling-with-Label-Studio)\n", 45 | " - [Labeling with Yolo Mark](#Labeling-with-Yolo-Mark)\n", 46 | "- [Download data for the lab](#Download-data-for-the-lab)\n", 47 | "- [Conversion to KITTI format](#Conversion-to-KITTI-format)\n", 48 | " - [Load the dataset](#Load-the-dataset)\n", 49 | " - [Export to KITTI](#Export-to-KITTI)" 50 | ] 51 | }, 52 | { 53 | "cell_type": "markdown", 54 | "metadata": {}, 55 | "source": [ 56 | "## Custom data labeling\n", 57 | "\n", 58 | "Training a deep learning model for an object detection task requires a meaningful amount of annotated data. A dataset for a specific domain application may not be available often or if it is, chances are it may not be labeled or adequate in size. In this notebook, we show how to annotate a custom dataset with bounding boxes and convert it into KITTI file format, useful to expand the number of samples with offline data augmentation or to train a model with transfer learning.\n", 59 | "\n", 60 | "\n", 61 | "\n", 62 | "We present two tools for data labeling operations:\n", 63 | "- Label Studio\n", 64 | "- Yolo Mark\n", 65 | "\n", 66 | "We recommend using Label Studio because of the more intuitive user interface and a better overall labeling experience." 67 | ] 68 | }, 69 | { 70 | "cell_type": "markdown", 71 | "metadata": {}, 72 | "source": [ 73 | "## Download data for the lab\n", 74 | "\n", 75 | "In this lab, we will provide you with a labeled version of a dataset containing three types of fruit - `apples`, `bananas`, and `oranges` - each fresh or rotten, for a total of six classes. The dataset was labeled using Label Studio, as explained above. The project folder has been renamed to `label-studio`. Running the following cell will make the data available in the `/workspace/data` directory." 76 | ] 77 | }, 78 | { 79 | "cell_type": "code", 80 | "execution_count": null, 81 | "metadata": {}, 82 | "outputs": [], 83 | "source": [ 84 | "!python3 ../source_code/dataset.py" 85 | ] 86 | }, 87 | { 88 | "cell_type": "markdown", 89 | "metadata": { 90 | "tags": [] 91 | }, 92 | "source": [ 93 | "\n", 94 | "### Labeling with Label Studio\n", 95 | "\n", 96 | "[Label Studio](https://labelstud.io/) is an open-source, flexible, quickly installable data labeling tool with a very convenient user interface. The tool natively comes with a Python module available to install via the pip package manager, but can also be installed in alternative ways, all available [here](https://labelstud.io/guide/install.html), so feel free to pick the one you are most comfortable with.\n", 97 | "\n", 98 | "To get started with the Python module, open a terminal window in your preferred environment (ideally, create a fresh virtual one) and run the command `pip install -U label-studio`. Once installed, start the server with the command `label-studio`. 
This will automatically open the user interface in the default web browser on port 8080, accessible at `http://localhost:8080` if you are working on your local machine, unless another port is specified.\n", 99 | "\n", 100 | "To proceed, follow these steps and visual explanations:\n", 101 | "- Sign up with an email address and create a password (note that these credentials are stored locally on the Label Studio server and can be whatever you prefer).\n", 102 | "\n", 103 | "\n", 104 | "- Create a new project.\n", 105 | "\n", 106 | "\n", 107 | "- Give it a title and optionally a brief description.\n", 108 | "\n", 109 | "\n", 110 | "- Drag and drop images to upload.\n", 111 | "\n", 112 | "\n", 113 | "- Select an object detection task with bounding boxes.\n", 114 | "\n", 115 | "\n", 116 | "- Set the class names.\n", 117 | "" 118 | ] 119 | }, 120 | { 121 | "cell_type": "markdown", 122 | "metadata": {}, 123 | "source": [ 124 | "If you plan on tagging a significant amount of data, you will likely need to separate it into multiple chunks to avoid hitting the per-project memory limit.\n", 125 | "\n", 126 | "Once the previous steps are completed, you can start with the labeling process. From the project menu, click on `Label All Tasks` at the top.\n", 127 | "\n", 128 | "\n", 129 | "\n", 130 | "Then, for every image, do the following operations:\n", 131 | "- Select an appropriate class.\n", 132 | "- Draw all the bounding boxes for that class.\n", 133 | "- Repeat for other classes.\n", 134 | "- Click `Submit`.\n", 135 | "\n", 136 | "\n", 137 | "\n", 138 | "This will automatically load the next image until there are no images left. While labeling, you can stop at any time and when you resume, you will continue exactly where you left off.\n", 139 | "\n", 140 | "" 141 | ] 142 | }, 143 | { 144 | "cell_type": "markdown", 145 | "metadata": {}, 146 | "source": [ 147 | "As soon as you have completed the labeling activity, either because you have run out of images or because you are satisfied with how many you have, you can go back to the home page of the project, apply filters to the annotations, and export them by clicking on `Export`. Make sure to scroll down and select the YOLO format when you do so.\n", 148 | "\n", 149 | "\n", 150 | "\n", 151 | "For more in-depth information and an additional visual explanation of the previous steps, explore this [dedicated tutorial](https://labelstud.io/blog/Quickly-Create-Datasets-for-Training-YOLO-Object-Detection.html) on how to label images for YOLO applications on the Label Studio blog." 
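As a quick sanity check after exporting, a short script can confirm that every image has a matching label file and that `classes.txt` contains the class names you expect. The snippet below is only an illustrative sketch: it assumes the unzipped export follows the default layout described just below (an `images/` folder, a `labels/` folder, and a `classes.txt` file), and it points at the provided `../data/label-studio` folder used later in this notebook, so adjust the path to wherever your own export lives.

```python
import os

# Path to an unzipped Label Studio YOLO export (default layout assumed)
export_dir = "../data/label-studio"
images_dir = os.path.join(export_dir, "images")
labels_dir = os.path.join(export_dir, "labels")

# Read the class names written by Label Studio
with open(os.path.join(export_dir, "classes.txt")) as f:
    classes = [line.strip() for line in f if line.strip()]
print("Classes:", classes)

# Check that every image has a corresponding label file
image_stems = [os.path.splitext(name)[0] for name in os.listdir(images_dir)]
label_stems = {os.path.splitext(name)[0] for name in os.listdir(labels_dir)}
missing = [stem for stem in image_stems if stem not in label_stems]
print(f"{len(image_stems)} images, {len(label_stems)} label files, {len(missing)} images without labels")
```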
152 | ] 153 | }, 154 | { 155 | "cell_type": "markdown", 156 | "metadata": {}, 157 | "source": [ 158 | "The exported data has a similar structure to this one by default, after unzipping the downloaded file:\n", 159 | "```\n", 160 | "project-1-at-2022-09-20-15-20-f6c05363.zip\n", 161 | " notes.json\n", 162 | " classes.txt\n", 163 | " labels\n", 164 | " image_filename1.txt\n", 165 | " image_filename2.txt\n", 166 | " image_filename3.txt\n", 167 | " ...\n", 168 | " images\n", 169 | " image_filename1.\n", 170 | " image_filename2.\n", 171 | " image_filename3.\n", 172 | " ...\n", 173 | "```\n", 174 | "\n", 175 | "\n", 176 | "The TXT files in the `labels` folder are space-delimited files where each row corresponds to an object in the image with the same name in the `images` folder, in the standard YOLO format:\n", 177 | "```\n", 178 | "<label_index> <x_center> <y_center> <width> <height> <confidence>\n", 179 | "```\n", 180 | "\n", 181 | "\n", 182 | "where `<label_index>` is the zero-based integer index of the object class label from `classes.txt`, the bounding box coordinates `<x_center> <y_center> <width> <height>` are expressed as relative coordinates in `[0, 1] x [0, 1]`, and `<confidence>` is an optional detection confidence in `[0, 1]`, left blank by Label Studio." 183 | ] 184 | }, 185 | { 186 | "cell_type": "markdown", 187 | "metadata": {}, 188 | "source": [ 189 | "### Labeling with Yolo Mark\n", 190 | "\n", 191 | "Another popular data labeling tool is [Yolo Mark](https://github.com/AlexeyAB/Yolo_mark), a Windows and Linux GUI for marking bounded boxes of objects in images for training Yolo. Its use is not as straightforward as Label Studio, as it needs to be compiled from source and does not come with a Python module, but it is still an option to consider for a project.\n", 192 | "\n", 193 | "In order to use Yolo Mark, [download](https://github.com/AlexeyAB/Yolo_mark) the repository from GitHub and follow the instructions in the README file to get the executable program, depending on your operating system. Note that a working installation of [OpenCV](https://opencv.org/) is required to run the program successfully. If you are a Windows user, you might consider a tool like [MS Visual Studio](https://visualstudio.microsoft.com/vs/) to compile the project, while on Linux you will just need to type the commands `cmake .` and then `make` after moving into the project directory.\n", 194 | "\n", 195 | "At this point, to use the tool to label your custom images, place them in the `x64/Release/data/img` directory, change the number of classes in `x64/Release/data/obj.data` as well as the class names in `x64/Release/data/obj.names`, and run `x64/Release/yolo_mark.cmd` on Windows or `./linux_mark.sh` on Linux to start labeling.\n", 196 | "\n", 197 | "\n", 198 | "\n", 199 | "The resulting YOLO dataset in `x64/Release/data` will have the following structure:\n", 200 | "```\n", 201 | "data\n", 202 | " obj.data\n", 203 | " obj.names\n", 204 | " train.txt\n", 205 | " img\n", 206 | " image_filename1.\n", 207 | " image_filename1.txt\n", 208 | " image_filename2.\n", 209 | " image_filename2.txt\n", 210 | " image_filename3.\n", 211 | " image_filename3.txt\n", 212 | " ...\n", 213 | "``` \n", 214 | "with images and corresponding labels in the same folder, `obj.names` with the class names, and a `train.txt` file with the paths to the labeled images. The format of the TXT annotation files in the `img` folder is the same YOLO format as described before."
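To make the YOLO annotation format concrete, here is a minimal sketch that parses one label line and converts the normalized center/size values into pixel corner coordinates, which is also the box representation used by the KITTI format introduced in the next section. The example line and the 640x480 image size are made-up placeholders, not values from the lab dataset.

```python
def yolo_to_corners(line, img_width, img_height):
    """Convert one YOLO label line to (class_index, x1, y1, x2, y2) in pixels."""
    fields = line.split()
    class_index = int(fields[0])
    x_center, y_center, width, height = (float(v) for v in fields[1:5])
    # Relative center/size -> absolute top-left and bottom-right corners
    x1 = (x_center - width / 2) * img_width
    y1 = (y_center - height / 2) * img_height
    x2 = (x_center + width / 2) * img_width
    y2 = (y_center + height / 2) * img_height
    return class_index, x1, y1, x2, y2

# Hypothetical label line for a 640x480 image
print(yolo_to_corners("2 0.5 0.5 0.25 0.4", 640, 480))
# -> (2, 240.0, 144.0, 400.0, 336.0)
```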
215 | ] 216 | }, 217 | { 218 | "cell_type": "markdown", 219 | "metadata": { 220 | "tags": [] 221 | }, 222 | "source": [ 223 | "## Conversion to KITTI format\n", 224 | "\n", 225 | "Regardless of whether Label Studio or Yolo Mark was used, or a dataset already labeled in YOLO format was provided, conversion to KITTI format is required to experiment with the NVIDIA® TAO Toolkit in the next notebook. The KITTI format not only allows you to unleash the power of transfer learning and pre-trained models available within the TAO Toolkit but also is used to perform offline data augmentation and dramatically increase the size of the dataset.\n", 226 | "\n", 227 | "The KITTI format organizes the data directories of images and corresponding labels into a structure similar to Label Studio, namely:\n", 228 | "```\n", 229 | "dataset_dir\n", 230 | " data\n", 231 | " image_filename1.\n", 232 | " image_filename2.\n", 233 | " image_filename3.\n", 234 | " ...\n", 235 | " labels\n", 236 | " image_filename1.txt\n", 237 | " image_filename2.txt\n", 238 | " image_filename3.txt\n", 239 | " ...\n", 240 | "``` \n", 241 | "The main difference is that in the KITTI format the labels TXT files are space-delimited files where each row corresponds to an object and **the bounding box is stored using 15 (and optional 16th confidence) columns**. The meaning of each of the 15 required columns is described [here](https://docs.nvidia.com/tao/tao-toolkit/text/data_annotation_format.html#label-files). In particular, the first item is the object label and from the fifth to the eighth position we have the bounding box coordinates expressed in pixels **[x-top-left, y-top-left, x-bottom-right, y-bottom-right]**. Note that this is different from the YOLO format since we now use corners to identify the box and it is not resizing invariant.\n", 242 | "\n", 243 | "\n", 244 | "\n", 245 | "To perform the conversion between dataset formats, we will use [FiftyOne](https://voxel51.com/docs/fiftyone/), an open-source Python tool for handling computer vision datasets. FiftyOne allows loading a YOLO dataset and exporting it as KITTI in a few lines of code.\n", 246 | "\n", 247 | "### Load the dataset\n", 248 | "\n", 249 | "The generic `Dataset.from_dir()` method (documentation available [here](https://voxel51.com/docs/fiftyone/api/fiftyone.core.dataset.html#fiftyone.core.dataset.Dataset.from_dir)) loads a dataset from disk and depending on the format, additional parameters can be passed to customize the data import. 
When dealing with a YOLO data format like in our case, these parameters are inherited from the [YOLOv4DatasetImporter](https://voxel51.com/docs/fiftyone/api/fiftyone.utils.yolo.html#fiftyone.utils.yolo.YOLOv4DatasetImporter) class and a customized import would require the following arguments:\n", 250 | "- `dataset_dir`: the dataset directory.\n", 251 | "- `dataset_type`: the `fiftyone.types.dataset_types.Dataset` type of the dataset.\n", 252 | "- `data_path`: to enable explicit control over the location of the media.\n", 253 | "- `labels_path`: to enable explicit control over the location of the labels.\n", 254 | "- `images_path`: to enable explicit control over the location of the image listing file.\n", 255 | "- `objects_path`: to enable explicit control over the location of the object names file.\n", 256 | "\n", 257 | "If your data stored on disk is not in YOLO format but in one of the [many common formats](https://voxel51.com/docs/fiftyone/user_guide/dataset_creation/datasets.html#supported-import-formats) supported natively by FiftyOne, then you can automatically load your data with minimal code changes in terms of additional parameters.\n", 258 | "\n", 259 | "To install the FiftyOne Python module, run `pip install fiftyone` in your preferred environment (ideally, a virtual one). In this lab, we have already installed it for you.\n", 260 | "\n", 261 | "Let's now load a YOLO dataset generated with Label Studio into FiftyOne. In this case, we have an object names file but we don't have an image listing file, so we just ignore the `images_path` argument and let FiftyOne list the data directory for us." 262 | ] 263 | }, 264 | { 265 | "cell_type": "code", 266 | "execution_count": null, 267 | "metadata": {}, 268 | "outputs": [], 269 | "source": [ 270 | "import fiftyone as fo\n", 271 | "\n", 272 | "dataset_dir = \"../data/label-studio/\"\n", 273 | "data_path = \"images/\"\n", 274 | "labels_path = \"labels/\"\n", 275 | "objects_path = \"classes.txt\"\n", 276 | "\n", 277 | "# Create the dataset\n", 278 | "dataset = fo.Dataset.from_dir(\n", 279 | " dataset_dir=dataset_dir,\n", 280 | " data_path=data_path,\n", 281 | " labels_path=labels_path,\n", 282 | " objects_path=objects_path,\n", 283 | " dataset_type=fo.types.YOLOv4Dataset\n", 284 | ")\n", 285 | "\n", 286 | "# View summary info about the dataset\n", 287 | "print(dataset)\n", 288 | "\n", 289 | "# Print the first few samples in the dataset\n", 290 | "print(dataset.head(2))" 291 | ] 292 | }, 293 | { 294 | "cell_type": "markdown", 295 | "metadata": {}, 296 | "source": [ 297 | "Instead, if we were trying to load a dataset generated with Yolo Mark into FiftyOne, saved into a folder named `yolo-mark` that isn't available for the lab, images and labels would now be in the same folder and we would have both an object names file and an image listing file. However, the `train.txt` image listing file contains paths from the executable file directory and not from the dataset home directory, so FiftyOne will not find the images unless we substitute all paths with relative paths in the form `img/image_filename.`. 
We can do that with some simple code that generates a new `images.txt` file with the right paths.\n", 298 | "```python\n", 299 | "# Read the file\n", 300 | "with open(\"../data/yolo-mark/train.txt\", \"r\") as file :\n", 301 | " filedata = file.read()\n", 302 | " \n", 303 | "# Replace the target string\n", 304 | "# On Linux\n", 305 | "filedata = filedata.replace(\"x64/Release/data/img/\", \"img/\")\n", 306 | "# On Windows\n", 307 | "#filedata = filedata.replace(\"data/img/\", \"img/\")\n", 308 | "\n", 309 | "# Write the file out again\n", 310 | "with open(\"../data/yolo-mark/images.txt\", \"w\") as file:\n", 311 | " file.write(filedata)\n", 312 | "``` \n", 313 | "\n", 314 | "Alternatively, we can again ignore the `images_path` argument and let FiftyOne list all the data directory for us." 315 | ] 316 | }, 317 | { 318 | "cell_type": "code", 319 | "execution_count": null, 320 | "metadata": {}, 321 | "outputs": [], 322 | "source": [ 323 | "# If you use a dataset labeled with Yolo Mark, you will need a yolo-mark folder to run the code below to load it into FiftyOne\n", 324 | "\n", 325 | "# dataset_dir = \"../data/yolo-mark/\"\n", 326 | "# data_path = \"img/\"\n", 327 | "# images_path = \"images.txt\"\n", 328 | "# objects_path = \"obj.names\"\n", 329 | "\n", 330 | "# Create the dataset\n", 331 | "# dataset = fo.Dataset.from_dir(\n", 332 | "# dataset_dir=dataset_dir,\n", 333 | "# data_path=data_path,\n", 334 | "# images_path=images_path,\n", 335 | "# objects_path=objects_path,\n", 336 | "# dataset_type=fo.types.YOLOv4Dataset\n", 337 | "# )\n", 338 | "\n", 339 | "# View summary info about the dataset\n", 340 | "# print(dataset)\n", 341 | "\n", 342 | "# Print the first few samples in the dataset\n", 343 | "# print(dataset.head(2))" 344 | ] 345 | }, 346 | { 347 | "cell_type": "markdown", 348 | "metadata": {}, 349 | "source": [ 350 | "### Export to KITTI\n", 351 | "\n", 352 | "Once the dataset is loaded into FiftyOne, conversion to KITTI format is immediate with an export command. The `Dataset.export()` method (documentation available [here](https://voxel51.com/docs/fiftyone/api/fiftyone.core.dataset.html#fiftyone.core.dataset.Dataset.export)) writes the samples to disk and a customized export to KITTI format would require the following arguments:\n", 353 | "- `export_dir`: the dataset export directory.\n", 354 | "- `dataset_type`: the `fiftyone.types.dataset_types.Dataset` type of the dataset.\n", 355 | "- `data_path`: to enable explicit control over the location of the exported media.\n", 356 | "- `labels_path`: to enable explicit control over the location of the exported labels.\n", 357 | "\n", 358 | "Providing only `export_dir` and `dataset_type` would result in an export of the content to a directory following the default layout for the specified format." 359 | ] 360 | }, 361 | { 362 | "cell_type": "code", 363 | "execution_count": null, 364 | "metadata": {}, 365 | "outputs": [], 366 | "source": [ 367 | "export_dir = \"../data/training/\"\n", 368 | "data_path = \"image_2/\"\n", 369 | "labels_path = \"label_2/\"\n", 370 | "\n", 371 | "# Export the dataset\n", 372 | "dataset.export(\n", 373 | " export_dir=export_dir,\n", 374 | " data_path=data_path,\n", 375 | " labels_path=labels_path,\n", 376 | " dataset_type=fo.types.KITTIDetectionDataset\n", 377 | ")" 378 | ] 379 | }, 380 | { 381 | "cell_type": "markdown", 382 | "metadata": {}, 383 | "source": [ 384 | "We can now view some images of our dataset before moving on to the next notebook." 
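Before that, if you want to double-check the exported annotations themselves, a small sketch like the one below can help. It assumes the export cell above completed successfully and wrote its label files under `../data/training/label_2/`; it reads the first exported KITTI label file and prints each object's class together with the pixel corner coordinates stored in columns 5 to 8 of the 15-column KITTI row.

```python
import os

labels_dir = "../data/training/label_2"
first_label = sorted(os.listdir(labels_dir))[0]
print("Inspecting", first_label)

with open(os.path.join(labels_dir, first_label)) as f:
    for row in f:
        fields = row.split()
        class_name = fields[0]
        # Columns 5-8 of the KITTI row: x-top-left, y-top-left, x-bottom-right, y-bottom-right (pixels)
        x1, y1, x2, y2 = (float(v) for v in fields[4:8])
        print(f"{class_name}: ({x1:.1f}, {y1:.1f}) -> ({x2:.1f}, {y2:.1f})")
```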
385 | ] 386 | }, 387 | { 388 | "cell_type": "code", 389 | "execution_count": null, 390 | "metadata": {}, 391 | "outputs": [], 392 | "source": [ 393 | "# Simple grid visualizer\n", 394 | "import matplotlib.pyplot as plt\n", 395 | "import os\n", 396 | "from math import ceil\n", 397 | "valid_image_ext = ['.jpg', '.png', '.jpeg', '.ppm']\n", 398 | "\n", 399 | "def visualize_images(img_path, num_cols=4, num_images=10):\n", 400 | " num_rows = int(ceil(float(num_images) / float(num_cols)))\n", 401 | " f, axarr = plt.subplots(num_rows, num_cols, figsize=[80,30])\n", 402 | " f.tight_layout()\n", 403 | " a = [os.path.join(img_path, image) for image in os.listdir(img_path) \n", 404 | " if os.path.splitext(image)[1].lower() in valid_image_ext]\n", 405 | " for idx, img_path in enumerate(a[:num_images]):\n", 406 | " col_id = idx % num_cols\n", 407 | " row_id = idx // num_cols\n", 408 | " img = plt.imread(img_path)\n", 409 | " axarr[row_id, col_id].imshow(img) " 410 | ] 411 | }, 412 | { 413 | "cell_type": "code", 414 | "execution_count": null, 415 | "metadata": {}, 416 | "outputs": [], 417 | "source": [ 418 | "# Visualizing the sample images\n", 419 | "IMG_PATH = '../data/training/image_2'\n", 420 | "COLS = 3 # number of columns in the visualizer grid\n", 421 | "IMAGES = 9 # number of images to visualize\n", 422 | "\n", 423 | "visualize_images(IMG_PATH, num_cols=COLS, num_images=IMAGES)" 424 | ] 425 | }, 426 | { 427 | "cell_type": "markdown", 428 | "metadata": {}, 429 | "source": [ 430 | "In this notebook, we have seen how to label a raw dataset and export it into KITTI format. Next, we will train an object detection model using the TAO Toolkit. Please go to the next notebook by clicking on the `Next Notebook` button below." 431 | ] 432 | }, 433 | { 434 | "cell_type": "markdown", 435 | "metadata": {}, 436 | "source": [ 437 | "***\n", 438 | "\n", 439 | "## Licensing\n", 440 | "\n", 441 | "Copyright © 2022 OpenACC-Standard.org. This material is released by OpenACC-Standard.org, in collaboration with NVIDIA Corporation, under the Creative Commons Attribution 4.0 International (CC BY 4.0). These materials include references to hardware and software developed by other entities; all applicable licensing and copyrights apply." 442 | ] 443 | }, 444 | { 445 | "cell_type": "markdown", 446 | "metadata": {}, 447 | "source": [ 448 | "
\n", 449 | "
\n", 450 | " \n", 451 | " 1\n", 452 | " 2\n", 453 | " 3\n", 454 | " 4\n", 455 | " 5\n", 456 | " 6\n", 457 | " 7\n", 458 | " \n", 459 | " Next Notebook\n", 460 | "
\n", 461 | "\n", 462 | "
\n", 463 | "

Home Page

" 464 | ] 465 | } 466 | ], 467 | "metadata": { 468 | "kernelspec": { 469 | "display_name": "Python 3", 470 | "language": "python", 471 | "name": "python3" 472 | }, 473 | "language_info": { 474 | "codemirror_mode": { 475 | "name": "ipython", 476 | "version": 3 477 | }, 478 | "file_extension": ".py", 479 | "mimetype": "text/x-python", 480 | "name": "python", 481 | "nbconvert_exporter": "python", 482 | "pygments_lexer": "ipython3", 483 | "version": "3.8.8" 484 | }, 485 | "toc-autonumbering": false, 486 | "toc-showcode": false, 487 | "toc-showmarkdowntxt": false 488 | }, 489 | "nbformat": 4, 490 | "nbformat_minor": 4 491 | } 492 | -------------------------------------------------------------------------------- /workspace/jupyter_notebook/6.Challenge_DeepStream.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "

Home Page

\n", 8 | "\n", 9 | "
\n", 10 | " Previous Notebook\n", 11 | " \n", 12 | " 1\n", 13 | " 2\n", 14 | " 3\n", 15 | " 4\n", 16 | " 5\n", 17 | " 6\n", 18 | " 7\n", 19 | " \n", 20 | " Next Notebook\n", 21 | "
" 22 | ] 23 | }, 24 | { 25 | "cell_type": "markdown", 26 | "metadata": {}, 27 | "source": [ 28 | "# Exercise: model deployment with DeepStream\n", 29 | "\n", 30 | "***" 31 | ] 32 | }, 33 | { 34 | "cell_type": "markdown", 35 | "metadata": {}, 36 | "source": [ 37 | "In this notebook, you will review the concepts learned in [4.Model_deployment_with_DeepStream.ipynb](4.Model_deployment_with_DeepStream.ipynb) while trying to deploy your NVIDIA® TAO Toolkit model to DeepStream SDK using Python bindings.\n", 38 | "\n", 39 | "As an exercise, you are asked to re-implement the same 6-class object detection pipeline with a tracker that has been analyzed in the tutorial notebook. Here are the illustrations of the pipeline: remember that the secondary classifiers (highlighted in gray) are not to be implemented.\n", 40 | "\n", 41 | "\n", 42 | "\n", 43 | "Let us get started with the notebook. You will have to fill in the `COMPLETE THIS SECTION` parts of the code present in the notebook to complete the pipeline. Feel free to refer to the previous notebooks for the commands but make sure to grasp the most important underlying concepts.\n", 44 | "\n", 45 | "## Building the pipeline" 46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": null, 51 | "metadata": {}, 52 | "outputs": [], 53 | "source": [ 54 | "# Import required libraries\n", 55 | "import sys\n", 56 | "sys.path.append(\"../source_code/N4\")\n", 57 | "from bus_call import bus_call\n", 58 | "import gi\n", 59 | "gi.require_version('Gst', '1.0')\n", 60 | "from gi.repository import GLib, Gst\n", 61 | "import configparser\n", 62 | "import pyds\n", 63 | "import time\n", 64 | "\n", 65 | "############# ~~~~~~~ COMPLETE THIS SECTION ~~~~~~~ #############\n", 66 | "# Define class labels\n", 67 | "PGIE_CLASS_ID_FRESHAPPLE = 0\n", 68 | "#\n", 69 | "#\n", 70 | "#\n", 71 | "#\n", 72 | "#\n", 73 | "###################### ~~~~~~~ END ~~~~~~~ ######################\n", 74 | "\n", 75 | "# Define input/output video files\n", 76 | "INPUT_VIDEO_NAME = \"../source_code/N4/apples.h264\" # Source: https://depositphotos.com\n", 77 | "OUTPUT_VIDEO_NAME = \"../source_code/challenge_deepstream/ds_out.mp4\"\n", 78 | "\n", 79 | "import os\n", 80 | "if not os.path.exists(\"../source_code/challenge_deepstream\"):\n", 81 | " !mkdir ../source_code/challenge_deepstream" 82 | ] 83 | }, 84 | { 85 | "cell_type": "markdown", 86 | "metadata": {}, 87 | "source": [ 88 | "First, we define a function `make_elm_or_print_err()` to create our elements and report any errors if the creation fails. Elements are created using the `Gst.ElementFactory.make()` function as part of Gstreamer library." 89 | ] 90 | }, 91 | { 92 | "cell_type": "code", 93 | "execution_count": null, 94 | "metadata": {}, 95 | "outputs": [], 96 | "source": [ 97 | "# Make element or print error and any other detail\n", 98 | "def make_elm_or_print_err(factoryname, name, printedname, detail=\"\"):\n", 99 | " print(\"Creating\", printedname)\n", 100 | " elm = Gst.ElementFactory.make(factoryname, name)\n", 101 | " if not elm:\n", 102 | " sys.stderr.write(\"Unable to create \" + printedname + \" \\n\")\n", 103 | " if detail:\n", 104 | " sys.stderr.write(detail)\n", 105 | " return elm" 106 | ] 107 | }, 108 | { 109 | "cell_type": "markdown", 110 | "metadata": {}, 111 | "source": [ 112 | "Next, we initialize GStreamer and create an empty pipeline." 
113 | ] 114 | }, 115 | { 116 | "cell_type": "code", 117 | "execution_count": null, 118 | "metadata": {}, 119 | "outputs": [], 120 | "source": [ 121 | "# Standard GStreamer initialization\n", 122 | "Gst.init(None)\n", 123 | "\n", 124 | "# Create gstreamer elements\n", 125 | "# Create Pipeline element that will form a connection of other elements\n", 126 | "print(\"Creating Pipeline \\n\")\n", 127 | "pipeline = Gst.Pipeline()\n", 128 | "\n", 129 | "if not pipeline:\n", 130 | " sys.stderr.write(\" Unable to create Pipeline \\n\")" 131 | ] 132 | }, 133 | { 134 | "cell_type": "markdown", 135 | "metadata": {}, 136 | "source": [ 137 | "Then, we create the elements that are required for our pipeline." 138 | ] 139 | }, 140 | { 141 | "cell_type": "code", 142 | "execution_count": null, 143 | "metadata": {}, 144 | "outputs": [], 145 | "source": [ 146 | "########### Create elements required for the Pipeline ###########\n", 147 | "# Source element for reading from the file\n", 148 | "source = make_elm_or_print_err(\"filesrc\", \"file-source\", \"Source\")\n", 149 | "# Since the data format in the input file is elementary h264 stream, we need a h264parser\n", 150 | "h264parser = make_elm_or_print_err(\"h264parse\", \"h264-parser\", \"h264 parse\")\n", 151 | "# Use nvdec_h264 for hardware accelerated decode on GPU\n", 152 | "decoder = make_elm_or_print_err(\"nvv4l2decoder\", \"nvv4l2-decoder\", \"Nvv4l2 Decoder\")\n", 153 | "# Create nvstreammux instance to form batches from one or more sources\n", 154 | "streammux = make_elm_or_print_err(\"nvstreammux\", \"Stream-muxer\", \"NvStreamMux\")\n", 155 | "# Use nvinfer to run inferencing on decoder's output, behavior of inferencing is set through config file\n", 156 | "pgie = make_elm_or_print_err(\"nvinfer\", \"primary-inference\", \"pgie\")\n", 157 | "# Use nvtracker to give objects unique-ids\n", 158 | "tracker = make_elm_or_print_err(\"nvtracker\", \"tracker\", \"tracker\")\n", 159 | "# Use convertor to convert from NV12 to RGBA as required by nvosd\n", 160 | "nvvidconv = make_elm_or_print_err(\"nvvideoconvert\", \"convertor\", \"nvvidconv\")\n", 161 | "# Create OSD to draw on the converted RGBA buffer\n", 162 | "nvosd = make_elm_or_print_err(\"nvdsosd\", \"onscreendisplay\", \"nvosd\")\n", 163 | "# Finally encode and save the osd output\n", 164 | "queue = make_elm_or_print_err(\"queue\", \"queue\", \"Queue\")\n", 165 | "# Use convertor to convert from NV12 to RGBA as required by nvosd\n", 166 | "nvvidconv2 = make_elm_or_print_err(\"nvvideoconvert\", \"convertor2\", \"nvvidconv2\")\n", 167 | "# Place an encoder instead of OSD to save as video file\n", 168 | "encoder = make_elm_or_print_err(\"avenc_mpeg4\", \"encoder\", \"Encoder\")\n", 169 | "# Parse output from Encoder\n", 170 | "codeparser = make_elm_or_print_err(\"mpeg4videoparse\", \"mpeg4-parser\", \"Code Parser\")\n", 171 | "# Create a container\n", 172 | "container = make_elm_or_print_err(\"qtmux\", \"qtmux\", \"Container\")\n", 173 | "# Create Sink for storing the output\n", 174 | "sink = make_elm_or_print_err(\"filesink\", \"filesink\", \"Sink\")" 175 | ] 176 | }, 177 | { 178 | "cell_type": "markdown", 179 | "metadata": {}, 180 | "source": [ 181 | "Now that we have created the elements, we can proceed to set various properties for our pipeline.\n", 182 | "\n", 183 | "## Understanding the configuration files\n", 184 | "\n", 185 | "We'll resuse the `pgie` configuration file that was examined in the previous notebook. 
If you haven't already set your API key in the configuration file [here](../source_code/N4/pgie_yolov4_tao_config.txt) in the field `tlt-model-key`, please go ahead and do so, then save the file with `ctrl s`. Not setting the key makes it impossible to decrypt the model and successfully run this notebook." 186 | ] 187 | }, 188 | { 189 | "cell_type": "code", 190 | "execution_count": null, 191 | "metadata": {}, 192 | "outputs": [], 193 | "source": [ 194 | "print(\"Please replace the tlt-model-key variable with your key.\")" 195 | ] 196 | }, 197 | { 198 | "cell_type": "code", 199 | "execution_count": null, 200 | "metadata": {}, 201 | "outputs": [], 202 | "source": [ 203 | "!cat ../source_code/N4/pgie_yolov4_tao_config.txt" 204 | ] 205 | }, 206 | { 207 | "cell_type": "markdown", 208 | "metadata": {}, 209 | "source": [ 210 | "We can also visualize the configuration file for our nvtracker (tracking plugin) named `dstest2_tracker_config.txt`. The configuration file is parsed and properties are then set for the tracker." 211 | ] 212 | }, 213 | { 214 | "cell_type": "code", 215 | "execution_count": null, 216 | "metadata": {}, 217 | "outputs": [], 218 | "source": [ 219 | "!cat ../source_code/N4/dstest2_tracker_config.txt" 220 | ] 221 | }, 222 | { 223 | "cell_type": "markdown", 224 | "metadata": {}, 225 | "source": [ 226 | "In the next cell, we set the properties for the elements of our pipeline, including but not limited to the contents of the two configuration files." 227 | ] 228 | }, 229 | { 230 | "cell_type": "code", 231 | "execution_count": null, 232 | "metadata": {}, 233 | "outputs": [], 234 | "source": [ 235 | "############ Set properties for the Elements ############\n", 236 | "print(\"Playing file \", INPUT_VIDEO_NAME)\n", 237 | "# Set Input File Name \n", 238 | "source.set_property(\"location\", INPUT_VIDEO_NAME)\n", 239 | "# Set Input Width, Height and Batch Size \n", 240 | "streammux.set_property(\"width\", 1920)\n", 241 | "streammux.set_property(\"height\", 1080)\n", 242 | "streammux.set_property(\"batch-size\", 1)\n", 243 | "# Timeout in microseconds to wait after the first buffer is available \n", 244 | "# to push the batch even if a complete batch is not formed.\n", 245 | "streammux.set_property(\"batched-push-timeout\", 4000000)\n", 246 | "# Set Congifuration file for nvinfer \n", 247 | "pgie.set_property(\"config-file-path\", \"../source_code/N4/pgie_yolov4_tao_config.txt\")\n", 248 | "#Set properties of tracker from tracker_config\n", 249 | "config = configparser.ConfigParser()\n", 250 | "config.read(\"../source_code/N4/dstest2_tracker_config.txt\")\n", 251 | "config.sections()\n", 252 | "for key in config['tracker']:\n", 253 | " if key == 'tracker-width' :\n", 254 | " tracker_width = config.getint('tracker', key)\n", 255 | " tracker.set_property('tracker-width', tracker_width)\n", 256 | " if key == 'tracker-height' :\n", 257 | " tracker_height = config.getint('tracker', key)\n", 258 | " tracker.set_property('tracker-height', tracker_height)\n", 259 | " if key == 'gpu-id' :\n", 260 | " tracker_gpu_id = config.getint('tracker', key)\n", 261 | " tracker.set_property('gpu_id', tracker_gpu_id)\n", 262 | " if key == 'll-lib-file' :\n", 263 | " tracker_ll_lib_file = config.get('tracker', key)\n", 264 | " tracker.set_property('ll-lib-file', tracker_ll_lib_file)\n", 265 | " if key == 'll-config-file' :\n", 266 | " tracker_ll_config_file = config.get('tracker', key)\n", 267 | " tracker.set_property('ll-config-file', tracker_ll_config_file)\n", 268 | " if key == 'enable-batch-process' :\n", 
269 | " tracker_enable_batch_process = config.getint('tracker', key)\n", 270 | " tracker.set_property('enable_batch_process', tracker_enable_batch_process)\n", 271 | "# Set Encoder bitrate for output video\n", 272 | "encoder.set_property(\"bitrate\", 2000000)\n", 273 | "# Set Output file name and disable sync and async\n", 274 | "sink.set_property(\"location\", OUTPUT_VIDEO_NAME)\n", 275 | "sink.set_property(\"sync\", 0)\n", 276 | "sink.set_property(\"async\", 0)" 277 | ] 278 | }, 279 | { 280 | "cell_type": "markdown", 281 | "metadata": {}, 282 | "source": [ 283 | "We now link all the elements in the order we prefer and create Gstreamer bus to feed all messages through it." 284 | ] 285 | }, 286 | { 287 | "cell_type": "code", 288 | "execution_count": null, 289 | "metadata": {}, 290 | "outputs": [], 291 | "source": [ 292 | "########## Add and Link Elements in the Pipeline ##########\n", 293 | "\n", 294 | "print(\"Adding elements to Pipeline \\n\")\n", 295 | "\n", 296 | "############# ~~~~~~~ COMPLETE THIS SECTION ~~~~~~~ #############\n", 297 | "# Adding elements to the pipeline\n", 298 | "pipeline.add(source)\n", 299 | "pipeline.add(h264parser)\n", 300 | "#\n", 301 | "#\n", 302 | "#\n", 303 | "#\n", 304 | "#\n", 305 | "#\n", 306 | "#\n", 307 | "#\n", 308 | "#\n", 309 | "#\n", 310 | "#\n", 311 | "#\n", 312 | "###################### ~~~~~~~ END ~~~~~~~ ######################\n", 313 | "\n", 314 | "# We now link the elements together \n", 315 | "# file-source -> h264-parser -> nvh264-decoder -> nvinfer -> nvvidconv ->\n", 316 | "# queue -> nvvidconv2 -> encoder -> parser -> container -> sink -> output-file\n", 317 | "print(\"Linking elements in the Pipeline \\n\")\n", 318 | "source.link(h264parser)\n", 319 | "h264parser.link(decoder)\n", 320 | "\n", 321 | "##### Creating Sink pad and source pads and linking them together \n", 322 | "\n", 323 | "# Create Sinkpad to Streammux \n", 324 | "sinkpad = streammux.get_request_pad(\"sink_0\")\n", 325 | "if not sinkpad:\n", 326 | " sys.stderr.write(\" Unable to get the sink pad of streammux \\n\")\n", 327 | "# Create source pad from Decoder \n", 328 | "srcpad = decoder.get_static_pad(\"src\")\n", 329 | "if not srcpad:\n", 330 | " sys.stderr.write(\" Unable to get source pad of decoder \\n\")\n", 331 | "\n", 332 | "############# ~~~~~~~ COMPLETE THIS SECTION ~~~~~~~ #############\n", 333 | "# Link the elements\n", 334 | "srcpad.link(sinkpad)\n", 335 | "streammux.link(pgie)\n", 336 | "#\n", 337 | "#\n", 338 | "#\n", 339 | "#\n", 340 | "#\n", 341 | "#\n", 342 | "#\n", 343 | "#\n", 344 | "#\n", 345 | "###################### ~~~~~~~ END ~~~~~~~ ######################" 346 | ] 347 | }, 348 | { 349 | "cell_type": "markdown", 350 | "metadata": {}, 351 | "source": [ 352 | "Now we create an event loop and feed GStreamer bus messages to it." 353 | ] 354 | }, 355 | { 356 | "cell_type": "code", 357 | "execution_count": null, 358 | "metadata": {}, 359 | "outputs": [], 360 | "source": [ 361 | "loop = GLib.MainLoop()\n", 362 | "bus = pipeline.get_bus()\n", 363 | "bus.add_signal_watch()\n", 364 | "bus.connect (\"message\", bus_call, loop)" 365 | ] 366 | }, 367 | { 368 | "cell_type": "markdown", 369 | "metadata": {}, 370 | "source": [ 371 | "## Working with the metadata \n", 372 | "\n", 373 | "Our pipeline now carries the metadata forward but does nothing with it up to this moment. 
As mentioned in the above pipeline diagram, we will now create a callback function to display relevant data on the frame once it is called and create a sink pad in the `nvosd` element to call the function." 374 | ] 375 | }, 376 | { 377 | "cell_type": "code", 378 | "execution_count": null, 379 | "metadata": {}, 380 | "outputs": [], 381 | "source": [ 382 | "############## Working with the Metadata ################\n", 383 | "\n", 384 | "def osd_sink_pad_buffer_probe(pad, info, u_data):\n", 385 | " \n", 386 | " ############# ~~~~~~~ COMPLETE THIS SECTION ~~~~~~~ #############\n", 387 | " # Intiallizing object counter with 0\n", 388 | " obj_counter = {\n", 389 | " PGIE_CLASS_ID_FRESHAPPLE:0,\n", 390 | " #\n", 391 | " #\n", 392 | " #\n", 393 | " #\n", 394 | " #\n", 395 | " }\n", 396 | " ###################### ~~~~~~~ END ~~~~~~~ ######################\n", 397 | " \n", 398 | " # Colors of the bounding boxes in RGBA\n", 399 | " obj_colors = {\n", 400 | " PGIE_CLASS_ID_FRESHAPPLE:(1.0, 0.0, 0.0, 0.0),\n", 401 | " PGIE_CLASS_ID_FRESHBANANA:(0.0, 1.0, 0.0, 0.0),\n", 402 | " PGIE_CLASS_ID_FRESHORANGE:(0.0, 0.0, 1.0, 0.0),\n", 403 | " PGIE_CLASS_ID_ROTTENAPPLE:(0.0, 1.0, 1.0, 0.0),\n", 404 | " PGIE_CLASS_ID_ROTTENBANANA:(1.0, 0.0, 1.0, 0.0),\n", 405 | " PGIE_CLASS_ID_ROTTENORANGE:(1.0, 1.0, 0.0, 0.0)\n", 406 | " }\n", 407 | " # Set frame_number & rectangles to draw as 0 \n", 408 | " frame_number=0\n", 409 | " num_rects=0\n", 410 | " \n", 411 | " gst_buffer = info.get_buffer()\n", 412 | " if not gst_buffer:\n", 413 | " print(\"Unable to get GstBuffer \")\n", 414 | " return\n", 415 | "\n", 416 | " # Retrieve batch metadata from the gst_buffer\n", 417 | " # Note that pyds.gst_buffer_get_nvds_batch_meta() expects the\n", 418 | " # C address of gst_buffer as input, which is obtained with hash(gst_buffer)\n", 419 | " batch_meta = pyds.gst_buffer_get_nvds_batch_meta(hash(gst_buffer))\n", 420 | " l_frame = batch_meta.frame_meta_list\n", 421 | " \n", 422 | " while l_frame is not None:\n", 423 | " try:\n", 424 | " # Note that l_frame.data needs a cast to pyds.NvDsFrameMeta\n", 425 | " frame_meta = pyds.NvDsFrameMeta.cast(l_frame.data)\n", 426 | " except StopIteration:\n", 427 | " break\n", 428 | " \n", 429 | " # Get frame number, number of rectangles to draw and object metadata\n", 430 | " frame_number=frame_meta.frame_num\n", 431 | " num_rects = frame_meta.num_obj_meta\n", 432 | " l_obj=frame_meta.obj_meta_list\n", 433 | " \n", 434 | " while l_obj is not None:\n", 435 | " try:\n", 436 | " # Casting l_obj.data to pyds.NvDsObjectMeta\n", 437 | " obj_meta=pyds.NvDsObjectMeta.cast(l_obj.data)\n", 438 | " except StopIteration:\n", 439 | " break\n", 440 | " # Increment object class by 1 and set box border color \n", 441 | " obj_counter[obj_meta.class_id] += 1\n", 442 | " r, g, b, a = obj_colors[obj_meta.class_id]\n", 443 | " obj_meta.rect_params.border_color.set(r, g, b, a)\n", 444 | " try: \n", 445 | " l_obj=l_obj.next\n", 446 | " except StopIteration:\n", 447 | " break\n", 448 | " ################## Setting Metadata Display configruation ############### \n", 449 | " # Acquiring a display meta object\n", 450 | " display_meta=pyds.nvds_acquire_display_meta_from_pool(batch_meta)\n", 451 | " display_meta.num_labels = 1\n", 452 | " py_nvosd_text_params = display_meta.text_params[0]\n", 453 | " # Setting display text to be shown on screen\n", 454 | " py_nvosd_text_params.display_text = \"Frame Number={} Number of Objects={} Freshapple_count={} Freshbanana_count={} \" \\\n", 455 | " \"Freshorange_count={} 
Rottenapple_count={} Rottenbanana_count={} Rottenorange_count={}\".format(frame_number, num_rects, \n", 456 | " obj_counter[PGIE_CLASS_ID_FRESHAPPLE], obj_counter[PGIE_CLASS_ID_FRESHBANANA], obj_counter[PGIE_CLASS_ID_FRESHORANGE], \n", 457 | " obj_counter[PGIE_CLASS_ID_ROTTENAPPLE], obj_counter[PGIE_CLASS_ID_ROTTENBANANA], obj_counter[PGIE_CLASS_ID_ROTTENORANGE])\n", 458 | " \n", 459 | " # Now set the offsets where the string should appear\n", 460 | " py_nvosd_text_params.x_offset = 10\n", 461 | " py_nvosd_text_params.y_offset = 12\n", 462 | " # Font, font-color and font-size\n", 463 | " py_nvosd_text_params.font_params.font_name = \"Serif\"\n", 464 | " py_nvosd_text_params.font_params.font_size = 14\n", 465 | " # Set(red, green, blue, alpha); Set to White\n", 466 | " py_nvosd_text_params.font_params.font_color.set(1.0, 1.0, 1.0, 1.0)\n", 467 | " # Text background color\n", 468 | " py_nvosd_text_params.set_bg_clr = 1\n", 469 | " # Set(red, green, blue, alpha); set to Black\n", 470 | " py_nvosd_text_params.text_bg_clr.set(0.0, 0.0, 0.0, 1.0)\n", 471 | " # Using pyds.get_string() to get display_text as string to print in notebook\n", 472 | " print(pyds.get_string(py_nvosd_text_params.display_text))\n", 473 | " pyds.nvds_add_display_meta_to_frame(frame_meta, display_meta)\n", 474 | " \n", 475 | " ############################################################################\n", 476 | " \n", 477 | " try:\n", 478 | " l_frame=l_frame.next\n", 479 | " except StopIteration:\n", 480 | " break\n", 481 | " return Gst.PadProbeReturn.OK" 482 | ] 483 | }, 484 | { 485 | "cell_type": "markdown", 486 | "metadata": {}, 487 | "source": [ 488 | "Here we add the probe to get informed of the meta data generated. We add probe to the sink pad of the osd element, since by that time, the buffer would have got all the metadata." 489 | ] 490 | }, 491 | { 492 | "cell_type": "code", 493 | "execution_count": null, 494 | "metadata": {}, 495 | "outputs": [], 496 | "source": [ 497 | "osdsinkpad = nvosd.get_static_pad(\"sink\")\n", 498 | "if not osdsinkpad:\n", 499 | " sys.stderr.write(\" Unable to get sink pad of nvosd \\n\")\n", 500 | " \n", 501 | "osdsinkpad.add_probe(Gst.PadProbeType.BUFFER, osd_sink_pad_buffer_probe, 0)" 502 | ] 503 | }, 504 | { 505 | "cell_type": "markdown", 506 | "metadata": {}, 507 | "source": [ 508 | "## Run the pipeline\n", 509 | "\n", 510 | "Now with everything defined, we can start the playback and listen to the events." 511 | ] 512 | }, 513 | { 514 | "cell_type": "code", 515 | "execution_count": null, 516 | "metadata": {}, 517 | "outputs": [], 518 | "source": [ 519 | "# start play back and listen to events\n", 520 | "print(\"Starting pipeline \\n\")\n", 521 | "start_time = time.time()\n", 522 | "pipeline.set_state(Gst.State.PLAYING)\n", 523 | "try:\n", 524 | " loop.run()\n", 525 | "except:\n", 526 | " pass\n", 527 | "# cleanup\n", 528 | "pipeline.set_state(Gst.State.NULL)\n", 529 | "print(\"--- %s seconds ---\" % (time.time() - start_time))" 530 | ] 531 | }, 532 | { 533 | "cell_type": "markdown", 534 | "metadata": {}, 535 | "source": [ 536 | "With the next cell, we convert the video profile to be compatible with Jupyter notebook." 
537 | ] 538 | }, 539 | { 540 | "cell_type": "code", 541 | "execution_count": null, 542 | "metadata": {}, 543 | "outputs": [], 544 | "source": [ 545 | "!ffmpeg -loglevel panic -y -an -i ../source_code/challenge_deepstream/ds_out.mp4 -vcodec libx264 -pix_fmt yuv420p -profile:v baseline -level 3 ../source_code/challenge_deepstream/output.mp4" 546 | ] 547 | }, 548 | { 549 | "cell_type": "code", 550 | "execution_count": null, 551 | "metadata": {}, 552 | "outputs": [], 553 | "source": [ 554 | "# Display the output\n", 555 | "from IPython.display import HTML\n", 556 | "HTML(\"\"\"\n", 557 | "