├── tao_action_recognition ├── data_generation │ ├── resources │ │ ├── of_preprocess_pipe.png │ │ └── rgb_preprocess_pipe.png │ ├── jetson_of │ │ └── vpi │ │ │ ├── CMakeLists.txt │ │ │ └── main.cpp │ ├── convert_dataset.py │ ├── README.md │ ├── split_dataset.py │ ├── preprocess_HMDB_RGB.sh │ ├── generate_new_dataset_format.py │ ├── preprocess_HMDB.sh │ ├── preprocess_SHAD_RGB.sh │ ├── convert_of.py │ ├── preprocess_SHAD.sh │ └── save_tracks_shad.py ├── tensorrt_inference │ ├── trt_inference │ │ ├── __init__.py │ │ └── engine.py │ ├── README.md │ ├── ar_trt_inference.py │ └── ar_of_trt_inference.py ├── specs │ ├── i3d_rgb_3d_64_export.yaml │ └── train_rgb_3d_64_i3d.yaml └── doc │ └── load_I3D.md ├── README.md ├── tao_ocdr └── handwritten │ ├── specs │ ├── ocd │ │ ├── export.yaml │ │ ├── inference.yaml │ │ ├── evaluate.yaml │ │ └── train.yaml │ └── ocr │ │ └── experiment.yaml │ └── preprocess_data.py ├── tao_key_points_estimation └── tensorrt_inference │ ├── trt_inference │ ├── __init__.py │ └── engine.py │ ├── README.md │ └── fpenet_trt_inference.py ├── tao_pointpillars └── tensorrt_sample │ ├── include │ ├── postprocess.h │ └── pointpillar.h │ ├── README.md │ ├── test │ ├── CMakeLists.txt │ └── main.cpp │ ├── src │ ├── postprocess.cpp │ └── pointpillar.cpp │ └── LICENSE ├── tao_retinanet ├── README.md └── tao_retinanet_scales_aspect_ratio_estimate.py ├── tao_object_dection └── yolov4 │ ├── specs │ ├── classification_cspdarknet53.txt │ └── yolov4_416_coco14.txt │ └── README.md ├── tao_classification ├── mobilenet_v2 │ └── mobilenetv2_imagenet2012.txt └── deploy_to_deepstream │ └── README.md ├── LICENSE ├── tao_training_without_network └── Guide.md ├── tao_api └── how_to_modify_code_for_TAO_API.md └── tao_forum_faq └── FAQ.md /tao_action_recognition/data_generation/resources/of_preprocess_pipe.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA-AI-IOT/tao_toolkit_recipes/HEAD/tao_action_recognition/data_generation/resources/of_preprocess_pipe.png -------------------------------------------------------------------------------- /tao_action_recognition/data_generation/resources/rgb_preprocess_pipe.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA-AI-IOT/tao_toolkit_recipes/HEAD/tao_action_recognition/data_generation/resources/rgb_preprocess_pipe.png -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Reference samples of data generation and tensorrt inference for TAO Toolkit 2 | This repository provides reference samples of data generation and tensorrt inference for [TAO Toolkit](https://developer.nvidia.com/tao-toolkit) 3 | 4 | The supported task: 5 | 6 | - [Action Recognition](https://github.com/NVIDIA-AI-IOT/tao_toolkit_recipes/tree/main/tao_action_recognition) 7 | -------------------------------------------------------------------------------- /tao_ocdr/handwritten/specs/ocd/export.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | load_pruned_graph: False 3 | pruned_graph_path: '/results/prune/pruned_0.1.pth' 4 | 5 | export: 6 | results_dir: /results/ocd/export 7 | checkpoint: '/results/train/model_best.pth' 8 | onnx_file: '/results/export/model_best.onnx' 9 | width: 1024 10 | height: 1024 11 | 12 | dataset: 13 | validate_dataset: 14 | data_path: 
['/data/ocdnet/iamdata/test'] 15 | -------------------------------------------------------------------------------- /tao_ocdr/handwritten/specs/ocd/inference.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | load_pruned_graph: false 3 | pruned_graph_path: '/results/prune/pruned_0.1.pth' 4 | 5 | inference: 6 | checkpoint: '/results/train/model_best.pth' 7 | input_folder: /data/ocdnet/iamdata/test/img 8 | width: 1024 9 | height: 1024 10 | img_mode: BGR 11 | polygon: false 12 | show: false 13 | results_dir: /results/inference 14 | 15 | post_processing: 16 | type: SegDetectorRepresenter 17 | args: 18 | thresh: 0.45 19 | box_thresh: 0.55 20 | max_candidates: 1000 21 | unclip_ratio: 1.5 22 | 23 | -------------------------------------------------------------------------------- /tao_key_points_estimation/tensorrt_inference/trt_inference/__init__.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-License-Identifier: Apache-2.0 3 | 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | 16 | from .engine import * 17 | -------------------------------------------------------------------------------- /tao_key_points_estimation/tensorrt_inference/README.md: -------------------------------------------------------------------------------- 1 | # TensorRT inference sample for TAO key points estimation 2 | 3 | ## Introduction 4 | This is a TensorRT inference sample for TAO key points estimation. This sample will consume TensorRT engine and json format input generated in FPENet notebook. 5 | 6 | ## Prequisites 7 | `TensorRT`, `numpy`, `cv2` is needed for this sample. You can try TensorRT docker image on [NGC](https://ngc.nvidia.com/catalog/containers/nvidia:tensorrt) for easily building environment. 
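The helper in `trt_inference/engine.py` wraps engine loading and execution. The snippet below is only a minimal, illustrative sketch of how a serialized TensorRT engine can be deserialized with the TensorRT Python API; it is not the repository's implementation and the actual helper may differ.

```python
# Illustrative only -- the repo's trt_inference/engine.py may be implemented differently.
import tensorrt as trt

TRT_LOGGER = trt.Logger(trt.Logger.WARNING)

def load_engine(engine_path):
    """Deserialize a TensorRT engine file (e.g. the exported trt_fpenet.engine)."""
    with open(engine_path, "rb") as f, trt.Runtime(TRT_LOGGER) as runtime:
        return runtime.deserialize_cuda_engine(f.read())

engine = load_engine("trt_fpenet.engine")
context = engine.create_execution_context()  # reused for every inference call
```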
8 | 9 | 10 | ## Steps to run inference: 11 | 12 | ```sh 13 | # Generate TensorRT engine of fpenet model 14 | tao fpenet export -m -k -o --engine_file trt_fpenet.engine 15 | 16 | # run inference: 17 | python3 fpenet_trt_inference.py --input_json= --trt_engine= --output_img_dir= 18 | ``` -------------------------------------------------------------------------------- /tao_ocdr/handwritten/specs/ocd/evaluate.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | load_pruned_graph: False 3 | pruned_graph_path: '/results/prune/pruned_0.1.pth' 4 | 5 | evaluate: 6 | results_dir: /results/ocd/evaluate 7 | checkpoint: /results/train/model_best.pth 8 | gpu_id: 0 9 | post_processing: 10 | type: SegDetectorRepresenter 11 | args: 12 | thresh: 0.45 13 | box_thresh: 0.55 14 | max_candidates: 1000 15 | unclip_ratio: 1.5 16 | 17 | metric: 18 | type: QuadMetric 19 | args: 20 | is_output_polygon: false 21 | 22 | 23 | dataset: 24 | validate_dataset: 25 | data_path: ['/data/ocdnet/iamdata/test'] 26 | args: 27 | pre_processes: 28 | - type: Resize2D 29 | args: 30 | short_size: 31 | - 2464 32 | - 3520 33 | resize_text_polys: true 34 | img_mode: BGR 35 | filter_keys: [] 36 | ignore_tags: ['*', '###'] 37 | loader: 38 | batch_size: 1 39 | shuffle: false 40 | pin_memory: false 41 | num_workers: 4 42 | 43 | 44 | -------------------------------------------------------------------------------- /tao_action_recognition/tensorrt_inference/trt_inference/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2021 NVIDIA CORPORATION. All rights reserved. 2 | 3 | # Permission is hereby granted, free of charge, to any person obtaining a copy 4 | # of this software and associated documentation files (the "Software"), to deal 5 | # in the Software without restriction, including without limitation the rights 6 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | # copies of the Software, and to permit persons to whom the Software is 8 | # furnished to do so, subject to the following conditions: 9 | 10 | # The above copyright notice and this permission notice shall be included in all 11 | # copies or substantial portions of the Software. 12 | 13 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 19 | # SOFTWARE. 20 | 21 | from .engine import * 22 | -------------------------------------------------------------------------------- /tao_pointpillars/tensorrt_sample/include/postprocess.h: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 3 | * SPDX-License-Identifier: Apache-2.0 4 | * 5 | * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * you may not use this file except in compliance with the License. 
7 | * You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | #ifndef POSTPROCESS_H_ 19 | #define POSTPROCESS_H_ 20 | 21 | #include 22 | 23 | struct Bndbox { 24 | float x; 25 | float y; 26 | float z; 27 | float w; 28 | float l; 29 | float h; 30 | float rt; 31 | int id; 32 | float score; 33 | Bndbox(){}; 34 | Bndbox(float x_, float y_, float z_, float l_, float w_, float h_, float rt_, int id_, float score_) 35 | : x(x_), y(y_), z(z_), w(w_), l(l_), h(h_), rt(rt_), id(id_), score(score_) {} 36 | }; 37 | 38 | int nms_cpu(std::vector bndboxes, const float nms_thresh, 39 | std::vector &nms_pred, const int pre_nms_top_n); 40 | 41 | #endif 42 | -------------------------------------------------------------------------------- /tao_ocdr/handwritten/specs/ocr/experiment.yaml: -------------------------------------------------------------------------------- 1 | results_dir: /results 2 | encryption_key: nvidia_tao 3 | model: 4 | TPS: True 5 | backbone: ResNet 6 | feature_channel: 512 7 | sequence: BiLSTM 8 | hidden_size: 256 9 | prediction: CTC 10 | quantize: False 11 | input_width: 100 12 | input_height: 32 13 | input_channel: 1 14 | dataset: 15 | train_dataset_dir: [] 16 | val_dataset_dir: /data/test/lmdb 17 | character_list_file: /data/character_list 18 | max_label_length: 25 19 | batch_size: 32 20 | workers: 4 21 | augmentation: 22 | keep_aspect_ratio: False 23 | train: 24 | seed: 1111 25 | gpu_ids: [0] 26 | optim: 27 | name: "adadelta" 28 | lr: 0.1 29 | clip_grad_norm: 5.0 30 | num_epochs: 10 31 | checkpoint_interval: 2 32 | validation_interval: 1 33 | evaluate: 34 | gpu_id: 0 35 | checkpoint: "??" 36 | test_dataset_dir: "??" 37 | results_dir: "${results_dir}/evaluate" 38 | prune: 39 | gpu_id: 0 40 | checkpoint: "??" 41 | results_dir: "${results_dir}/prune" 42 | prune_setting: 43 | mode: experimental_hybrid 44 | amount: 0.4 45 | granularity: 8 46 | raw_prune_score: L1 47 | inference: 48 | gpu_id: 0 49 | checkpoint: "??" 50 | inference_dataset_dir: "??" 51 | results_dir: "${results_dir}/inference" 52 | export: 53 | gpu_id: 0 54 | checkpoint: "??" 55 | results_dir: "${results_dir}/export" 56 | dataset_convert: 57 | input_img_dir: "??" 58 | gt_file: "??" 59 | results_dir: "${results_dir}/convert_dataset" 60 | gen_trt_engine: 61 | onnx_file: "??" 62 | results_dir: "${results_dir}/convert_dataset" 63 | -------------------------------------------------------------------------------- /tao_retinanet/README.md: -------------------------------------------------------------------------------- 1 | # Sample to estimate best scales and aspect ratio values for TAO retinanet 2 | 3 | This is an experimental sample to estimate best scales and aspect ratio values for TAO retinanet: 4 | 5 | ``` 6 | retinanet_config { 7 | aspect_ratios_global: "[1.0, 2.0, 0.5]" 8 | scales: "[0.05, 0.2, 0.35, 0.5, 0.65, 0.8]" 9 | ``` 10 | 11 | Please do try more parameters for best model performance. 
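For example, the values reported by the sample run at the end of this README could be rounded and dropped back into the spec as shown below; everything else in `retinanet_config` stays as in your original training spec:

```
retinanet_config {
  aspect_ratios_global: "[0.53, 1.0, 2.43]"
  scales: "[0.07, 0.13, 0.21, 0.33, 0.49, 0.82]"
  ...
}
```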
12 | 13 | 14 | ## Detailed Steps 15 | 16 | ### Download kitti dataset 17 | 18 | Assume link below has the txt label files: 19 | ``` 20 | /home/user/tlt-experiments/data/training/label_2/ 21 | ``` 22 | 23 | 24 | 25 | ### Prepare parameters for sample to estimate optimal values 26 | 27 | #### Change tao_retinanet_scales_aspect_ratio_estimate.py to point to correct folder for labels 28 | ``` 29 | folder="/home/user/tlt-experiments/data/training/label_2/" 30 | ``` 31 | 32 | 33 | #### Change tao_retinanet_scales_aspect_ratio_estimate.py to set shorten value of image width and image height 34 | ``` 35 | shorter_length_of_image = 375 36 | ``` 37 | 38 | 39 | ##### Change tao_retinanet_scales_aspect_ratio_estimate.py to remove outliers for aspect ratios 40 | ``` 41 | limit_max_ar=4 42 | ``` 43 | 44 | 45 | 46 | ### Run sample to estimate optimal values 47 | 48 | ``` 49 | python tao_retinanet_scales_aspect_ratio_estimate.py 50 | ``` 51 | 52 | 53 | 54 | ### Running log with kitti dataset 55 | 56 | 57 | ``` 58 | scales: 59 | [0.0691874 0.13098365 0.21473368 0.33218772 0.48606437 0.82403735] 60 | aspect ratios from algo 61 | [0.52653116 1.36425734 2.43270715] 62 | aspect ratios considering 1.0: 63 | [0.52653116 1. 2.43270715] 64 | ``` 65 | 66 | -------------------------------------------------------------------------------- /tao_action_recognition/tensorrt_inference/README.md: -------------------------------------------------------------------------------- 1 | # TensorRT inference sample for TAO ActionRecognitionNet 2 | 3 | ## Introduction 4 | This is a TensorRT inference sample with TAO ActionRecognitionNet deployable model. This sample will consume TensorRT engine and sequence of images and predict the people's action in those images. 5 | 6 | ## Prequisites 7 | `TensorRT`, `numpy`, `PIL` is needed for this sample. You can try TensorRT docker image on [NGC](https://ngc.nvidia.com/catalog/containers/nvidia:tensorrt) for easily building environment. 8 | 9 | You also need to download `tao-converter` from [TAO toolkit](https://developer.nvidia.com/tao-toolkit-get-started) to convert the encrypted tao model to TensorRT engine. 
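Before running the inference script, the frames have to be assembled into the engine's `input_rgb` tensor. The sketch below shows one plausible way to build the `1x3x32x224x224` input for the 3D engine with `PIL`/`numpy`; the mean/std values are taken from the training specs in this repo, and the exact resize/crop logic in `ar_trt_inference.py` may differ.

```python
# Illustrative preprocessing sketch -- not necessarily identical to ar_trt_inference.py.
import os
import numpy as np
from PIL import Image

MEAN = np.array([0.485, 0.456, 0.406], dtype=np.float32)  # from the training specs in this repo
STD = np.array([0.229, 0.224, 0.225], dtype=np.float32)

def load_clip(images_folder, seq_length=32, size=224):
    """Read the first `seq_length` frames and return a 1 x 3 x T x H x W float32 array."""
    names = sorted(os.listdir(images_folder))[:seq_length]
    frames = []
    for name in names:
        img = Image.open(os.path.join(images_folder, name)).convert("RGB")
        img = img.resize((size, size), Image.BILINEAR)        # simple resize; a center crop is also common
        arr = (np.asarray(img, dtype=np.float32) / 255.0 - MEAN) / STD
        frames.append(arr.transpose(2, 0, 1))                 # HWC -> CHW
    clip = np.stack(frames, axis=1)                           # C x T x H x W
    return np.ascontiguousarray(clip[np.newaxis], dtype=np.float32)  # 1 x 3 x 32 x 224 x 224
```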
10 | 11 | ## Steps to run inference: 12 | 13 | ```sh 14 | # Download the deployable action recognition model from NGC 15 | wget --content-disposition https://api.ngc.nvidia.com/v2/models/nvidia/tao/actionrecognitionnet/versions/deployable_v1.0/zip -O actionrecognitionnet_deployable_v1.0.zip 16 | 17 | # Generate TensorRT engine of action recognition model 18 | # generate engine of 2D model: 19 | tao-converter resnet18_2d_rgb_hmdb5_32.etlt -k nvidia_tao -p input_rgb,1x96x224x224,1x96x224x224,1x96x224x224 -e trt2d.engine -t fp16 20 | # generate engine of 3D model: 21 | tao-converter resnet18_3d_rgb_hmdb5_32.etlt -k nvidia_tao -p input_rgb,1x3x32x224x224,1x3x32x224x224,1x3x32x224x224 -e trt3d.engine -t fp16 22 | 23 | # run inference: 24 | # run inference with 2D engine: 25 | python ar_trt_inference.py --input_images_folder=/path/to/images --trt_engine=./trt2d.engine --input_2d 26 | # run inference with 3D engine: 27 | python ar_trt_inference.py --input_images_folder=/path/to/images --trt_engine=./trt3d.engine 28 | ``` 29 | -------------------------------------------------------------------------------- /tao_object_dection/yolov4/specs/classification_cspdarknet53.txt: -------------------------------------------------------------------------------- 1 | model_config { 2 | # Model Architecture can be chosen from: 3 | # ['resnet', 'vgg', 'googlenet', 'alexnet'] 4 | arch: "cspdarknet" 5 | 6 | # for resnet --> n_layers can be [10, 18, 50] 7 | # for vgg --> n_layers can be [16, 19] 8 | n_layers: 53 9 | use_batch_norm: True 10 | use_bias: False 11 | use_imagenet_head: True 12 | all_projections: False 13 | use_pooling: True 14 | # if you want to use the pretrained model, 15 | # image size should be "3,224,224" 16 | # otherwise, it can be "3, X, Y", where X,Y >= 16 17 | input_image_size: "3,224,224" 18 | } 19 | train_config { 20 | train_dataset_path: "/workspace/tao-experiments/data/imagenet2012/train" 21 | val_dataset_path: "/workspace/tao-experiments/data/imagenet2012/val" 22 | # Only ['sgd', 'adam'] are supported for optimizer 23 | optimizer { 24 | sgd { 25 | lr: 0.01 26 | decay: 0.0 27 | momentum: 0.9 28 | nesterov: False 29 | } 30 | } 31 | preprocess_mode: "torch" 32 | enable_random_crop: True 33 | enable_center_crop: True 34 | label_smoothing: 0.0 35 | batch_size_per_gpu: 64 36 | n_epochs: 200 37 | mixup_alpha: 0.2 38 | 39 | # Number of CPU cores for loading data 40 | n_workers: 40 41 | 42 | # regularizer 43 | reg_config { 44 | # regularizer type can be "L1", "L2" or "None". 45 | type: "L2" 46 | # if the type is not "None", 47 | # scope can be either "Conv2D" or "Dense" or both. 
48 | scope: "Conv2D,Dense" 49 | # 0 < weight decay < 1 50 | weight_decay: 0.00003 51 | } 52 | 53 | # learning_rate 54 | lr_config { 55 | cosine{ 56 | learning_rate: 0.05 57 | soft_start: 0.0 58 | min_lr_ratio: 0.001 59 | } 60 | } 61 | } 62 | 63 | -------------------------------------------------------------------------------- /tao_action_recognition/specs/i3d_rgb_3d_64_export.yaml: -------------------------------------------------------------------------------- 1 | output_file: /workspace/rgb_3d_hmdb/i3d_rgb3d_64.etlt 2 | model: /workspace/rgb_3d_hmdb/i3d_rgb3d_64.tlt 3 | batch_size: 1 4 | encryption_key: nvidia_tlt 5 | gpu_id: 0 6 | model_config: 7 | model_type: rgb 8 | input_type: "3d" 9 | backbone: i3d 10 | rgb_seq_length: 64 11 | sample_strategy: consecutive 12 | sample_rate: 1 13 | dataset_config: 14 | train_dataset_dir: /raid/HMDB51_splitted_org/train 15 | val_dataset_dir: /raid/HMDB51_splitted_org/test 16 | label_map: 17 | throw: 0 18 | push: 1 19 | dribble: 2 20 | shoot_gun: 3 21 | hug: 4 22 | smile: 5 23 | fall_floor: 6 24 | chew: 7 25 | turn: 8 26 | cartwheel: 9 27 | stand: 10 28 | draw_sword: 11 29 | drink: 12 30 | eat: 13 31 | talk: 14 32 | climb: 15 33 | smoke: 16 34 | pick: 17 35 | shake_hands: 18 36 | pushup: 19 37 | swing_baseball: 20 38 | somersault: 21 39 | walk: 22 40 | flic_flac: 23 41 | run: 24 42 | ride_horse: 25 43 | sit: 26 44 | kiss: 27 45 | situp: 28 46 | hit: 29 47 | handstand: 30 48 | climb_stairs: 31 49 | pour: 32 50 | shoot_bow: 33 51 | kick_ball: 34 52 | brush_hair: 35 53 | sword_exercise: 36 54 | dive: 37 55 | fencing: 38 56 | golf: 39 57 | sword: 40 58 | shoot_ball: 41 59 | clap: 42 60 | punch: 43 61 | catch: 44 62 | jump: 45 63 | kick: 46 64 | ride_bike: 47 65 | wave: 48 66 | laugh: 49 67 | pullup: 50 68 | output_shape: 69 | - 224 70 | - 224 71 | batch_size: 8 72 | workers: 8 73 | augmentation_config: 74 | train_crop_type: random_crop 75 | horizontal_flip_prob: 0.5 76 | rgb_input_mean: [0.485, 0.456, 0.406] 77 | rgb_input_std: [0.229, 0.224, 0.225] 78 | val_center_crop: True 79 | -------------------------------------------------------------------------------- /tao_classification/mobilenet_v2/mobilenetv2_imagenet2012.txt: -------------------------------------------------------------------------------- 1 | model_config { 2 | # Model Architecture can be chosen from: 3 | # ['resnet', 'vgg', 'googlenet', 'alexnet'] 4 | arch: "mobilenet_v2" 5 | # for resnet --> n_layers can be [10, 18, 50] 6 | # for vgg --> n_layers can be [16, 19] 7 | use_bias: False 8 | use_imagenet_head: True 9 | use_batch_norm: True 10 | resize_interpolation_method: BICUBIC 11 | # if you want to use the pretrained model, 12 | # image size should be "3,224,224" 13 | # otherwise, it can be "3, X, Y", where X,Y >= 16 14 | input_image_size: "3,224,224" 15 | } 16 | train_config { 17 | preprocess_mode: "tf" 18 | train_dataset_path: "/raid/ImageNet2012/ImageNet2012/train" 19 | val_dataset_path: "/raid/ImageNet2012/ImageNet2012/val" 20 | # Only ['sgd', 'adam'] are supported for optimizer 21 | optimizer { 22 | sgd { 23 | lr: 0.045 24 | decay: 0.0 25 | momentum: 0.9 26 | nesterov: False 27 | } 28 | } 29 | batch_size_per_gpu: 96 30 | n_epochs: 420 31 | # Number of CPU cores for loading data 32 | n_workers: 16 33 | # regularizer 34 | reg_config { 35 | # regularizer type can be "L1", "L2" or "None". 36 | type: "L2" 37 | # if the type is not "None", 38 | # scope can be either "Conv2D" or "Dense" or both. 
39 | scope: "Conv2D,Dense" 40 | # 0 < weight decay < 1 41 | weight_decay: 5e-5 42 | } 43 | lr_config { 44 | cosine { 45 | learning_rate: 0.05 46 | min_lr_ratio: 0.001 47 | } 48 | } 49 | enable_random_crop: True 50 | enable_center_crop: True 51 | enable_color_augmentation: True 52 | mixup_alpha: 0.2 53 | label_smoothing: 0.1 54 | } 55 | eval_config { 56 | eval_dataset_path: "/raid/ImageNet2012/ImageNet2012/val" 57 | model_path: "/workspace/classification/mobilenet_v2/results/weights/mobilenet_v2_420.tlt" 58 | top_k: 1 59 | batch_size: 32 60 | n_workers: 8 61 | enable_center_crop: True 62 | } 63 | -------------------------------------------------------------------------------- /tao_pointpillars/tensorrt_sample/README.md: -------------------------------------------------------------------------------- 1 | # PointPillar TensorRT Inference Sample 2 | TensorRT Inference Sample for PointPillars in NVIDIA TAO Toolkit 3 | 4 | # PointPillars inference with TensorRT 5 | This repository provides an end-to-end inference sample for [PointPillars](https://arxiv.org/abs/1812.05784) with TensorRT. 6 | 7 | The input model is the TensorRT engine generated by NVIDIA TAO toolkit with `tao-converter`. 8 | 9 | ## Detailed Steps 10 | 11 | * Install TensorRT 8.2(or above) 12 | 13 | * Install TensorRT OSS 22.02 14 | ``` 15 | git clone -b 22.02 https://github.com/NVIDIA/TensorRT.git TensorRT 16 | cd TensorRT 17 | git submodule update --init --recursive 18 | mkdir -p build && cd build 19 | cmake .. -DCUDA_VERSION=$CUDA_VERSION -DGPU_ARCHS=$GPU_ARCHS 20 | make nvinfer_plugin -j$(nproc) 21 | make nvinfer_plugin_static -j$(nproc) 22 | cp libnvinfer_plugin.so.8.2.* /usr/lib/$ARCH-linux-gnu/libnvinfer_plugin.so.8.2.3 23 | cp libnvinfer_plugin_static.a /usr/lib/$ARCH-linux-gnu/libnvinfer_plugin_static.a 24 | ``` 25 | 26 | * Train and export the `.etlt` model with TAO Toolkit 27 | 28 | * Generate TensorRT engine with `tao-converter` 29 | 30 | ``` 31 | tao-converter -k $KEY \ 32 | -e $USER_EXPERIMENT_DIR/trt.fp16.engine \ 33 | -p points,1x204800x4,1x204800x4,1x204800x4 \ 34 | -p num_points,1,1,1 \ 35 | -t fp16 \ 36 | $USER_EXPERIMENT_DIR/pointpillars_deployable.etlt 37 | ``` 38 | 39 | * Clone the repo 40 | 41 | ``` 42 | cd ~ 43 | git clone https://github.com/NVIDIA-AI-IOT/tao_toolkit_recipes.git 44 | cd tao_toolkit_recipes 45 | git lfs pull 46 | ``` 47 | 48 | * Run the TensorRT Inference 49 | 50 | ``` 51 | cd tao_pointpillars/tensorrt_sample/test 52 | mkdir build 53 | cd build 54 | cmake .. 
-DCUDA_VERSION= 55 | make -j8 56 | ./pointpillars -e /path/to/tensorrt/engine -l ../../data/102.bin -t 0.01 -c Vehicle,Pedestrain,Cyclist -n 4096 -p -d fp16 57 | ``` 58 | -------------------------------------------------------------------------------- /tao_action_recognition/specs/train_rgb_3d_64_i3d.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ./exp0_20__/rgb_3d_hmdb 2 | encryption_key: nvidia_tlt 3 | gpu_ids: [0, 1, 2, 3, 4, 5, 6, 7] 4 | model_config: 5 | model_type: rgb 6 | input_type: "3d" 7 | backbone: i3d 8 | rgb_seq_length: 64 9 | rgb_pretrained_model_path: /workspace/action_recognition/i3d_pretrained/rgb_imagenet_kinetics.pt 10 | rgb_pretrained_num_classes: 400 11 | sample_strategy: consecutive 12 | sample_rate: 1 13 | train_config: 14 | optim: 15 | lr: 0.01 16 | momentum: 0.9 17 | weight_decay: 0.0000001 18 | lr_steps: [12, 25] 19 | lr_decay: 0.1 20 | epochs: 35 21 | dataset_config: 22 | train_dataset_dir: /raid/HMDB51_splitted_org/train 23 | val_dataset_dir: /raid/HMDB51_splitted_org/test 24 | label_map: 25 | throw: 0 26 | push: 1 27 | dribble: 2 28 | shoot_gun: 3 29 | hug: 4 30 | smile: 5 31 | fall_floor: 6 32 | chew: 7 33 | turn: 8 34 | cartwheel: 9 35 | stand: 10 36 | draw_sword: 11 37 | drink: 12 38 | eat: 13 39 | talk: 14 40 | climb: 15 41 | smoke: 16 42 | pick: 17 43 | shake_hands: 18 44 | pushup: 19 45 | swing_baseball: 20 46 | somersault: 21 47 | walk: 22 48 | flic_flac: 23 49 | run: 24 50 | ride_horse: 25 51 | sit: 26 52 | kiss: 27 53 | situp: 28 54 | hit: 29 55 | handstand: 30 56 | climb_stairs: 31 57 | pour: 32 58 | shoot_bow: 33 59 | kick_ball: 34 60 | brush_hair: 35 61 | sword_exercise: 36 62 | dive: 37 63 | fencing: 38 64 | golf: 39 65 | sword: 40 66 | shoot_ball: 41 67 | clap: 42 68 | punch: 43 69 | catch: 44 70 | jump: 45 71 | kick: 46 72 | ride_bike: 47 73 | wave: 48 74 | laugh: 49 75 | pullup: 50 76 | output_shape: 77 | - 224 78 | - 224 79 | batch_size: 8 80 | workers: 8 81 | augmentation_config: 82 | train_crop_type: random_crop 83 | horizontal_flip_prob: 0.5 84 | rgb_input_mean: [0.485, 0.456, 0.406] 85 | rgb_input_std: [0.229, 0.224, 0.225] 86 | val_center_crop: True 87 | -------------------------------------------------------------------------------- /tao_action_recognition/data_generation/jetson_of/vpi/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # Redistribution and use in source and binary forms, with or without 4 | # modification, are permitted provided that the following conditions 5 | # are met: 6 | # * Redistributions of source code must retain the above copyright 7 | # notice, this list of conditions and the following disclaimer. 8 | # * Redistributions in binary form must reproduce the above copyright 9 | # notice, this list of conditions and the following disclaimer in the 10 | # documentation and/or other materials provided with the distribution. 11 | # * Neither the name of NVIDIA CORPORATION nor the names of its 12 | # contributors may be used to endorse or promote products derived 13 | # from this software without specific prior written permission. 14 | # 15 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY 16 | # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 18 | # PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR 19 | # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 20 | # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 21 | # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 22 | # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 23 | # OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 | 27 | cmake_minimum_required(VERSION 3.5) 28 | 29 | project(jetson_optflow_flow) 30 | 31 | set(CMAKE_CXX_STANDARD 11) 32 | 33 | find_package(vpi 1.1 REQUIRED) 34 | find_package(OpenCV REQUIRED) 35 | 36 | add_executable(${PROJECT_NAME} main.cpp) 37 | target_link_libraries(${PROJECT_NAME} vpi opencv_core 38 | opencv_imgproc) 39 | 40 | if(OpenCV_VERSION VERSION_LESS 3) 41 | target_link_libraries(${PROJECT_NAME} opencv_highgui) 42 | else() 43 | target_link_libraries(${PROJECT_NAME} opencv_imgcodecs opencv_videoio) 44 | endif() 45 | -------------------------------------------------------------------------------- /tao_action_recognition/data_generation/convert_dataset.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2021 NVIDIA CORPORATION. All rights reserved. 2 | 3 | # Permission is hereby granted, free of charge, to any person obtaining a copy 4 | # of this software and associated documentation files (the "Software"), to deal 5 | # in the Software without restriction, including without limitation the rights 6 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | # copies of the Software, and to permit persons to whom the Software is 8 | # furnished to do so, subject to the following conditions: 9 | 10 | # The above copyright notice and this permission notice shall be included in all 11 | # copies or substantial portions of the Software. 12 | 13 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 19 | # SOFTWARE. 
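# convert_dataset.py: decode a video with OpenCV and write every frame to the output folder as a zero-padded PNG (000001.png, 000002.png, ...).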
20 | 21 | import argparse 22 | import os 23 | import cv2 24 | 25 | 26 | def clip_video(input_video_path, output_path): 27 | cap = cv2.VideoCapture(input_video_path) 28 | frame_cnt = cap.get(cv2.CAP_PROP_FRAME_COUNT) 29 | print("f cnt: {}".format(frame_cnt)) 30 | height = cap.get(cv2.CAP_PROP_FRAME_HEIGHT) 31 | width = cap.get(cv2.CAP_PROP_FRAME_WIDTH) 32 | 33 | img_id = 1 34 | while cap.isOpened(): 35 | ret, frame = cap.read() 36 | img_name = os.path.join(output_path, str(img_id).zfill(6)+".png") 37 | if ret: 38 | cv2.imwrite(img_name, frame) 39 | else: 40 | break 41 | img_id += 1 42 | 43 | 44 | if __name__ == "__main__": 45 | 46 | parser = argparse.ArgumentParser(description='Clip video to RGB frames') 47 | parser.add_argument('--input_video', type=str, help='input video path') 48 | parser.add_argument('--output_folder', type=str, help='output images path') 49 | args = parser.parse_args() 50 | clip_video(args.input_video, args.output_folder) 51 | -------------------------------------------------------------------------------- /tao_ocdr/handwritten/specs/ocd/train.yaml: -------------------------------------------------------------------------------- 1 | num_gpus: 1 2 | 3 | model: 4 | load_pruned_graph: False 5 | pruned_graph_path: '/results/prune/pruned_0.1.pth' 6 | pretrained_model_path: '/data/ocdnet/ocdnet_deformable_resnet18.pth' 7 | backbone: deformable_resnet18 8 | 9 | train: 10 | results_dir: /results/ocd/train 11 | num_epochs: 300 12 | #resume_training_checkpoint_path: '/results/train/resume.pth' 13 | checkpoint_interval: 1 14 | validation_interval: 1 15 | trainer: 16 | clip_grad_norm: 5.0 17 | 18 | optimizer: 19 | type: Adam 20 | args: 21 | lr: 0.001 22 | 23 | lr_scheduler: 24 | type: WarmupPolyLR 25 | args: 26 | warmup_epoch: 3 27 | 28 | post_processing: 29 | type: SegDetectorRepresenter 30 | args: 31 | thresh: 0.45 32 | box_thresh: 0.55 33 | max_candidates: 1000 34 | unclip_ratio: 1.5 35 | 36 | metric: 37 | type: QuadMetric 38 | args: 39 | is_output_polygon: false 40 | 41 | 42 | dataset: 43 | train_dataset: 44 | data_path: ['/data/ocdnet/iamdata/train'] 45 | args: 46 | pre_processes: 47 | - type: IaaAugment 48 | args: 49 | - {'type':Fliplr, 'args':{'p':0.5}} 50 | - {'type': Affine, 'args':{'rotate':[-10,10]}} 51 | - {'type':Resize,'args':{'size':[0.5,3]}} 52 | - type: EastRandomCropData 53 | args: 54 | size: [1024,1024] 55 | max_tries: 50 56 | keep_ratio: true 57 | - type: MakeBorderMap 58 | args: 59 | shrink_ratio: 0.4 60 | thresh_min: 0.3 61 | thresh_max: 0.7 62 | - type: MakeShrinkMap 63 | args: 64 | shrink_ratio: 0.4 65 | min_text_size: 8 66 | 67 | img_mode: BGR 68 | filter_keys: [img_path,img_name,text_polys,texts,ignore_tags,shape] 69 | ignore_tags: ['*', '###'] 70 | loader: 71 | batch_size: 4 72 | pin_memory: true 73 | num_workers: 12 74 | 75 | validate_dataset: 76 | data_path: ['/data/ocdnet/iamdata/test'] 77 | args: 78 | pre_processes: 79 | - type: Resize2D 80 | args: 81 | short_size: 82 | - 2464 83 | - 3520 84 | resize_text_polys: true 85 | img_mode: BGR 86 | filter_keys: [] 87 | ignore_tags: ['*', '###'] 88 | loader: 89 | batch_size: 1 90 | pin_memory: false 91 | num_workers: 4 92 | 93 | -------------------------------------------------------------------------------- /tao_pointpillars/tensorrt_sample/test/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
2 | # SPDX-License-Identifier: Apache-2.0 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | cmake_minimum_required(VERSION 2.8.7) 17 | set(PROJECT_NAME pointpillars) 18 | EXECUTE_PROCESS( COMMAND uname -m COMMAND tr -d '\n' OUTPUT_VARIABLE ARCH ) 19 | message( STATUS "Architecture: ${ARCH}" ) 20 | 21 | find_package(CUDA REQUIRED) 22 | 23 | set(CUDA_VERSION 11.3) 24 | set(CUDA_TOOLKIT_ROOT_DIR /usr/local/cuda-${CUDA_VERSION}) 25 | 26 | SET(CMAKE_BUILD_TYPE "Release") 27 | add_compile_options(-W) 28 | add_compile_options(-std=c++11) 29 | 30 | set(SMS 50 52 53 60 61 62 70 72 75 80 86) 31 | foreach(sm ${SMS}) 32 | set(GENCODE ${GENCODE} -gencode arch=compute_${sm},code=sm_${sm}) 33 | endforeach() 34 | list(GET SMS -1 LATEST_SM) 35 | set(GENCODE "${GENCODE} -gencode arch=compute_${LATEST_SM},code=compute_${LATEST_SM}") 36 | 37 | set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} 38 | -ccbin ${CMAKE_CXX_COMPILER} 39 | -Xcompiler -DWIN_INTERFACE_CUSTOM 40 | -Xcompiler -I/usr/${ARCH}-linux-gnu/include/ 41 | -Xlinker -lsocket 42 | -Xlinker -rpath=/usr/lib/${ARCH}-linux-gnu/ 43 | -Xlinker -rpath=/usr/${ARCH}-linux-gnu/lib/ 44 | -Xlinker -L/usr/lib/${ARCH}-linux-gnu/ 45 | -Xlinker -L/usr/${ARCH}-linux-gnu/lib/ 46 | ) 47 | 48 | set(TENSORRT_INCLUDE_DIRS /usr/include/${ARCH}-linux-gnu/) 49 | set(TENSORRT_LIBRARY_DIRS /usr/lib/${ARCH}-linux-gnu/) 50 | 51 | include_directories( 52 | ${CUDA_INCLUDE_DIRS} 53 | ${TENSORRT_INCLUDE_DIRS} 54 | ../include/ 55 | ) 56 | 57 | link_directories( 58 | ${TENSORRT_LIBRARY_DIRS} 59 | /usr/lib/${ARCH}-linux-gnu 60 | /usr/${ARCH}-linux-gnu/lib/ 61 | ) 62 | 63 | file(GLOB_RECURSE SOURCE_FILES 64 | ../src/*.cu 65 | ../src/*.cpp 66 | ) 67 | 68 | cuda_add_executable(${PROJECT_NAME} main.cpp ${SOURCE_FILES}) 69 | 70 | target_link_libraries(${PROJECT_NAME} 71 | libnvinfer.so 72 | libnvonnxparser.so 73 | libnvinfer_plugin.so 74 | ) 75 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 NVIDIA CORPORATION. All rights reserved. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | 23 | 24 | VPI dense optical flow sample 25 | 26 | Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. 27 | 28 | Redistribution and use in source and binary forms, with or without 29 | modification, are permitted provided that the following conditions 30 | are met: 31 | * Redistributions of source code must retain the above copyright 32 | notice, this list of conditions and the following disclaimer. 33 | * Redistributions in binary form must reproduce the above copyright 34 | notice, this list of conditions and the following disclaimer in the 35 | documentation and/or other materials provided with the distribution. 36 | * Neither the name of NVIDIA CORPORATION nor the names of its 37 | contributors may be used to endorse or promote products derived 38 | from this software without specific prior written permission. 39 | 40 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY 41 | EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 42 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 43 | PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR 44 | CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 45 | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 46 | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 47 | PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 48 | OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 49 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 50 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 51 | -------------------------------------------------------------------------------- /tao_action_recognition/data_generation/README.md: -------------------------------------------------------------------------------- 1 | # Data generation sample for TAO ActionRecognitionNet 2 | 3 | ## Introduction 4 | This projects contains the sample scripts to generate dataset to proper format used by TAO ActionRecognitionNet 5 | 6 | - `convert_dataset.py` : Convert the video to RGB frames. 7 | - `convert_of.py` : Convert the optical flow vectors to grayscale images. 8 | - `split_dataset.py` : Script to split the HMDB51 dataset. 9 | - `load_tracks.py` / `save_tracks_shad.py` : Scripts to process SHAD dataset's annotation 10 | 11 | 12 | ## Prequisites 13 | - xmltodict 14 | - cv2 15 | 16 | ``` 17 | pip install xmltodict opencv-python 18 | ``` 19 | 20 | And we use the sample application `AppOFCuda` in Nvidia optical flow [SDK](https://developer.nvidia.com/opticalflow-sdk) to generate optical flow of frames. You could get this app by compiling by yourself or download the compiled binary in on [NGC](https://catalog.ngc.nvidia.com/orgs/nvidia/teams/tao/resources/cv_samples/version) (It is packaged with action recognition notebook). 
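After preprocessing, each video ends up as a folder of decoded frames (and, for the RGB+OF pipelines, grayscale optical-flow components). The tree below is only an illustration of the resulting layout — class and video names are placeholders, and the `u`/`v` folders exist only for the optical-flow pipelines:

```
train/
├── <class_name>/
│   └── <video_name>/
│       ├── rgb/   # decoded RGB frames, e.g. 000001.png, 000002.png, ...
│       ├── u/     # optical-flow x component as grayscale images (OF pipelines only)
│       └── v/     # optical-flow y component as grayscale images (OF pipelines only)
```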
21 | 22 | ## Steps to generate dataset for TAO ActionRecognitionNet 23 | We provide 3 all_in_one scripts: 24 | 25 | - `preprocess_HMDB_RGB.sh`: Generate RGB dataset of HMDB51 26 | - `preprocess_SHAD_RGB.sh`: Generate RGB dataset of SHAD 27 | - `preprocess_SHAD.sh`: Generate RGB+OF dataset of SHAD 28 | 29 | ### SHAD dataset 30 | 31 | Dataset [URL](https://best.sjtu.edu.cn/Data/View/990) 32 | 33 | ```sh 34 | # make directory to contain 35 | mkdir -p train_raw 36 | 37 | # Download the dataset you need and unrar: 38 | wget -P ./ https://best.sjtu.edu.cn/Assets/userfiles/sys_eb538c1c-65ff-4e82-8e6a-a1ef01127fed/files/ZIP/Bend-train.rar 39 | unrar x Bend-train.rar train_raw 40 | ... 41 | 42 | # Generate RGB dataset with all_in_one script: 43 | ./preprocess_SHAD_RGB.sh train_raw train 44 | # Or you can generate RGB+OF dataset: 45 | # ./preprocess_SHAD.sh train_raw train 46 | 47 | ``` 48 | 49 | ### HMDB51 dataset 50 | 51 | Dataset [URL](https://serre-lab.clps.brown.edu/resource/hmdb-a-large-human-motion-database/) 52 | 53 | ```sh 54 | # download the dataset and unrar: 55 | wget http://serre-lab.clps.brown.edu/wp-content/uploads/2013/10/hmdb51_org.rar 56 | unrar x hmdb51_org.rar video_rar 57 | 58 | # unrar the videos packages: 59 | unrar x ./video_rar/climb.rar ./HMDB51_videos/ 60 | unrar x ./video_rar/run.rar ./HMDB51_videos/ 61 | ... 62 | 63 | # run all_in_one script: 64 | ./preprocess_HMDB_RGB.sh ./HMDB51_videos ./HMDB51 65 | 66 | # split the dataset if needed: 67 | # python split_dataset.py 68 | 69 | ``` 70 | 71 | ### Common data process pipeline: 72 | The data process pipeline in above scripts can be concluded in following diagrams: 73 | - For RGB-only model: 74 | ![rgb_only_pipe](resources/rgb_preprocess_pipe.png) 75 | - For OF-only model: 76 | ![of_only_pipe](resources/of_preprocess_pipe.png) -------------------------------------------------------------------------------- /tao_action_recognition/data_generation/split_dataset.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2021 NVIDIA CORPORATION. All rights reserved. 2 | 3 | # Permission is hereby granted, free of charge, to any person obtaining a copy 4 | # of this software and associated documentation files (the "Software"), to deal 5 | # in the Software without restriction, including without limitation the rights 6 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | # copies of the Software, and to permit persons to whom the Software is 8 | # furnished to do so, subject to the following conditions: 9 | 10 | # The above copyright notice and this permission notice shall be included in all 11 | # copies or substantial portions of the Software. 12 | 13 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 19 | # SOFTWARE. 
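# split_dataset.py: move each HMDB51 video's frame folder into the train/ or test/ directory according to the official *_test_split1.txt files (label 1 = train, label 2 = test).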
20 | 21 | import os 22 | import shutil 23 | import sys 24 | 25 | root_path = sys.argv[1] 26 | split_files_path = sys.argv[2] 27 | target_train_path = sys.argv[3] 28 | target_test_path = sys.argv[4] 29 | 30 | if not os.path.exists(target_train_path): 31 | os.makedirs(target_train_path) 32 | if not os.path.exists(target_test_path): 33 | os.makedirs(target_test_path) 34 | 35 | train_cnt = 0 36 | test_cnt = 0 37 | for class_name in os.listdir(root_path): 38 | split_files = os.path.join(split_files_path, class_name + "_test_split1.txt") 39 | cls_train_path = os.path.join(target_train_path, class_name) 40 | cls_test_path = os.path.join(target_test_path, class_name) 41 | if not os.path.exists(cls_train_path): 42 | os.makedirs(cls_train_path) 43 | if not os.path.exists(cls_test_path): 44 | os.makedirs(cls_test_path) 45 | 46 | with open(split_files, "r") as f: 47 | split_list = f.readlines() 48 | 49 | for line in split_list: 50 | video_name, label = line.split() 51 | video_name = video_name.split(".")[0] 52 | cur_path = os.path.join(root_path, class_name, video_name) 53 | if int(label) == 1: 54 | train_cnt += 1 55 | des_path = os.path.join(target_train_path, class_name, video_name) 56 | shutil.move(cur_path, des_path) 57 | elif int(label) == 2: 58 | test_cnt += 1 59 | des_path = os.path.join(target_test_path, class_name, video_name) 60 | shutil.move(cur_path, des_path) 61 | 62 | 63 | print("Split 1: \n Train: {}\n Test: {}".format(train_cnt, test_cnt)) 64 | -------------------------------------------------------------------------------- /tao_action_recognition/data_generation/preprocess_HMDB_RGB.sh: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2021 NVIDIA CORPORATION. All rights reserved. 2 | 3 | # Permission is hereby granted, free of charge, to any person obtaining a copy 4 | # of this software and associated documentation files (the "Software"), to deal 5 | # in the Software without restriction, including without limitation the rights 6 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | # copies of the Software, and to permit persons to whom the Software is 8 | # furnished to do so, subject to the following conditions: 9 | 10 | # The above copyright notice and this permission notice shall be included in all 11 | # copies or substantial portions of the Software. 12 | 13 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 19 | # SOFTWARE. 20 | 21 | MKDIR(){ 22 | if [ ! 
-d $1 ]; then 23 | mkdir -p $1 24 | fi 25 | } 26 | 27 | WORKER_CNT=4 28 | VIDEO_LIST=("NULL" "NULL" "NULL" "NULL") 29 | RGB_PATH_LIST=("NULL" "NULL" "NULL" "NULL") 30 | 31 | 32 | RUN_WORKERS(){ 33 | for((i=0;i<$WORKER_CNT;i++)); do 34 | if [ ${VIDEO_LIST[i]} != "NULL" ]; then 35 | python3 ./convert_dataset.py --input_video ${VIDEO_LIST[i]} --output_folder ${RGB_PATH_LIST[i]} 36 | fi 37 | done 38 | wait 39 | } 40 | 41 | if [ $# -ne 2 ]; then 42 | echo "USAGE:./preprocess_HMDB_RGB.sh [hmdb_dir] [output_top_dir]" 43 | exit 1 44 | else 45 | HMDB_TOP_DIR=$1 46 | OUTPUT_TOP_DIR=$2 47 | echo $HMDB_TOP_DIR 48 | echo $OUTPUT_TOP_DIR 49 | #TEMP_DIR="./tmp" 50 | #MKDIR $TEMP_DIR 51 | MKDIR $OUTPUT_TOP_DIR 52 | fi 53 | 54 | # 1st stage: unrar rar package: 55 | # for class in $HMDB_TOP_DIR/*; do 56 | # unrar x $class $TEMP_DIR > /dev/null & 57 | # done 58 | 59 | # 2nd stage: Clip video and generate optical flow out of it 60 | for class in $HMDB_TOP_DIR/*; do 61 | CLASS_NAME=$(echo $(basename $class) | cut -d . -f1) 62 | echo "Preprocess $CLASS_NAME" 63 | cnt=0 64 | # extract the frames 65 | for video in $HMDB_TOP_DIR/$CLASS_NAME/*; do 66 | VIDEO_NAME=$(echo $(basename $video) | cut -d . -f1) 67 | RGB_PATH=$OUTPUT_TOP_DIR/$CLASS_NAME/$VIDEO_NAME/"rgb" 68 | MKDIR $RGB_PATH 69 | VIDEO_LIST[$cnt]=$video 70 | RGB_PATH_LIST[$cnt]=$RGB_PATH 71 | 72 | cnt=$((cnt + 1)) 73 | if [ $cnt -eq $WORKER_CNT ]; then 74 | cnt=0 75 | RUN_WORKERS 76 | VIDEO_LIST=("NULL" "NULL" "NULL" "NULL") 77 | RGB_PATH_LIST=("NULL" "NULL" "NULL" "NULL") 78 | fi 79 | done 80 | if [ $cnt -ne 0 ]; then 81 | RUN_WORKERS 82 | fi 83 | done 84 | 85 | # rm -r $TEMP_DIR 86 | -------------------------------------------------------------------------------- /tao_action_recognition/doc/load_I3D.md: -------------------------------------------------------------------------------- 1 | # Load I3D Kinetics pretrained weights in TAO and finetune on HMDB51 2 | 3 | I3D is a 3D inception architecture proposed in paper *Quo Vadis, Action Recognition? A New Model and the Kinetics Dataset*. In this paper, the authors show us the enormous benefit of pretrained weights on Kinetics400 of I3D architecture for the downstream dataset --- We can get much higher accuracy on other action recognition datasets with Kinetics pretrained weights: 4 | 5 | |Model type|Dataset|Pretrained|Acc| 6 | |:---:|:---:|:---:|:---:| 7 | |I3D RGB-Only|HMDB51|ImageNet|49.8%| 8 | |I3D OF-Only|HMDB51|ImageNet|61.9%| 9 | |I3D RGB-Only|HMDB51|Kinetics|74.3%| 10 | |I3D OF-Only|HMDB51|Kinetics|77.3%| 11 | 12 | In TAO Toolkit, we support to use I3D architecture for action recognition and it could alos load the pytorch version of Kinect400 pretrained I3D model to help improve the accuracy of the downstream dataset. 13 | 14 | ## Load I3D Kinetics pretrained weights and finetune on HMDB51 15 | 16 | The I3D architecture in TAO Toolkit is following the public [repo](https://github.com/piergiaj/pytorch-i3d). And this repo also contains the [RGB](https://github.com/piergiaj/pytorch-i3d/blob/master/models/rgb_imagenet.pt) and the [Optical flow](https://github.com/piergiaj/pytorch-i3d/blob/master/models/flow_imagenet.pt) pretrained weights converted from DeepMind. 17 | 18 | To load these models, some config options should be set. Take RGB models as an example, the following are the `model_config` in the training config yaml file to load pretrained I3D RGB pretrained weights. 
19 | 20 | ```yaml 21 | model_config: 22 | model_type: rgb 23 | input_type: 3d 24 | backbone: i3d 25 | rgb_seq_length: 64 26 | rgb_pretrained_model_path: /workspace/action_recognition/i3d_pretrained/rgb_imagenet_kinetics.pt 27 | rgb_pretrained_num_classes: 400 28 | ``` 29 | 30 | In the above config, the `backbone` is set to `i3d`, `rgb_pretrained_model_path` is set to the path of pretrained pytorch weights and the `rgb_pretrained_num_classes` is set to 400 to match with Kinetics-400 classes. 31 | 32 | We provide the [spec](https://github.com/NVIDIA-AI-IOT/tao_toolkit_recipes/blob/main/tao_action_recognition/specs/train_rgb_3d_64_i3d.yaml) to finetune I3D model on HMDB51 dataset. You might get ~75% accuracy after the training with following command. 33 | 34 | ```shell 35 | tao action_recognition train -e /path/to/train_rgb_3d_64_i3d.yaml -k your_key -r /path/to/results 36 | ``` 37 | 38 | ## Export the I3D model 39 | The exported I3D model could be consumed by TensorRT 8.2.3 and above. We provide the [spec](https://github.com/NVIDIA-AI-IOT/tao_toolkit_recipes/blob/main/tao_action_recognition/specs/i3d_rgb_3d_64_export.yaml) to export TAO Toolkit trained I3D model. And you could use the following command to export the model to etlt format: 40 | 41 | ```shell 42 | tao action_recognition export -k your_key -e /path/to/i3d_rgb_3d_64_export.yaml 43 | ``` 44 | 45 | ## Reference 46 | - [I3D models trained on Kinetics - pytorch version](https://github.com/piergiaj/pytorch-i3d) 47 | - [I3D models trained on Kinetics](https://github.com/piergiaj/pytorch-i3d) -------------------------------------------------------------------------------- /tao_pointpillars/tensorrt_sample/include/pointpillar.h: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 3 | * SPDX-License-Identifier: Apache-2.0 4 | * 5 | * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * you may not use this file except in compliance with the License. 7 | * You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | 18 | #ifndef POINTPILLAR_H_ 19 | #define POINTPILLAR_H_ 20 | 21 | #include 22 | #include "cuda_runtime.h" 23 | #include "NvInfer.h" 24 | #include "NvOnnxConfig.h" 25 | #include "NvOnnxParser.h" 26 | #include "NvInferRuntime.h" 27 | #include "postprocess.h" 28 | 29 | #define PERFORMANCE_LOG 1 30 | 31 | // Logger for TensorRT 32 | class Logger : public nvinfer1::ILogger { 33 | public: 34 | void log(Severity severity, const char* msg) noexcept override { 35 | // suppress info-level message 36 | //if (severity == Severity::kERROR || severity == Severity::kINTERNAL_ERROR || severity == Severity::kINFO ) { 37 | if (severity == Severity::kERROR || severity == Severity::kINTERNAL_ERROR) { 38 | std::cerr << "trt_infer: " << msg << std::endl; 39 | } 40 | } 41 | }; 42 | 43 | class TRT { 44 | private: 45 | cudaEvent_t start, stop; 46 | 47 | float elapsedTime = 0.0f; 48 | Logger gLogger_; 49 | nvinfer1::IExecutionContext *context = nullptr; 50 | nvinfer1::ICudaEngine *engine = nullptr; 51 | 52 | cudaStream_t stream_; 53 | public: 54 | TRT( 55 | std::string modelFile, 56 | std::string engineFile, 57 | cudaStream_t stream, 58 | const std::string& data_type 59 | ); 60 | ~TRT(void); 61 | 62 | int doinfer(void**buffers, bool do_profile); 63 | nvinfer1::Dims get_binding_shape(int index); 64 | int getPointSize(); 65 | }; 66 | 67 | class PointPillar { 68 | private: 69 | cudaEvent_t start, stop; 70 | float elapsedTime = 0.0f; 71 | cudaStream_t stream_; 72 | //output of TRT 73 | std::shared_ptr trt_; 74 | //output of TRT 75 | float *box_output = nullptr; 76 | int *box_num = nullptr; 77 | unsigned int box_size; 78 | std::vector res; 79 | 80 | public: 81 | PointPillar( 82 | std::string modelFile, 83 | std::string engineFile, 84 | cudaStream_t stream, 85 | const std::string& data_type 86 | ); 87 | ~PointPillar(void); 88 | int getPointSize(); 89 | int doinfer( 90 | void*points_data, 91 | unsigned int* points_size, 92 | std::vector &nms_pred, 93 | float nms_iou_thresh, 94 | int pre_nms_top_n, 95 | std::vector& class_names, 96 | bool do_profile 97 | ); 98 | }; 99 | 100 | #endif -------------------------------------------------------------------------------- /tao_action_recognition/data_generation/generate_new_dataset_format.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2021 NVIDIA CORPORATION. All rights reserved. 2 | 3 | # Permission is hereby granted, free of charge, to any person obtaining a copy 4 | # of this software and associated documentation files (the "Software"), to deal 5 | # in the Software without restriction, including without limitation the rights 6 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | # copies of the Software, and to permit persons to whom the Software is 8 | # furnished to do so, subject to the following conditions: 9 | 10 | # The above copyright notice and this permission notice shall be included in all 11 | # copies or substantial portions of the Software. 12 | 13 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 19 | # SOFTWARE. 
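# generate_new_dataset_format.py: merge the per-clip rgb/u/v folders of each video into a single rgb/u/v folder per video, renaming frames to a continuous zero-padded index (000000, 000001, ...).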
20 | import os 21 | import sys 22 | 23 | root_dir = sys.argv[1] 24 | target_dir = sys.argv[2] 25 | 26 | for class_name in os.listdir(root_dir): 27 | root_class_path = os.path.join(root_dir, class_name) 28 | target_class_path = os.path.join(target_dir, class_name) 29 | if not os.path.exists(target_class_path): 30 | os.makedirs(target_class_path) 31 | for video_name in os.listdir(root_class_path): 32 | video_path = os.path.join(root_class_path, video_name) 33 | target_video_path = os.path.join(target_class_path, video_name) 34 | target_rgb_path = os.path.join(target_video_path, "rgb") 35 | target_u_path = os.path.join(target_video_path, "u") 36 | target_v_path = os.path.join(target_video_path, "v") 37 | 38 | if not os.path.exists(target_rgb_path): 39 | os.makedirs(target_rgb_path) 40 | if not os.path.exists(target_u_path): 41 | os.makedirs(target_u_path) 42 | if not os.path.exists(target_v_path): 43 | os.makedirs(target_v_path) 44 | 45 | img_idx = 0 46 | for video_clip_name in sorted(os.listdir(video_path)): 47 | video_clip_path = os.path.join(video_path, video_clip_name) 48 | rgb_path = os.path.join(video_clip_path, "rgb") 49 | u_path = os.path.join(video_clip_path, "u") 50 | v_path = os.path.join(video_clip_path, "v") 51 | 52 | assert len(os.listdir(u_path)) == \ 53 | len(os.listdir(v_path)), "video clip mismatch. {}".format(video_clip_path) 54 | 55 | for file_name in sorted(os.listdir(rgb_path)): 56 | ext = file_name.split(".")[-1] 57 | rgb_file = os.path.join(rgb_path, file_name) 58 | u_file = os.path.join(u_path, file_name) 59 | v_file = os.path.join(v_path, file_name) 60 | 61 | target_file_name = str(img_idx).zfill(6) + "." + ext 62 | img_idx += 1 63 | target_rgb_file = os.path.join(target_rgb_path, target_file_name) 64 | target_u_file = os.path.join(target_u_path, target_file_name) 65 | target_v_file = os.path.join(target_v_path, target_file_name) 66 | 67 | os.rename(rgb_file, target_rgb_file) 68 | if os.path.exists(u_file): 69 | os.rename(u_file, target_u_file) 70 | if os.path.exists(v_file): 71 | os.rename(v_file, target_v_file) 72 | 73 | -------------------------------------------------------------------------------- /tao_retinanet/tao_retinanet_scales_aspect_ratio_estimate.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023 NVIDIA CORPORATION. All rights reserved. 2 | 3 | # Permission is hereby granted, free of charge, to any person obtaining a copy 4 | # of this software and associated documentation files (the "Software"), to deal 5 | # in the Software without restriction, including without limitation the rights 6 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | # copies of the Software, and to permit persons to whom the Software is 8 | # furnished to do so, subject to the following conditions: 9 | 10 | # The above copyright notice and this permission notice shall be included in all 11 | # copies or substantial portions of the Software. 12 | 13 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 19 | # SOFTWARE. 
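# Reads the KITTI label files, collects the width/height of every non-DontCare box (dropping aspect-ratio outliers), and clusters them with KMeans to suggest scales and aspect-ratio values for the retinanet spec.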
20 | 21 | import os 22 | import numpy as np 23 | from sklearn.cluster import KMeans 24 | 25 | num_scales_retinanet=6 26 | num_ars_retinanet=3 27 | limit_max_ar=4 28 | 29 | #file 004156.jpg 30 | # 004156.jpg: JPEG image data, JFIF standard 1.01, aspect ratio, density 1x1, segment length 16, baseline, precision 8, 1242x375, frames 3 31 | shorter_length_of_image = 375 32 | 33 | folder="/home/luwu/tlt-experiments/data/training/label_2/" 34 | widths=[] 35 | heights=[] 36 | files=[] 37 | 38 | for r, d, f in os.walk(folder): 39 | for file in f: 40 | if file.endswith(".txt"): 41 | file1 = open(folder+file, 'r') 42 | lines=file1.readlines() 43 | 44 | for line in lines: 45 | line_split=line.split(" ") 46 | cls=line_split[0] 47 | xl=float(line_split[4]) 48 | yl=float(line_split[5]) 49 | xr=float(line_split[6]) 50 | yr=float(line_split[7]) 51 | 52 | width=xr-xl 53 | height=yr-yl 54 | 55 | if cls != 'DontCare' and width>=0 and height >= 0: 56 | widths.append(width) 57 | heights.append(height) 58 | files.append(file1) 59 | file1.close() 60 | 61 | scales=[] 62 | aspect_ratios=[] 63 | for i in range(len(widths)): 64 | w=widths[i] 65 | h=heights[i] 66 | if w new_azure:version_1 82 | 83 | $ docker save -o new_azure_version_1.tar.gz new_azure:version_1 84 | 85 | $ docker save -o tao-toolkit-tf-v3.21.11-tf1.15.5-py3.tar.gz nvcr.io/nvidia/tao/tao-toolkit-tf:v3.21.11-tf1.15.5-py3 86 | 87 | $ docker save -o tao-toolkit-tf-v3.21.11-tf1.15.4-py3.tar.gz nvcr.io/nvidia/tao/tao-toolkit-tf:v3.21.11-tf1.15.4-py3 88 | 89 | $ docker save -o tao-toolkit-pyt-v3.21.11-py3.tar.gz nvcr.io/nvidia/tao/tao-toolkit-pyt:v3.21.11-py3 90 | 91 | $ docker save -o tao-toolkit-lm-v3.21.08-py3.tar.gz nvcr.io/nvidia/tao/tao-toolkit-lm:v3.21.08-py3 92 | ``` 93 | 94 | 95 | 96 | 97 | Copy all the tar.gz files into the 2nd machine which has no internet. 98 | ``` 99 | $ docker load -i new_azure_version_1.tar.gz 100 | 101 | $ docker load -i tao-toolkit-tf-v3.21.11-tf1.15.4-py3.tar.gz 102 | 103 | $ docker load -i tao-toolkit-tf-v3.21.11-tf1.15.5-py3.tar.gz 104 | 105 | $ docker load -i tao-toolkit-pyt-v3.21.11-py3.tar.gz 106 | 107 | $ docker load -i tao-toolkit-lm-v3.21.08-py3.tar.gz 108 | ``` 109 | 110 | Copy the training dataset into below path of the 2nd machine. If the path is not available, please generate the same as the 1st machine. 111 | `/home/username/` 112 | 113 | 114 | 115 | In the 2nd machine, login the new azure docker 116 | ``` 117 | $ docker run -it --rm -v /var/run/docker.sock:/var/run/docker.sock new_azure:version_1 /bin/bash 118 | ``` 119 | 120 | Then run training. 121 | -------------------------------------------------------------------------------- /tao_ocdr/handwritten/preprocess_data.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. 
2 | 3 | import os 4 | import cv2 5 | import argparse 6 | from tqdm import tqdm 7 | 8 | 9 | if __name__ == "__main__": 10 | parser = argparse.ArgumentParser("preprocess_data", add_help=True, description="Preprocess IAMDATA to TAO Toolkit OCRNet format") 11 | parser.add_argument( 12 | "--images_dir", 13 | help="Path to original images", 14 | default=None, 15 | required=True, 16 | ) 17 | parser.add_argument( 18 | "--labels_dir", 19 | help="Path to original label txt files", 20 | default=None, 21 | required=True, 22 | ) 23 | parser.add_argument( 24 | "--output_images_dir", 25 | help="Path to pre-processed images", 26 | default=None, 27 | required=True, 28 | ) 29 | parser.add_argument( 30 | "--gt_file_path", 31 | help="Path to ground truth list", 32 | default=None, 33 | required=True, 34 | ) 35 | parser.add_argument( 36 | "--character_list_path", 37 | help="Path to character list", 38 | default=None, 39 | required=True, 40 | ) 41 | 42 | args, _ = parser.parse_known_args() 43 | root_dir = args.images_dir 44 | gt_file_dir = args.labels_dir 45 | target_dir = args.output_images_dir 46 | 47 | if not os.path.exists(target_dir): 48 | os.makedirs(target_dir) 49 | 50 | p_gt_file = open(args.gt_file_path, "w") 51 | 52 | gt_file_list = os.listdir(gt_file_dir) 53 | character_set = set() 54 | 55 | for gt_file_name in tqdm(gt_file_list): 56 | img_id = gt_file_name.split(".")[0].replace("gt_", "") 57 | f = open(os.path.join(gt_file_dir, gt_file_name), "r") 58 | reader = f.readlines() 59 | 60 | img_path = os.path.join(root_dir, img_id+".png") 61 | img = cv2.imread(img_path) 62 | height, width, _ = img.shape 63 | for idx, ann in enumerate(reader): 64 | ann = ann.split(",") 65 | vs = ann[:8] 66 | text = ann[8:] 67 | if len(text) == 1: 68 | text = text[0].strip() 69 | if text.count("\"") == 4: 70 | text = "\"" 71 | elif len(text) == 2: 72 | text = "," 73 | else: 74 | # for label like: "163,000,000" 75 | # ignore the " " at the begin and end 76 | text = ",".join(text) 77 | text = text.replace("\"", "") 78 | text = text.strip() 79 | 80 | # Skip the words which length > 25 or non-word-level label 81 | if len(text) > 25 or (" " in text): 82 | continue 83 | # Lower-case: 84 | text = text.lower() 85 | 86 | for c in text: 87 | character_set.add(c) 88 | 89 | xs = [int(vs[idx]) for idx in range(0, len(vs), 2)] 90 | ys = [int(vs[idx]) for idx in range(1, len(vs), 2)] 91 | xmin = max(0, min(xs)) 92 | ymin = max(0, min(ys)) 93 | xmax = min(width, max(xs)) 94 | ymax = min(height, max(ys)) 95 | 96 | try: 97 | crop_img = img[ymin:ymax, xmin:xmax, :] 98 | target_img_path = f"{img_id}_{idx}.jpg" 99 | p_gt_file.write(target_img_path + "\t" + text + "\n") 100 | cv2.imwrite(os.path.join(target_dir, target_img_path), crop_img) 101 | except Exception as err: 102 | print(err) 103 | print(f"img_id: {img_id} bbox: {vs} img_shape: {img.shape}") 104 | exit() 105 | 106 | p_gt_file.close() 107 | with open(args.character_list_path, "w") as f: 108 | character_set = sorted(list(character_set)) 109 | for c in character_set: 110 | f.write(f"{c}\n") -------------------------------------------------------------------------------- /tao_action_recognition/data_generation/preprocess_HMDB.sh: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2021 NVIDIA CORPORATION. All rights reserved. 
2 | 3 | # Permission is hereby granted, free of charge, to any person obtaining a copy 4 | # of this software and associated documentation files (the "Software"), to deal 5 | # in the Software without restriction, including without limitation the rights 6 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | # copies of the Software, and to permit persons to whom the Software is 8 | # furnished to do so, subject to the following conditions: 9 | 10 | # The above copyright notice and this permission notice shall be included in all 11 | # copies or substantial portions of the Software. 12 | 13 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 19 | # SOFTWARE. 20 | 21 | MKDIR(){ 22 | if [ ! -d $1 ]; then 23 | mkdir -p $1 24 | fi 25 | } 26 | 27 | WORKER_CNT=4 28 | VIDEO_LIST=("NULL" "NULL" "NULL" "NULL") 29 | RGB_PATH_LIST=("NULL" "NULL" "NULL" "NULL") 30 | OF_IMG_PATH_LIST=("NULL" "NULL" "NULL" "NULL") 31 | OF_PATH_LIST=("NULL" "NULL" "NULL" "NULL") 32 | 33 | 34 | RUN_WORKERS(){ 35 | for((i=0;i<$WORKER_CNT;i++)); do 36 | if [ ${VIDEO_LIST[i]} != "NULL" ]; then 37 | python ./convert_dataset.py --input_video ${VIDEO_LIST[i]} --output_folder ${RGB_PATH_LIST[i]} \ 38 | && 39 | ./AppOFCuda --input=${RGB_PATH_LIST[i]}/"*.png" --output=${OF_PATH_LIST[i]}/"flow" --preset=fast --gridSize=1 \ 40 | && 41 | python ./convert_of.py --input_flow_folder ${OF_PATH_LIST[i]} --output_folder ${OF_IMG_PATH_LIST[i]} & 42 | fi 43 | done 44 | wait 45 | for((i=0;i<$WORKER_CNT;i++)); do 46 | if [ ${VIDEO_LIST[i]} != "NULL" ]; then 47 | rm -r ${OF_PATH_LIST[i]} 48 | fi 49 | done 50 | } 51 | 52 | if [ $# -ne 2 ]; then 53 | echo "USAGE:./preprocess_HMDB.sh [hmdb_dir] [output_top_dir]" 54 | exit 1 55 | else 56 | HMDB_TOP_DIR=$1 57 | OUTPUT_TOP_DIR=$2 58 | echo $HMDB_TOP_DIR 59 | echo $OUTPUT_TOP_DIR 60 | TEMP_DIR="./tmp" 61 | MKDIR $TEMP_DIR 62 | MKDIR $OUTPUT_TOP_DIR 63 | fi 64 | 65 | # 1st stage: unrar rar package: 66 | # for class in $HMDB_TOP_DIR/*; do 67 | # unrar x $class $TEMP_DIR > /dev/null & 68 | # done 69 | 70 | # 2nd stage: Clip video and generate optical flow out of it 71 | for class in $HMDB_TOP_DIR/*; do 72 | CLASS_NAME=$(echo $(basename $class) | cut -d . -f1) 73 | echo "Preprocess $CLASS_NAME" 74 | cnt=0 75 | # extract the frames 76 | for video in $HMDB_TOP_DIR/$CLASS_NAME/*; do 77 | VIDEO_NAME=$(echo $(basename $video) | cut -d . 
-f1) 78 | RGB_PATH=$OUTPUT_TOP_DIR/$CLASS_NAME/$VIDEO_NAME/"rgb" 79 | OF_PATH=$OUTPUT_TOP_DIR/$CLASS_NAME/$VIDEO_NAME/"of" 80 | OF_IMG_PATH=$OUTPUT_TOP_DIR/$CLASS_NAME/$VIDEO_NAME/ 81 | MKDIR $RGB_PATH 82 | MKDIR $OF_PATH 83 | VIDEO_LIST[$cnt]=$video 84 | RGB_PATH_LIST[$cnt]=$RGB_PATH 85 | OF_PATH_LIST[$cnt]=$OF_PATH 86 | OF_IMG_PATH_LIST[$cnt]=$OF_IMG_PATH 87 | 88 | cnt=$((cnt + 1)) 89 | if [ $cnt -eq $WORKER_CNT ]; then 90 | cnt=0 91 | RUN_WORKERS 92 | VIDEO_LIST=("NULL" "NULL" "NULL" "NULL") 93 | RGB_PATH_LIST=("NULL" "NULL" "NULL" "NULL") 94 | OF_PATH_LIST=("NULL" "NULL" "NULL" "NULL") 95 | OF_IMG_PATH_LIST=("NULL" "NULL" "NULL" "NULL") 96 | fi 97 | done 98 | if [ $cnt -ne 0 ]; then 99 | RUN_WORKERS 100 | fi 101 | done 102 | 103 | -------------------------------------------------------------------------------- /tao_action_recognition/data_generation/preprocess_SHAD_RGB.sh: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2021 NVIDIA CORPORATION. All rights reserved. 2 | 3 | # Permission is hereby granted, free of charge, to any person obtaining a copy 4 | # of this software and associated documentation files (the "Software"), to deal 5 | # in the Software without restriction, including without limitation the rights 6 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | # copies of the Software, and to permit persons to whom the Software is 8 | # furnished to do so, subject to the following conditions: 9 | 10 | # The above copyright notice and this permission notice shall be included in all 11 | # copies or substantial portions of the Software. 12 | 13 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 19 | # SOFTWARE. 20 | 21 | MKDIR(){ 22 | if [ ! 
-d $1 ]; then 23 | mkdir -p $1 24 | fi 25 | } 26 | 27 | WORKER_CNT=4 28 | VIDEO_LIST=("NULL" "NULL" "NULL" "NULL") 29 | RGB_PATH_LIST=("NULL" "NULL" "NULL" "NULL") 30 | #OF_PATH_LIST=("NULL" "NULL" "NULL" "NULL") 31 | #OF_U_PATH_LIST=("NULL" "NULL" "NULL" "NULL") 32 | #OF_V_PATH_LIST=("NULL" "NULL" "NULL" "NULL") 33 | ANNO_PATH_LIST=("NULL" "NULL" "NULL" "NULL") 34 | TEMP_VIDEO_PATH_LIST=("NULL" "NULL" "NULL" "NULL") 35 | 36 | 37 | RUN_WORKERS(){ 38 | for((i=0;i<$WORKER_CNT;i++)); do 39 | if [ ${VIDEO_LIST[i]} != "NULL" ]; then 40 | python3 ./convert_dataset.py --input_video ${VIDEO_LIST[i]} --output_folder ${RGB_PATH_LIST[i]} \ 41 | && python3 ./save_tracks_shad.py --anno_folder ${ANNO_PATH_LIST[i]} --image_folder ${RGB_PATH_LIST[i]} \ 42 | --of_folder ./ --output_folder $TEMP_DIR_ & 43 | fi 44 | done 45 | wait 46 | for((i=0;i<$WORKER_CNT;i++)); do 47 | if [ ${VIDEO_LIST[i]} != "NULL" ]; then 48 | rm -r ${TEMP_VIDEO_PATH_LIST[i]} 49 | fi 50 | done 51 | } 52 | 53 | if [ $# -ne 2 ]; then 54 | echo "USAGE:./preprocess_SHAD_RGB.sh [shad_dataset_top_dir] [output_top_dir]" 55 | exit 1 56 | else 57 | SHAD_TOP_DIR=$1 58 | OUTPUT_TOP_DIR=$2 59 | echo $SHAD_TOP_DIR 60 | echo $OUTPUT_TOP_DIR 61 | TEMP_DIR="./tmp" 62 | TEMP_DIR_="./tmp_" 63 | MKDIR $TEMP_DIR 64 | MKDIR $TEMP_DIR_ 65 | MKDIR $OUTPUT_TOP_DIR 66 | fi 67 | 68 | # 1st stage: Clip video and generate optical flow out of it 69 | for class in $SHAD_TOP_DIR/*; do 70 | if [ ! -d $class/"video"/ ]; then 71 | echo "Please use original SHAD dataset" 72 | exit 1 73 | fi 74 | echo "Preprocess $class" 75 | CLASS_NAME=$(basename $class) 76 | cnt=0 77 | for video in $class/"video"/*; do 78 | VIDEO_NAME=$(echo $(basename $video) | cut -d . -f1) 79 | ANNO_PATH=$class/"Annotations"/$VIDEO_NAME 80 | RGB_PATH=$TEMP_DIR/$CLASS_NAME/$VIDEO_NAME/"rgb" 81 | TEMP_VIDEO_PATH=$TEMP_DIR/$CLASS_NAME/$VIDEO_NAME 82 | MKDIR $RGB_PATH 83 | VIDEO_LIST[$cnt]=$video 84 | ANNO_PATH_LIST[$cnt]=$ANNO_PATH 85 | RGB_PATH_LIST[$cnt]=$RGB_PATH 86 | TEMP_VIDEO_PATH_LIST[$cnt]=$TEMP_VIDEO_PATH 87 | 88 | cnt=$((cnt + 1)) 89 | if [ $cnt -eq $WORKER_CNT ]; then 90 | cnt=0 91 | RUN_WORKERS 92 | VIDEO_LIST=("NULL" "NULL" "NULL" "NULL") 93 | RGB_PATH_LIST=("NULL" "NULL" "NULL" "NULL") 94 | ANNO_PATH_LIST=("NULL" "NULL" "NULL" "NULL") 95 | TEMP_VIDEO_PATH_LIST=("NULL" "NULL" "NULL" "NULL") 96 | fi 97 | done 98 | if [ $cnt -ne 0 ]; then 99 | RUN_WORKERS 100 | fi 101 | done 102 | 103 | rm -r $TEMP_DIR 104 | 105 | python generate_new_dataset_format.py $TEMP_DIR_ $OUTPUT_TOP_DIR 106 | 107 | rm -r $TEMP_DIR_ 108 | -------------------------------------------------------------------------------- /tao_action_recognition/data_generation/convert_of.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2021 NVIDIA CORPORATION. All rights reserved. 2 | 3 | # Permission is hereby granted, free of charge, to any person obtaining a copy 4 | # of this software and associated documentation files (the "Software"), to deal 5 | # in the Software without restriction, including without limitation the rights 6 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | # copies of the Software, and to permit persons to whom the Software is 8 | # furnished to do so, subject to the following conditions: 9 | 10 | # The above copyright notice and this permission notice shall be included in all 11 | # copies or substantial portions of the Software. 
12 | 13 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 19 | # SOFTWARE. 20 | 21 | import argparse 22 | import cv2 23 | import numpy as np 24 | import os 25 | 26 | 27 | def parse_flow(flow_file): 28 | """Parse the optical flow vector generated from NVOF SDK.""" 29 | 30 | with open(flow_file, "rb") as f: 31 | _ = f.read(4) 32 | width = int.from_bytes(f.read(4), byteorder="little", signed=False) 33 | height = int.from_bytes(f.read(4), byteorder="little", signed=False) 34 | data = f.read() 35 | of_flatten = np.frombuffer(data, dtype=np.float32) 36 | of_array = np.reshape(of_flatten, (height, width, 2)) 37 | of_array = of_array.transpose((2, 0, 1)) 38 | flow_x = np.squeeze(of_array[0, :, :]) 39 | flow_y = np.squeeze(of_array[1, :, :]) 40 | 41 | return flow_x, flow_y 42 | 43 | 44 | def minmax_grayscale(flow_x, flow_y): 45 | """Map the flow to grayscale images. The map method follows I3D""" 46 | higher_end = 20.0 47 | lower_end = -20.0 48 | flow_x = np.maximum(np.minimum(255.0, 255.0 * ((flow_x - lower_end) / (higher_end - lower_end))), 0.0) 49 | flow_y = np.maximum(np.minimum(255.0, 255.0 * ((flow_y - lower_end) / (higher_end - lower_end))), 0.0) 50 | 51 | img_x = np.array(np.around(flow_x), dtype=np.uint8) 52 | img_y = np.array(np.around(flow_y), dtype=np.uint8) 53 | 54 | return img_x, img_y 55 | 56 | 57 | def max_rad_grayscale(flow_x, flow_y): 58 | """Map the flow to grayscale images. 
Normalize vector using max_rad""" 59 | max_rad = 1.0 60 | rad = np.sqrt(flow_x * flow_x + flow_y * flow_y) 61 | max_rad = max(max_rad, rad.max()) 62 | 63 | img_x = np.array((flow_x / max_rad) * 127.999 + 128, dtype=np.uint8) 64 | img_y = np.array((flow_y / max_rad) * 127.999 + 128, dtype=np.uint8) 65 | 66 | return img_x, img_y 67 | 68 | 69 | def convert(input_flow_folder, output_folder): 70 | """Convert the flow in input_flow floder to grayscale images""" 71 | 72 | u_img_root = os.path.join(output_folder, "u") 73 | v_img_root = os.path.join(output_folder, "v") 74 | if not os.path.exists(u_img_root): 75 | os.makedirs(u_img_root) 76 | if not os.path.exists(v_img_root): 77 | os.makedirs(v_img_root) 78 | 79 | for flow_name in os.listdir(input_flow_folder): 80 | frame_id = str(int(flow_name.split("_")[1]) + 1).zfill(6) 81 | flow_file_path = os.path.join(input_flow_folder, flow_name) 82 | flow_x, flow_y = parse_flow(flow_file_path) 83 | img_x, img_y = max_rad_grayscale(flow_x, flow_y) 84 | 85 | img_x_path = os.path.join(u_img_root, frame_id+".jpg") 86 | img_y_path = os.path.join(v_img_root, frame_id+".jpg") 87 | 88 | cv2.imwrite(img_x_path, img_x) 89 | cv2.imwrite(img_y_path, img_y) 90 | 91 | 92 | if __name__ == "__main__": 93 | parser = argparse.ArgumentParser(description='Convert raw optical flow vectors to grayscale images') 94 | parser.add_argument('--input_flow_folder', type=str, help='input optical flow path', required=True) 95 | parser.add_argument('--output_folder', type=str, help='output images path', required=True) 96 | args = parser.parse_args() 97 | 98 | convert(args.input_flow_folder, args.output_folder) -------------------------------------------------------------------------------- /tao_action_recognition/data_generation/preprocess_SHAD.sh: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2021 NVIDIA CORPORATION. All rights reserved. 2 | 3 | # Permission is hereby granted, free of charge, to any person obtaining a copy 4 | # of this software and associated documentation files (the "Software"), to deal 5 | # in the Software without restriction, including without limitation the rights 6 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | # copies of the Software, and to permit persons to whom the Software is 8 | # furnished to do so, subject to the following conditions: 9 | 10 | # The above copyright notice and this permission notice shall be included in all 11 | # copies or substantial portions of the Software. 12 | 13 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 19 | # SOFTWARE. 20 | 21 | MKDIR(){ 22 | if [ ! 
-d $1 ]; then 23 | mkdir -p $1 24 | fi 25 | } 26 | 27 | WORKER_CNT=4 28 | VIDEO_LIST=("NULL" "NULL" "NULL" "NULL") 29 | RGB_PATH_LIST=("NULL" "NULL" "NULL" "NULL") 30 | OF_PATH_LIST=("NULL" "NULL" "NULL" "NULL") 31 | OF_IMG_PATH_LIST=("NULL" "NULL" "NULL" "NULL") 32 | ANNO_PATH_LIST=("NULL" "NULL" "NULL" "NULL") 33 | TEMP_VIDEO_PATH_LIST=("NULL" "NULL" "NULL" "NULL") 34 | 35 | 36 | RUN_WORKERS(){ 37 | for((i=0;i<$WORKER_CNT;i++)); do 38 | if [ ${VIDEO_LIST[i]} != "NULL" ]; then 39 | python3 ./convert_dataset.py --input_video ${VIDEO_LIST[i]} --output_folder ${RGB_PATH_LIST[i]} \ 40 | && ./AppOFCuda --input=${RGB_PATH_LIST[i]}/"*.png" --output=${OF_PATH_LIST[i]}/"flow" --preset=slow --gridSize=1 \ 41 | && python3 ./convert_of.py --input_flow_folder=${OF_PATH_LIST[i]} --output_folder=${OF_IMG_PATH_LIST[i]} \ 42 | && python3 ./save_tracks_shad.py --anno_folder ${ANNO_PATH_LIST[i]} --image_folder ${RGB_PATH_LIST[i]} \ 43 | --of_folder ${OF_IMG_PATH_LIST[i]} --output_folder $TEMP_DIR_ & 44 | fi 45 | done 46 | wait 47 | for((i=0;i<$WORKER_CNT;i++)); do 48 | if [ ${VIDEO_LIST[i]} != "NULL" ]; then 49 | rm -r ${TEMP_VIDEO_PATH_LIST[i]} 50 | fi 51 | done 52 | } 53 | 54 | if [ $# -ne 2 ]; then 55 | echo "USAGE:./preprocess_SHAD.sh [shad_dataset_top_dir] [output_top_dir]" 56 | exit 1 57 | else 58 | SHAD_TOP_DIR=$1 59 | OUTPUT_TOP_DIR=$2 60 | echo $SHAD_TOP_DIR 61 | echo $OUTPUT_TOP_DIR 62 | TEMP_DIR="./tmp" 63 | TEMP_DIR_="./tmp_" 64 | MKDIR $TEMP_DIR 65 | MKDIR $TEMP_DIR_ 66 | MKDIR $OUTPUT_TOP_DIR 67 | fi 68 | 69 | # 1st stage: Clip video and generate optical flow out of it 70 | for class in $SHAD_TOP_DIR/*; do 71 | if [ ! -d $class/"video"/ ]; then 72 | echo "Please use original SHAD dataset" 73 | exit 1 74 | fi 75 | echo "Preprocess $class" 76 | CLASS_NAME=$(basename $class) 77 | cnt=0 78 | for video in $class/"video"/*; do 79 | VIDEO_NAME=$(echo $(basename $video) | cut -d . -f1) 80 | ANNO_PATH=$class/"Annotations"/$VIDEO_NAME 81 | RGB_PATH=$TEMP_DIR/$CLASS_NAME/$VIDEO_NAME/"rgb" 82 | OF_PATH=$TEMP_DIR/$CLASS_NAME/$VIDEO_NAME/"of" 83 | OF_IMG_PATH=$TEMP_DIR/$CLASS_NAME/$VIDEO_NAME/"of_img" 84 | TEMP_VIDEO_PATH=$TEMP_DIR/$CLASS_NAME/$VIDEO_NAME 85 | MKDIR $RGB_PATH 86 | MKDIR $OF_PATH 87 | MKDIR $OF_IMG_PATH 88 | VIDEO_LIST[$cnt]=$video 89 | ANNO_PATH_LIST[$cnt]=$ANNO_PATH 90 | RGB_PATH_LIST[$cnt]=$RGB_PATH 91 | OF_PATH_LIST[$cnt]=$OF_PATH 92 | OF_IMG_PATH_LIST[$cnt]=$OF_IMG_PATH 93 | TEMP_VIDEO_PATH_LIST[$cnt]=$TEMP_VIDEO_PATH 94 | 95 | cnt=$((cnt + 1)) 96 | if [ $cnt -eq $WORKER_CNT ]; then 97 | cnt=0 98 | RUN_WORKERS 99 | VIDEO_LIST=("NULL" "NULL" "NULL" "NULL") 100 | RGB_PATH_LIST=("NULL" "NULL" "NULL" "NULL") 101 | OF_PATH_LIST=("NULL" "NULL" "NULL" "NULL") 102 | OF_IMG_PATH_LIST=("NULL" "NULL" "NULL" "NULL") 103 | ANNO_PATH_LIST=("NULL" "NULL" "NULL" "NULL") 104 | TEMP_VIDEO_PATH_LIST=("NULL" "NULL" "NULL" "NULL") 105 | fi 106 | done 107 | if [ $cnt -ne 0 ]; then 108 | RUN_WORKERS 109 | fi 110 | done 111 | 112 | rm -r $TEMP_DIR 113 | 114 | python generate_new_dataset_format.py $TEMP_DIR_ $OUTPUT_TOP_DIR 115 | 116 | rm -r $TEMP_DIR_ 117 | -------------------------------------------------------------------------------- /tao_key_points_estimation/tensorrt_inference/trt_inference/engine.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
2 | # SPDX-License-Identifier: Apache-2.0 3 | 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | 16 | import logging 17 | import os 18 | import numpy as np 19 | import tensorrt as trt 20 | import pycuda.autoinit 21 | import pycuda.driver as cuda 22 | 23 | logger = logging.getLogger(__name__) 24 | 25 | EXPLICIT_BATCH = 1 << (int)(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH) 26 | 27 | 28 | # Array of TensorRT loggers. We need to keep global references to 29 | # the TensorRT loggers that we create to prevent them from being 30 | # garbage collected as those are referenced from C++ code without 31 | # Python knowing about it. 32 | 33 | 34 | tensorrt_loggers = [] 35 | 36 | 37 | def _create_tensorrt_logger(verbose=False): 38 | """Create a TensorRT logger. 39 | 40 | Args: 41 | verbose (bool): whether to make the logger verbose. 42 | """ 43 | if verbose: 44 | # trt_verbosity = trt.Logger.Severity.INFO 45 | trt_verbosity = trt.Logger.Severity.VERBOSE 46 | else: 47 | trt_verbosity = trt.Logger.Severity.WARNING 48 | tensorrt_logger = trt.Logger(trt_verbosity) 49 | tensorrt_loggers.append(tensorrt_logger) 50 | return tensorrt_logger 51 | 52 | 53 | class HostDeviceMem(object): 54 | def __init__(self, host_mem, device_mem, binding_name, shape=None): 55 | self.host = host_mem 56 | self.device = device_mem 57 | self.binding_name = binding_name 58 | self.shape = shape 59 | 60 | def __str__(self): 61 | return "Host:\n" + str(self.host) + "\nDevice\n" + str(self.device) 62 | 63 | def __repr__(self): 64 | return self.__str__() 65 | 66 | 67 | def allocate_buffers(engine, context): 68 | 69 | inputs = [] 70 | outputs = [] 71 | bindings = [] 72 | stream = cuda.Stream() 73 | for binding in engine: 74 | binding_id = engine.get_binding_index(str(binding)) 75 | size = trt.volume(context.get_binding_shape(binding_id)) * engine.max_batch_size 76 | print("{}:{}".format(binding, size)) 77 | dtype = trt.nptype(engine.get_binding_dtype(binding)) 78 | host_mem = cuda.pagelocked_empty(size, dtype) 79 | device_mem = cuda.mem_alloc(host_mem.nbytes) 80 | bindings.append(int(device_mem)) 81 | if engine.binding_is_input(binding): 82 | inputs.append(HostDeviceMem(host_mem, device_mem, binding)) 83 | else: 84 | output_shape = engine.get_binding_shape(binding) 85 | if len(output_shape) == 3: 86 | dims = trt.Dims3(engine.get_binding_shape(binding)) 87 | output_shape = (engine.max_batch_size, dims[0], dims[1], dims[2]) 88 | elif len(output_shape) == 2: 89 | dims = trt.Dims2(output_shape) 90 | output_shape = (engine.max_batch_size, dims[0], dims[1]) 91 | outputs.append(HostDeviceMem(host_mem, device_mem, binding, output_shape)) 92 | 93 | return inputs, outputs, bindings, stream 94 | 95 | 96 | def do_inference(batch, context, bindings, inputs, outputs, stream): 97 | batch_size = batch.shape[0] 98 | assert len(inputs) == 1 99 | inputs[0].host = np.ascontiguousarray(batch, dtype=np.float32) 100 | [cuda.memcpy_htod_async(inp.device, inp.host, stream) for inp in inputs] 101 | 
context.execute_async(batch_size=batch_size, bindings=bindings, stream_handle=stream.handle) 102 | [cuda.memcpy_dtoh_async(out.host, out.device, stream) for out in outputs] 103 | stream.synchronize() 104 | 105 | outputs_dict = {} 106 | outputs_shape = {} 107 | for out in outputs: 108 | outputs_dict[out.binding_name] = np.reshape(out.host, out.shape) 109 | outputs_shape[out.binding_name] = out.shape 110 | 111 | return outputs_shape, outputs_dict 112 | 113 | 114 | def load_tensorrt_engine(filename, verbose=False): 115 | tensorrt_logger = _create_tensorrt_logger(verbose) 116 | 117 | if not os.path.exists(filename): 118 | raise ValueError("{} does not exist".format(filename)) 119 | 120 | with trt.Runtime(tensorrt_logger) as runtime, open(filename, "rb") as f: 121 | trt_engine = runtime.deserialize_cuda_engine(f.read()) 122 | 123 | return trt_engine 124 | -------------------------------------------------------------------------------- /tao_classification/deploy_to_deepstream/README.md: -------------------------------------------------------------------------------- 1 | # Deploy Classification model to Deepstream 2 | Some tips for deploying a TAO classification model to DeepStream. 3 | 4 | # Deploy Classification model as primary tensorrt engine 5 | There are two ways of deploying a classification model in DeepStream. 6 | One works as the primary TensorRT engine, the other as a secondary TensorRT engine. 7 | 8 | 9 | ## Detailed Steps 10 | 11 | * Create ds_classification_as_primary_gie.txt. Refer to [link](https://forums.developer.nvidia.com/t/issue-with-image-classification-tutorial-and-testing-with-deepstream-app/165835/12?u=morganh) 12 | 13 | Below is a snippet of the config file. 14 | 15 | ``` 16 | # config-file property is mandatory for any gie section. 17 | # Other properties are optional and if set will override the properties set in 18 | # the infer config file. 19 | [primary-gie] 20 | enable=1 21 | gpu-id=0 22 | #model-engine-file=your_classification.engine 23 | batch-size=1 24 | #Required by the app for OSD, not a plugin property 25 | bbox-border-color0=1;0;0;1 26 | bbox-border-color1=0;1;1;1 27 | bbox-border-color2=0;0;1;1 28 | bbox-border-color3=0;1;0;1 29 | interval=0 30 | gie-unique-id=1 31 | nvbuf-memory-type=0 32 | config-file=config_as_primary_gie.txt 33 | ``` 34 | 35 | * Create config_as_primary_gie.txt. 36 | ``` 37 | [property] 38 | gpu-id=0 39 | net-scale-factor=1.0 40 | #below offsets=b,g,r which can also be changed according to the "image_mean" in your training spec file. 41 | offsets=123.67;116.28;103.53 42 | model-color-format=1 43 | batch-size=30 44 | 45 | tlt-model-key=yourkey 46 | tlt-encoded-model=your_unpruned_or_pruned_model.etlt 47 | labelfile-path=labels.txt 48 | #int8-calib-file=cal.bin 49 | #model-engine-file=your_classification.engine 50 | #input-dims=c;h;w;0. Can also be changed according to the "input_image_size" in your training spec file. 51 | input-dims=3;224;224;0 52 | uff-input-blob-name=input_1 53 | output-blob-names=predictions/Softmax 54 | 55 | # process-mode: 2 - inferences on crops from primary detector, 1 - inferences on whole frame 56 | process-mode=1 57 | ## 0=FP32, 1=INT8, 2=FP16 mode 58 | network-mode=0 59 | 60 | network-type=1 # defines that the model is a classifier.
61 | num-detected-classes=2 62 | interval=0 63 | gie-unique-id=1 64 | #threshold=0.05 65 | classifier-async-mode=1 66 | classifier-threshold=0.2 67 | operate-on-gie-id=1 68 | #operate-on-class-ids=0 69 | ``` 70 | 71 | * Run deepstream-app 72 | ``` 73 | $ deepstream-app -c ds_classification_as_primary_gie.txt 74 | ``` 75 | 76 | # Deploy Classification model as secondary tensorrt engine 77 | 78 | ## Detailed Steps 79 | 80 | * Create ds_classification_as_secondary_gie.txt. Refer to [link](https://forums.developer.nvidia.com/t/issue-with-image-classification-tutorial-and-testing-with-deepstream-app/165835/12?u=morganh) 81 | 82 | Below is a snippet of the config file. 83 | 84 | ``` 85 | [secondary-gie3] 86 | enable=1 87 | #model-engine-file=your_classification.engine 88 | batch-size=4 89 | gpu-id=0 90 | gie-unique-id=7 91 | operate-on-gie-id=1 92 | #operate-on-class-ids=0; 93 | config-file=config_as_secondary_gie.txt 94 | ``` 95 | 96 | * Create config_as_secondary_gie.txt. 97 | 98 | ``` 99 | [property] 100 | gpu-id=0 101 | net-scale-factor=1.0 102 | #below offsets=b,g,r which can also be changed according to the "image_mean" in your training spec file. 103 | offsets=123.67;116.28;103.53 104 | model-color-format=1 105 | batch-size=30 106 | 107 | tlt-model-key=yourkey 108 | tlt-encoded-model=your_unpruned_or_pruned_model.etlt 109 | labelfile-path=labels.txt 110 | #int8-calib-file=cal.bin 111 | #model-engine-file=your_classification.engine 112 | #input-dims=c;h;w;0. Can also be changed according to the "input_image_size" in your training spec file. 113 | input-dims=3;224;224;0 114 | uff-input-blob-name=input_1 115 | output-blob-names=predictions/Softmax 116 | 117 | # process-mode: 2 - inferences on crops from primary detector, 1 - inferences on whole frame 118 | process-mode=2 119 | ## 0=FP32, 1=INT8, 2=FP16 mode 120 | network-mode=0 121 | 122 | network-type=1 # defines that the model is a classifier. 123 | num-detected-classes=2 124 | interval=0 125 | gie-unique-id=1 126 | #threshold=0.05 127 | classifier-async-mode=1 128 | classifier-threshold=0.2 129 | operate-on-gie-id=1 130 | #operate-on-class-ids=0 131 | ``` 132 | 133 | * Run deepstream-app 134 | ``` 135 | $ deepstream-app -c ds_classification_as_secondary_gie.txt 136 | ``` 137 | 138 | # Other tips 139 | ## Generate an AVI video file as the input test file. It works better than an MP4 input file. 140 | ``` 141 | gst-launch-1.0 multifilesrc location="/tmp/%d.jpg" caps="image/jpeg,framerate=30/1" ! jpegdec ! x264enc ! avimux ! filesink location="out.avi" 142 | ``` 143 | 144 | ## Change "scaling-filter". More info can be found in [DeepStream Gst-nvinfer Plugin](https://docs.nvidia.com/metropolis/deepstream/dev-guide/text/DS_plugin_gst-nvinfer.html#gst-nvinfer-file-configuration-specifications) 145 | ``` 146 | scaling-filter=5 147 | ``` 148 | -------------------------------------------------------------------------------- /tao_api/how_to_modify_code_for_TAO_API.md: -------------------------------------------------------------------------------- 1 | # How to modify code for TAO API 2 | 3 | This guide walks you through the detailed steps of modifying the code and generating a new docker image for TAO API. 4 | 5 | 6 | ## Trigger docker in one host machine and modify code 7 | Open a terminal and trigger the 4.0.0 TAO API docker. 8 | ```shell 9 | $ docker run -it --name tao-api-fixed nvcr.io/nvidia/tao/tao-toolkit:4.0.0-api /bin/bash 10 | ``` 11 | 12 | Open another terminal. You need to build a new docker image based on nvcr.io/nvidia/tao/tao-toolkit:4.0.0-api.
13 | First, create a folder, copy the file from the container to your local host, and modify it as below. 14 | ``` 15 | nvidia@host:~$ mkdir docker_build && cd docker_build 16 | nvidia@host:~/docker_build$ docker cp tao-api-fixed:/opt/api ./ 17 | nvidia@host:~/docker_build$ cd api 18 | nvidia@host:~/docker_build/api$ vim handlers/actions.py 19 | ``` 20 | 21 | Go to line:779 and change the code from 22 | ```shell 23 | if find_trained_weight == []: 24 | if not ptm_id == "": 25 | model_dir = f"/shared/users/00000000-0000-0000-0000-000000000000/models/{ptm_id}" 26 | if job_context.network == "lprnet": 27 | pretrained_model_file = glob.glob(model_dir+"/*/*.tlt") 28 | else: 29 | pretrained_model_file = glob.glob(model_dir+"/*/*.hdf5") 30 | else: 31 | find_trained_weight.sort(reverse=False) 32 | trained_weight = find_trained_weight[0] 33 | ``` 34 | to 35 | 36 | ```shell 37 | if find_trained_weight == []: 38 | if not ptm_id == "": 39 | model_dir = f"/shared/users/00000000-0000-0000-0000-000000000000/models/{ptm_id}" 40 | pretrained_model_file = [] 41 | pretrained_model_file = glob.glob(model_dir+"/*/*.hdf5") + glob.glob(model_dir+"/*/*.tlt") 42 | if len(pretrained_model_file) > 1: 43 | pretrained_model_file = pretrained_model_file[0] 44 | 45 | assert pretrained_model_file != [], "error pretrained_model_file" 46 | else: 47 | find_trained_weight.sort(reverse=False) 48 | trained_weight = find_trained_weight[0] 49 | ``` 50 | 51 | 52 | Change docker_images.py and change the code 53 | ```shell 54 | nvidia@host:~/docker_build/api$ vim handlers/docker_images.py 55 | ``` 56 | Go to line 23 and replace the docker image name from 57 | ```shell 58 | "api": os.getenv('IMAGE_API', default='nvcr.io/nvidia/tao/tao-toolkit:4.0.0-api') 59 | ``` 60 | To 61 | 62 | ```shell 63 | "api": os.getenv('IMAGE_API', default='nvcr.io/nvidia/tao/tao-toolkit:4.0.0-api-fix') 64 | ``` 65 | 66 | ## Generate a new docker 67 | Create a Dockerfile 68 | ```shell 69 | nvidia@host:~/docker_build/api$ mv Dockerfile Dockerfile_bak 70 | nvidia@host:~/docker_build/api$ vim Dockerfile 71 | ``` 72 | 73 | Below is the content of Dockerfile 74 | ```shell 75 | nvidia@host:~/docker_build/api$ cat Dockerfile 76 | ################ BUILD IMAGE ################# 77 | FROM nvcr.io/nvidia/tao/tao-toolkit:4.0.0-api 78 | # Copy project files 79 | WORKDIR /opt/api 80 | COPY handlers/actions.py handlers/actions.py 81 | COPY handlers/docker_images.py handlers/docker_images.py 82 | ENV PATH=“/opt/ngccli/ngc-cli:${PATH}” 83 | # Default command 84 | CMD /bin/bash app_start.sh 85 | ``` 86 | 87 | ```shell 88 | nvidia@host:~/docker_build/api$ docker build . -t nvcr.io/nvidia/tao/tao-toolkit:4.0.0-api-fix 89 | ``` 90 | 91 | ## Save the docker to tar file 92 | ```shell 93 | $ docker save -o tao-api.tar nvcr.io/nvidia/tao/tao-toolkit:4.0.0-api-fix 94 | ``` 95 | 96 | Copy the tar file to k8s machine 97 | ```shell 98 | $ scp tao-api.tar ip_k8s_machine:/path/to/save 99 | ``` 100 | 101 | ## Import the new image 102 | In k8s machines, 103 | ```shell 104 | $ sudo ctr -n=k8s.io image import tao-api.tar 105 | ``` 106 | 107 | ## Delete the old pods 108 | Delete existing tao-toolkit-api pods 109 | ```shell 110 | $ helm delete tao-toolkit-api 111 | ``` 112 | 113 | ## Download chart and modify 114 | Download latest helm chart. 
115 | ```shell 116 | $ helm fetch https://helm.ngc.nvidia.com/nvidia/tao/charts/tao-toolkit-api-4.0.2.tgz --username='$oauthtoken' --password= 117 | $ mkdir tao-toolkit-api && tar -zxvf tao-toolkit-api-4.0.2.tgz -C tao-toolkit-api 118 | $ cd tao-toolkit-api/ 119 | ``` 120 | 121 | Modify the image name. 122 | ```shell 123 | $ vi tao-toolkit-api/values.yaml 124 | 125 | # in line 2 126 | From 127 | image: nvcr.io/nvidia/tao/tao-toolkit:4.0.2-api 128 | To 129 | image: nvcr.io/nvidia/tao/tao-toolkit:4.0.0-api-fix 130 | 131 | # in line 4 132 | From 133 | imagePullPolicy: Always 134 | To 135 | imagePullPolicy: IfNotPresent 136 | ``` 137 | 138 | ## Install latest chart 139 | ```shell 140 | $ helm install tao-toolkit-api tao-toolkit-api/ --namespace default 141 | ``` 142 | 143 | Verify the latest code inside the container. 144 | ```shell 145 | $ kubectl get pods 146 | $ kubectl exec -it tao-toolkit-api-app-pod-5d4d74c65c-k8zt5 -- /bin/bash 147 | root@tao-toolkit-api-app-pod-5d4d74c65c-k8zt5:/opt/api# apt-get install vim 148 | root@tao-toolkit-api-app-pod-5d4d74c65c-k8zt5:/opt/api# vim handlers/actions.py 149 | ``` 150 | -------------------------------------------------------------------------------- /tao_forum_faq/FAQ.md: -------------------------------------------------------------------------------- 1 | # FAQ 2 | 3 | ## FPENet 4 | 1. *Why is the bounding box recalculated just using the key points when I have also supplied the face bbox ground truth in the annotation file? What is the purpose of the bbox in the ground truth file?* 5 | 6 | The annotation file just provides all the key points. FPENet finds the xmin, ymin, xmax, and ymax of the points, calculates a square face bounding box based on them, then crops that bounding box from the image and scales the key points to the target resolution. 7 | 8 | ## Emotionnet 9 | 1. *How to find the input name of EmotionNet?* 10 | ``` 11 | tao-converter model.etlt 12 | -k nvidia_tlt 13 | -t fp32 14 | -p input_landmarks:0,1x1x136x1,1x1x136x1,2x1x136x1 15 | -e model.engine 16 | ``` 17 | 18 | ## tlt or etlt 19 | 1. *How to decode etlt file?* 20 | ``` 21 | $ docker run --runtime=nvidia -it --rm -v /home/morganh:/home/morganh nvcr.io/nvidia/tao/tao-toolkit:5.0.0-tf1.15.5 /bin/bash 22 | # wget --content-disposition 'https://api.ngc.nvidia.com/v2/models/org/nvidia/team/tao/fpenet/deployable_v1.0/files?redirect=true&path=model.etlt' -O fpenet_model_v1.0.etlt 23 | ``` 24 | 25 | Generate the decode_etlt.py file as below.
26 | ``` 27 | import argparse 28 | import struct 29 | from nvidia_tao_tf1.encoding import encoding 30 | 31 | def parse_command_line(args): 32 | '''Parse command line arguments.''' 33 | parser = argparse.ArgumentParser(description='ETLT Decode Tool') 34 | parser.add_argument('-m', 35 | '--model', 36 | type=str, 37 | required=True, 38 | help='Path to the etlt file.') 39 | parser.add_argument('-o', 40 | '--uff', 41 | required=True, 42 | type=str, 43 | help='The path to the uff file.') 44 | parser.add_argument('-k', 45 | '--key', 46 | required=True, 47 | type=str, 48 | help='encryption key.') 49 | return parser.parse_args(args) 50 | 51 | 52 | def decode(tmp_etlt_model, tmp_uff_model, key): 53 | with open(tmp_uff_model, 'wb') as temp_file, open(tmp_etlt_model, 'rb') as encoded_file: 54 | size = encoded_file.read(4) 55 | size = struct.unpack(" --target_dir 19 | ``` 20 | 21 | Assume that the paths from inside the TAO Toolkit container to the dataset are as follows: 22 | 23 | ```shell 24 | /home//tao-experiments/data/imagenet2012/train 25 | /home//tao-experiments/data/imagenet2012/val 26 | ``` 27 | 28 | The first path is a directory that contains all the training images, where each of the 1K classes has its own subdirectory. The same is assumed for the validation split as well. The structure of the classification dataset follows the [TAO Toolkit classification model training requirements](https://docs.nvidia.com/tao/tao-toolkit/text/data_annotation_format.html#image-classification-format). 29 | 30 | ### Training specification: 31 | For every TAO Toolkit model training, you have a configuration file (spec file) to configure some necessary parameters used to customize the model and the training process. Please refer to 32 | [classification_cspdarknet53.txt](specs/classification_cspdarknet53.txt) for the training spec file. 33 | 34 | ### Start Training: 35 | Run following command to start training on 8 GPUs: 36 | 37 | ``` 38 | tao classification train --gpus 8 -e -r -k nvidia_tao 39 | ``` 40 | For 8 x A100 GPUs, the training will require about ~30 hours. And you might get around 78.3% of val accuracy. 41 | 42 | 43 | ## Train YOLOV4-CSPDarkNet53 on COCO14 dataset 44 | In this section, you will train YOLOV4 on COCO14 dataset with imagenet pretrained CSPDarkNet53 backbone you create in the first section. 45 | 46 | ### Prepare COCO14 dataset 47 | Firstly, you will prepare the COCO14 dataset for training. To compare with the SOTA model, you will do the training/testing split the same way as the original YOLOV4. And this split is different from the official split of COCO14. You could download the original COCO14 dataset and the split images list (5k.txt/trainvalno5k.txt)by running [get_coco_dataset.sh](https://raw.githubusercontent.com/AlexeyAB/darknet/master/scripts/get_coco_dataset.sh) 48 | 49 | After the downloading dataset, you should convert the json format labels to KITTI format by using [coco2kitti.py](https://github.com/NVIDIA-AI-IOT/deepstream_tao_apps/blob/release/tao3.0/misc/dev_blog/SOTA/dataset_tools/coco2kitti.py) 50 | 51 | ```shell 52 | # Convert instances_train2014.json to KITTI format 53 | python3 ./coco2kitti.py train2014 54 | mv ./labels ./train2014_KITTI 55 | 56 | # Convert instances_val2014.json to KITTI format 57 | python3 ./coco2kitti.py val2014 58 | mv ./labels ./val2014_KITTI 59 | ``` 60 | Once you get the images and KITTI format labels, you could re-split them according to 5k.txt / trainvalno5k.txt. 
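Below is a minimal re-split sketch (an illustrative addition, not part of the original scripts). It assumes `trainvalno5k.txt` / `5k.txt` list one image path per line, that the KITTI labels produced above live in `train2014_KITTI` / `val2014_KITTI`, and that the output folder names (`yolo_split/...`) are placeholders you can rename to match your training spec:

```python
# Hypothetical helper: re-split COCO14 images and KITTI labels according to the
# YOLO list files. Labels are looked up in both KITTI folders because the YOLO
# split mixes images from the official train2014 and val2014 sets.
import os
import shutil

def resplit(list_file, out_img_dir, out_lbl_dir, kitti_label_dirs):
    os.makedirs(out_img_dir, exist_ok=True)
    os.makedirs(out_lbl_dir, exist_ok=True)
    with open(list_file, "r") as f:
        for line in f:
            img_path = line.strip()
            if not img_path:
                continue
            img_name = os.path.basename(img_path)
            label_name = os.path.splitext(img_name)[0] + ".txt"
            shutil.copy(img_path, os.path.join(out_img_dir, img_name))
            for label_dir in kitti_label_dirs:
                label_path = os.path.join(label_dir, label_name)
                if os.path.exists(label_path):
                    shutil.copy(label_path, os.path.join(out_lbl_dir, label_name))
                    break

kitti_label_dirs = ["./train2014_KITTI", "./val2014_KITTI"]
resplit("trainvalno5k.txt", "./yolo_split/train/images", "./yolo_split/train/labels", kitti_label_dirs)
resplit("5k.txt", "./yolo_split/val/images", "./yolo_split/val/labels", kitti_label_dirs)
```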
61 | 62 | ### Training specification: 63 | Before we start the training, there are 3 more steps to get better results on the dataset. 64 | 65 | #### Generate anchor setting: 66 | TAO Toolkit YOLOV4 supports clustering the ground truth bboxes to find a suitable anchor setting for a specific dataset: 67 | 68 | ```shell 69 | tao yolo_v4 kmeans -l \ 70 | -i \ 71 | -x \ 72 | -y 73 | ``` 74 | 75 | You can replace the `small_anchor_shape`, `mid_anchor_shape`, `big_anchor_shape` in `yolov4_config` with the generated anchor shapes. 76 | 77 | #### Enable model EMA 78 | TAO Toolkit YOLOV4 supports the model exponential moving average (EMA) during the training. Enable it by setting `model_ema: true` in the `train_config`. 79 | 80 | You can also do a hyperparameter search (learning rate, learning rate scheduler, regularization factor) using part of the dataset or fewer epochs. Here we provide a [spec](specs/yolov4_416_coco14.txt) to train YOLOV4-416-Leaky on the COCO14 YOLO split. In this spec, we use raw KITTI-style labels for training. 81 | 82 | ### Start Training: 83 | Run the following command to start training on 8 GPUs: 84 | 85 | ``` 86 | tao yolo_v4 train --gpus=8 -e -r -k nvidia_tao 87 | ``` 88 | The training will require ~130 hours on 8x V100 16GB, and you might get around 60.9% mAP@0.5 using COCO-style metrics. 89 | -------------------------------------------------------------------------------- /tao_key_points_estimation/tensorrt_inference/fpenet_trt_inference.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-License-Identifier: Apache-2.0 3 | 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License.
14 | 15 | 16 | import argparse 17 | import cv2 18 | from trt_inference import allocate_buffers, do_inference, load_tensorrt_engine 19 | import json 20 | import numpy as np 21 | import os 22 | import tqdm 23 | 24 | INPUT_CHANNEL=1 25 | INPUT_WIDTH=80 26 | INPUT_HEIGHT=80 27 | NUM_KEYPOINTS=6 28 | 29 | 30 | def preprocess(sample): 31 | fname = str(sample['filename']) 32 | 33 | for chunk in sample['annotations']: 34 | if 'facebbox' not in chunk['class'].lower(): 35 | continue 36 | 37 | bbox_data = (entry for entry in chunk if ('class' not in entry and 38 | 'version' not in entry)) 39 | for entry in bbox_data: 40 | if 'face_tight_bboxheight' in str(entry).lower(): 41 | height = int(float(chunk[entry])) 42 | if 'face_tight_bboxwidth' in str(entry).lower(): 43 | width = int(float(chunk[entry])) 44 | if 'face_tight_bboxx' in str(entry).lower(): 45 | x = int(float(chunk[entry])) 46 | if 'face_tight_bboxy' in str(entry).lower(): 47 | y = int(float(chunk[entry])) 48 | 49 | image = cv2.imread(os.path.join(fname)) 50 | 51 | image_shape = image.shape 52 | image_height = image_shape[0] 53 | image_width = image_shape[1] 54 | image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) 55 | image = np.float32(image) 56 | 57 | # transform it into a square bbox wrt the longer side 58 | longer_side = max(width, height) 59 | new_width = longer_side 60 | new_height = longer_side 61 | x = int(x - (new_width - width) / 2) 62 | y = int(y - (new_height - height) / 2) 63 | x = min(max(x, 0), image_width) 64 | y = min(max(y, 0), image_height) 65 | new_width = min(new_width, image_width - x) 66 | new_height = min(new_height, image_height - y) 67 | new_width = min(new_width, new_height) 68 | new_height = new_width # make it a square bbox 69 | crop_bbox = [x, y, new_width, new_height] 70 | 71 | # crop the face bounding box 72 | img_crop = image[y:y + new_height, x:x + new_width, :] # pylint:disable=E1136 73 | image_resized = cv2.resize(img_crop, 74 | (INPUT_HEIGHT, INPUT_WIDTH), 75 | interpolation=cv2.INTER_CUBIC) 76 | if INPUT_CHANNEL == 1: 77 | image_resized = cv2.cvtColor(image_resized, cv2.COLOR_BGR2GRAY) 78 | image_resized = np.expand_dims(image_resized, 2) 79 | # make it channel first (channel, height, width) 80 | image_resized = np.transpose(image_resized, (2, 0, 1)) 81 | image_resized = np.expand_dims(image_resized, 0).astype(np.float32) # add batch dimension 82 | 83 | return crop_bbox, image_resized 84 | 85 | 86 | def postprocess(outputs, crop_bbox): 87 | 88 | keypoints = outputs['softargmax/strided_slice:0'] 89 | scale = float(crop_bbox[2]) / INPUT_HEIGHT 90 | shift = np.tile(np.array((crop_bbox[0], crop_bbox[1])), 91 | (NUM_KEYPOINTS, 1)) 92 | result = (keypoints[0, :, :] * scale) + shift 93 | 94 | return result 95 | 96 | 97 | if __name__ == "__main__": 98 | 99 | parser = argparse.ArgumentParser(description='Do FPENet inference using TRT') 100 | parser.add_argument('--input_json', type=str, help='input json path', required=True) 101 | parser.add_argument('--trt_engine', type=str, help='trt engine file path', required=True) 102 | parser.add_argument('--output_img_dir', type=str, help='output imgs save path') 103 | 104 | args = parser.parse_args() 105 | 106 | batch_size = 1 107 | engine_file = args.trt_engine 108 | input_json = args.input_json 109 | output_dir = args.output_img_dir 110 | 111 | 112 | with load_tensorrt_engine(engine_file) as engine: 113 | with engine.create_execution_context() as context: 114 | context.set_binding_shape(0, (1, INPUT_CHANNEL, INPUT_HEIGHT, INPUT_WIDTH)) 115 | inputs, outputs, bindings, stream 
= allocate_buffers(engine, context) 116 | json_data = json.loads(open(input_json , 'r').read()) 117 | results = [] 118 | for sample in tqdm.tqdm(json_data): 119 | fname = str(sample['filename']) 120 | crop_bbox, img = preprocess(sample) 121 | outputs_shape, outputs_data = do_inference(batch=img, context=context, 122 | bindings=bindings, inputs=inputs, 123 | outputs=outputs, stream=stream) 124 | keypoints = postprocess(outputs_data, crop_bbox) 125 | keypoints = keypoints[0] 126 | img = cv2.imread(fname) 127 | for idx, kp in enumerate(keypoints): 128 | x = kp[0] 129 | y = kp[1] 130 | cv2.circle(img,(int(x), int(y)), 1, (0,255,0), 2) 131 | cv2.putText(img, str(idx), (int(x), int(y)), cv2.FONT_HERSHEY_SIMPLEX, 0.4, (0,255,0), 1) 132 | cv2.imwrite(os.path.join(output_dir, fname.split("/")[-1]), img) 133 | -------------------------------------------------------------------------------- /tao_action_recognition/tensorrt_inference/ar_trt_inference.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2021 NVIDIA CORPORATION. All rights reserved. 2 | 3 | # Permission is hereby granted, free of charge, to any person obtaining a copy 4 | # of this software and associated documentation files (the "Software"), to deal 5 | # in the Software without restriction, including without limitation the rights 6 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | # copies of the Software, and to permit persons to whom the Software is 8 | # furnished to do so, subject to the following conditions: 9 | 10 | # The above copyright notice and this permission notice shall be included in all 11 | # copies or substantial portions of the Software. 12 | 13 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 19 | # SOFTWARE. 
20 | 21 | import argparse 22 | from trt_inference import allocate_buffers, do_inference, load_tensorrt_engine 23 | import numpy as np 24 | import PIL 25 | from PIL import Image 26 | import os 27 | 28 | SEQ = 32 29 | CENTER_CROP = False 30 | INPUT_2D = False 31 | 32 | 33 | def preprocess_ds_ncdhw(batch_img): 34 | batch_img_array = np.array([np.array(img) for img in batch_img], dtype=np.float32) 35 | batch_img_array = ((batch_img_array / 255.0) - 0.5) / 0.5 36 | batch_transpose = np.transpose(batch_img_array, (3, 0, 1, 2)) 37 | if INPUT_2D: 38 | batch_reshape = np.reshape(batch_transpose, (3*SEQ, 224, 224)) 39 | else: 40 | batch_reshape = batch_transpose 41 | 42 | return batch_reshape 43 | 44 | 45 | def test_consecutive_sample(max_sample_cnt, seq_length, sample_rate=1): 46 | """Choose the middle consecutive frames of each video.""" 47 | total_frames_req = seq_length * sample_rate 48 | average_duration = max_sample_cnt - total_frames_req + 1 49 | if average_duration > 0: 50 | start_idx = int(average_duration/2.0) 51 | else: 52 | start_idx = 0 53 | 54 | img_ids = start_idx + np.arange(seq_length) * sample_rate 55 | # # loop the video to form sequence: 56 | img_ids = np.mod(img_ids, max_sample_cnt) 57 | 58 | return img_ids 59 | 60 | 61 | def sample_patch(img_root_path, seq_len=SEQ): 62 | img_list = sorted(os.listdir(img_root_path)) 63 | img_id_list = [] 64 | if len(img_list) < seq_len: 65 | img_ids = np.arange(seq_len) 66 | img_ids = np.mod(img_ids, len(img_list)) 67 | img_id_list.append(img_ids) 68 | else: 69 | end_index = len(img_list) - seq_len + 1 70 | for idx in range(end_index): 71 | img_ids = idx + np.arange(seq_len) 72 | img_id_list.append(img_ids) 73 | return img_id_list 74 | 75 | 76 | def resize_and_center_crop(img): 77 | # resize the short side to 224 78 | w, h = img.size 79 | if h <= w: 80 | target_w = int((224.0 / float(h)) * w) 81 | resized_img = img.resize((target_w, 224), resample=PIL.Image.BILINEAR) 82 | else: 83 | target_h = int((224.0 / float(w)) * h) 84 | resized_img = img.resize((224, target_h), resample=PIL.Image.BILINEAR) 85 | 86 | # center crop to 224x224 87 | resized_w, resized_h = resized_img.size 88 | center_x = (resized_w - 224) / 2 89 | center_y = (resized_h - 224) / 2 90 | crop_img = resized_img.crop((center_x, center_y, center_x + 224, center_y + 224)) 91 | 92 | return crop_img 93 | 94 | 95 | def load_images(img_ids, img_root_path): 96 | img_list = sorted(os.listdir(img_root_path)) 97 | 98 | raw_imgs = [] 99 | for img_id in img_ids: 100 | img_path = os.path.join(img_root_path, img_list[img_id]) 101 | img = Image.open(img_path) 102 | if CENTER_CROP: 103 | img = resize_and_center_crop(img) 104 | else: 105 | img = img.resize((224, 224), resample=PIL.Image.BILINEAR) 106 | 107 | raw_imgs.append(img) 108 | 109 | images = preprocess_ds_ncdhw(raw_imgs) 110 | 111 | return images 112 | 113 | 114 | def get_prob(pred): 115 | 116 | pred = pred - pred.max() 117 | pred_exp = np.exp(pred) 118 | 119 | return pred_exp.max()/pred_exp.sum() 120 | 121 | 122 | if __name__ == "__main__": 123 | 124 | parser = argparse.ArgumentParser(description='Do AR inference using TRT') 125 | parser.add_argument('--input_images_folder', type=str, help='input images path', required=True) 126 | parser.add_argument('--trt_engine', type=str, help='trt engine file path', required=True) 127 | parser.add_argument('--center_crop', action="store_true", help='resize the short side of image to 224 and center crop 224x224') 128 | parser.add_argument('--input_2d', action="store_true", help='set if it is a 2d 
model') 129 | 130 | args = parser.parse_args() 131 | 132 | if args.center_crop: 133 | CENTER_CROP = True 134 | 135 | if args.input_2d: 136 | INPUT_2D = True 137 | 138 | batch_size = 1 139 | engine_file = args.trt_engine 140 | label_map = ["push", "fall_floor", "walk", "run", "ride_bike"] 141 | img_root = args.input_images_folder 142 | batch_cnt = 1 143 | 144 | total_cnt = 0 145 | ac_cnt = 0 146 | 147 | with load_tensorrt_engine(engine_file) as engine: 148 | with engine.create_execution_context() as context: 149 | if INPUT_2D: 150 | context.set_binding_shape(0, (1, 3*SEQ, 224, 224)) 151 | else: 152 | context.set_binding_shape(0, (1, 3, SEQ, 224, 224)) 153 | inputs, outputs, bindings, stream = allocate_buffers(engine, context) 154 | img_ids_list = sample_patch(img_root) 155 | for img_ids in img_ids_list: 156 | images = load_images(img_ids, img_root) 157 | for sample_id in range(batch_size): 158 | batch_images = images 159 | # Hard Coded For explicit_batch and the ONNX model's batch_size = 1 160 | batch_images = batch_images[np.newaxis, :, :, :] 161 | outputs_shape, outputs_data = do_inference(batch=batch_images, context=context, 162 | bindings=bindings, inputs=inputs, 163 | outputs=outputs, stream=stream) 164 | 165 | pred_data = np.squeeze(outputs_data['fc_pred']) 166 | label = label_map[np.argmax(pred_data)] 167 | prob = get_prob(pred_data) 168 | print("{} : {} {}".format(img_ids, label, prob)) 169 | -------------------------------------------------------------------------------- /tao_action_recognition/tensorrt_inference/ar_of_trt_inference.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2021 NVIDIA CORPORATION. All rights reserved. 2 | 3 | # Permission is hereby granted, free of charge, to any person obtaining a copy 4 | # of this software and associated documentation files (the "Software"), to deal 5 | # in the Software without restriction, including without limitation the rights 6 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | # copies of the Software, and to permit persons to whom the Software is 8 | # furnished to do so, subject to the following conditions: 9 | 10 | # The above copyright notice and this permission notice shall be included in all 11 | # copies or substantial portions of the Software. 12 | 13 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 19 | # SOFTWARE. 
20 | 21 | import argparse 22 | from trt_inference import allocate_buffers, do_inference, load_tensorrt_engine 23 | import numpy as np 24 | import PIL 25 | from PIL import Image 26 | import os 27 | 28 | SEQ = 32 29 | CENTER_CROP = False 30 | INPUT_2D = False 31 | 32 | 33 | def preprocess_ds_ncdhw(batch_img): 34 | batch_img_array = np.array(batch_img, dtype=np.float32) 35 | batch_img_array = ((batch_img_array / 255.0) - 0.5) / 0.5 36 | batch_transpose = np.transpose(batch_img_array, (3, 0, 1, 2)) 37 | if INPUT_2D: 38 | batch_reshape = np.reshape(batch_transpose, (2*SEQ, 224, 224)) 39 | else: 40 | batch_reshape = batch_transpose 41 | 42 | return batch_reshape 43 | 44 | 45 | def test_consecutive_sample(max_sample_cnt, seq_length, sample_rate=1): 46 | """Choose the middle consecutive frames of each video.""" 47 | total_frames_req = seq_length * sample_rate 48 | average_duration = max_sample_cnt - total_frames_req + 1 49 | if average_duration > 0: 50 | start_idx = int(average_duration/2.0) 51 | else: 52 | start_idx = 0 53 | 54 | img_ids = start_idx + np.arange(seq_length) * sample_rate 55 | # # loop the video to form sequence: 56 | img_ids = np.mod(img_ids, max_sample_cnt) 57 | 58 | return img_ids 59 | 60 | 61 | def sample_patch(img_root_path, seq_len=SEQ): 62 | img_list = sorted(os.listdir(os.path.join(img_root_path, "u"))) 63 | img_id_list = [] 64 | if len(img_list) < seq_len: 65 | img_ids = np.arange(seq_len) 66 | img_ids = np.mod(img_ids, len(img_list)) 67 | img_id_list.append(img_ids) 68 | else: 69 | end_index = len(img_list) - seq_len + 1 70 | for idx in range(end_index): 71 | img_ids = idx + np.arange(seq_len) 72 | img_id_list.append(img_ids) 73 | return img_id_list 74 | 75 | 76 | def resize_and_center_crop(img): 77 | # resize the short side to 224 78 | w, h = img.size 79 | if h <= w: 80 | target_w = int((256.0 / float(h)) * w) 81 | resized_img = img.resize((target_w, 224), resample=PIL.Image.BILINEAR) 82 | else: 83 | target_h = int((256.0 / float(w)) * h) 84 | resized_img = img.resize((224, target_h), resample=PIL.Image.BILINEAR) 85 | 86 | # center crop to 224x224 87 | resized_w, resized_h = resized_img.size 88 | center_x = (resized_w - 224) / 2 89 | center_y = (resized_h - 224) / 2 90 | crop_img = resized_img.crop((center_x, center_y, center_x + 224, center_y + 224)) 91 | 92 | return crop_img 93 | 94 | 95 | def load_images(img_ids, img_root_path): 96 | u_root_path = os.path.join(img_root_path, "u") 97 | v_root_path = os.path.join(img_root_path, "v") 98 | u_list = sorted(os.listdir(u_root_path)) 99 | v_list = sorted(os.listdir(v_root_path)) 100 | 101 | raw_imgs = [] 102 | for img_id in img_ids: 103 | u_img_path = os.path.join(u_root_path, u_list[img_id]) 104 | v_img_path = os.path.join(v_root_path, v_list[img_id]) 105 | u_img = Image.open(u_img_path) 106 | v_img = Image.open(v_img_path) 107 | if CENTER_CROP: 108 | u_img = resize_and_center_crop(u_img) 109 | v_img = resize_and_center_crop(v_img) 110 | else: 111 | u_img = u_img.resize((224, 224), resample=PIL.Image.BILINEAR) 112 | v_img = v_img.resize((224, 224), resample=PIL.Image.BILINEAR) 113 | 114 | #stack of 115 | img = np.stack((np.array(u_img), np.array(v_img)), axis=-1) 116 | raw_imgs.append(img) 117 | 118 | images = preprocess_ds_ncdhw(raw_imgs) 119 | 120 | return images 121 | 122 | 123 | def get_prob(pred): 124 | 125 | pred = pred - pred.max() 126 | pred_exp = np.exp(pred) 127 | 128 | return pred_exp.max()/pred_exp.sum() 129 | 130 | 131 | if __name__ == "__main__": 132 | 133 | parser = argparse.ArgumentParser(description='Do AR 
inference using TRT') 134 | parser.add_argument('--input_images_folder', type=str, help='input images path', required=True) 135 | parser.add_argument('--trt_engine', type=str, help='trt engine file path', required=True) 136 | parser.add_argument('--center_crop', action="store_true", help='resize the short side of image to 224 and center crop 224x224') 137 | parser.add_argument('--input_2d', action="store_true", help='set if it is a 2d model') 138 | 139 | args = parser.parse_args() 140 | 141 | if args.center_crop: 142 | CENTER_CROP = True 143 | 144 | if args.input_2d: 145 | INPUT_2D = True 146 | 147 | batch_size = 1 148 | engine_file = args.trt_engine 149 | label_map = ["push", "fall_floor", "walk", "run", "ride_bike"] 150 | img_root = args.input_images_folder 151 | batch_cnt = 1 152 | 153 | total_cnt = 0 154 | ac_cnt = 0 155 | 156 | with load_tensorrt_engine(engine_file) as engine: 157 | with engine.create_execution_context() as context: 158 | if INPUT_2D: 159 | context.set_binding_shape(0, (1, 2*SEQ, 224, 224)) 160 | else: 161 | context.set_binding_shape(0, (1, 2, SEQ, 224, 224)) 162 | inputs, outputs, bindings, stream = allocate_buffers(engine, context) 163 | for class_name in os.listdir(img_root): 164 | class_path = os.path.join(img_root, class_name) 165 | cls_total= 0 166 | cls_ac = 0 167 | for video in os.listdir(class_path): 168 | total_cnt += 1 169 | cls_total +=1 170 | video_path = os.path.join(class_path, video) 171 | max_sample_cnt = len(os.listdir(os.path.join(video_path, "u"))) 172 | img_ids_list = [test_consecutive_sample(max_sample_cnt, SEQ)] 173 | for img_ids in img_ids_list: 174 | images = load_images(img_ids, video_path) 175 | for sample_id in range(batch_size): 176 | batch_images = images 177 | # Hard Coded For explicit_batch and the ONNX model's batch_size = 1 178 | batch_images = batch_images[np.newaxis, :, :, :] 179 | outputs_shape, outputs_data = do_inference(batch=batch_images, context=context, 180 | bindings=bindings, inputs=inputs, 181 | outputs=outputs, stream=stream) 182 | 183 | pred_data = np.squeeze(outputs_data['fc_pred']) 184 | label = label_map[np.argmax(pred_data)] 185 | if (label==class_name): 186 | ac_cnt += 1 187 | cls_ac +=1 188 | # prob = get_prob(pred_data) 189 | # print("{} : {} {}".format(img_ids, label, prob)) 190 | print("{} : {}".format(class_name, float(cls_ac)/float(cls_total))) 191 | 192 | print("Acc: {}".format(float(ac_cnt)/float(total_cnt))) -------------------------------------------------------------------------------- /tao_pointpillars/tensorrt_sample/src/postprocess.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 3 | * SPDX-License-Identifier: Apache-2.0 4 | * 5 | * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * you may not use this file except in compliance with the License. 7 | * You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include "postprocess.h" 24 | 25 | #define checkCudaErrors(status) \ 26 | { \ 27 | if (status != 0) \ 28 | { \ 29 | std::cout << "Cuda failure: " << cudaGetErrorString(status) \ 30 | << " at line " << __LINE__ \ 31 | << " in file " << __FILE__ \ 32 | << " error status: " << status \ 33 | << std::endl; \ 34 | abort(); \ 35 | } \ 36 | } 37 | 38 | const float ThresHold = 1e-8; 39 | 40 | inline float cross(const float2 p1, const float2 p2, const float2 p0) { 41 | return (p1.x - p0.x) * (p2.y - p0.y) - (p2.x - p0.x) * (p1.y - p0.y); 42 | } 43 | 44 | inline int check_box2d(const Bndbox box, const float2 p) { 45 | const float MARGIN = 1e-2; 46 | float center_x = box.x; 47 | float center_y = box.y; 48 | float angle_cos = cos(-box.rt); 49 | float angle_sin = sin(-box.rt); 50 | float rot_x = (p.x - center_x) * angle_cos + (p.y - center_y) * (-angle_sin); 51 | float rot_y = (p.x - center_x) * angle_sin + (p.y - center_y) * angle_cos; 52 | 53 | return (fabs(rot_x) < box.l / 2 + MARGIN && fabs(rot_y) < box.w / 2 + MARGIN); 54 | } 55 | 56 | bool intersection(const float2 p1, const float2 p0, const float2 q1, const float2 q0, float2 &ans) { 57 | 58 | if (( std::min(p0.x, p1.x) <= std::max(q0.x, q1.x) && 59 | std::min(q0.x, q1.x) <= std::max(p0.x, p1.x) && 60 | std::min(p0.y, p1.y) <= std::max(q0.y, q1.y) && 61 | std::min(q0.y, q1.y) <= std::max(p0.y, p1.y) ) == 0) 62 | return false; 63 | 64 | 65 | float s1 = cross(q0, p1, p0); 66 | float s2 = cross(p1, q1, p0); 67 | float s3 = cross(p0, q1, q0); 68 | float s4 = cross(q1, p1, q0); 69 | 70 | if (!(s1 * s2 > 0 && s3 * s4 > 0)) 71 | return false; 72 | 73 | float s5 = cross(q1, p1, p0); 74 | if (fabs(s5 - s1) > ThresHold) { 75 | ans.x = (s5 * q0.x - s1 * q1.x) / (s5 - s1); 76 | ans.y = (s5 * q0.y - s1 * q1.y) / (s5 - s1); 77 | 78 | } else { 79 | float a0 = p0.y - p1.y, b0 = p1.x - p0.x, c0 = p0.x * p1.y - p1.x * p0.y; 80 | float a1 = q0.y - q1.y, b1 = q1.x - q0.x, c1 = q0.x * q1.y - q1.x * q0.y; 81 | float D = a0 * b1 - a1 * b0; 82 | 83 | ans.x = (b0 * c1 - b1 * c0) / D; 84 | ans.y = (a1 * c0 - a0 * c1) / D; 85 | } 86 | 87 | return true; 88 | } 89 | 90 | inline void rotate_around_center(const float2 ¢er, const float angle_cos, const float angle_sin, float2 &p) { 91 | float new_x = (p.x - center.x) * angle_cos + (p.y - center.y) * (-angle_sin) + center.x; 92 | float new_y = (p.x - center.x) * angle_sin + (p.y - center.y) * angle_cos + center.y; 93 | p = float2 {new_x, new_y}; 94 | } 95 | 96 | inline float box_overlap(const Bndbox &box_a, const Bndbox &box_b) { 97 | float a_angle = box_a.rt, b_angle = box_b.rt; 98 | float a_dx_half = box_a.l / 2, b_dx_half = box_b.l / 2, a_dy_half = box_a.w / 2, b_dy_half = box_b.w / 2; 99 | float a_x1 = box_a.x - a_dx_half, a_y1 = box_a.y - a_dy_half; 100 | float a_x2 = box_a.x + a_dx_half, a_y2 = box_a.y + a_dy_half; 101 | float b_x1 = box_b.x - b_dx_half, b_y1 = box_b.y - b_dy_half; 102 | float b_x2 = box_b.x + b_dx_half, b_y2 = box_b.y + b_dy_half; 103 | float2 box_a_corners[5]; 104 | float2 box_b_corners[5]; 105 | 106 | float2 center_a = float2 {box_a.x, box_a.y}; 107 | float2 center_b = float2 {box_b.x, box_b.y}; 108 | 109 | float2 cross_points[16]; 110 | float2 poly_center = {0, 0}; 111 | int cnt = 0; 112 | bool flag = false; 113 | 114 | box_a_corners[0] = float2 {a_x1, a_y1}; 115 | box_a_corners[1] = float2 {a_x2, a_y1}; 116 | box_a_corners[2] = float2 {a_x2, a_y2}; 117 | box_a_corners[3] = float2 {a_x1, a_y2}; 118 | 119 | 
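    // The remainder of box_overlap() computes the overlap area of two rotated boxes:
    //  1. build the four axis-aligned corners of box_b (box_a's corners were built above),
    //  2. rotate both corner sets around their box centers by the box yaw (rt),
    //  3. collect edge-edge intersection points, plus corners lying inside the other box,
    //     into cross_points[],
    //  4. sort those points counter-clockwise around their centroid (atan2 bubble sort),
    //  5. accumulate the polygon area with the shoelace formula (sum of cross products).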
box_b_corners[0] = float2 {b_x1, b_y1}; 120 | box_b_corners[1] = float2 {b_x2, b_y1}; 121 | box_b_corners[2] = float2 {b_x2, b_y2}; 122 | box_b_corners[3] = float2 {b_x1, b_y2}; 123 | 124 | float a_angle_cos = cos(a_angle), a_angle_sin = sin(a_angle); 125 | float b_angle_cos = cos(b_angle), b_angle_sin = sin(b_angle); 126 | 127 | for (int k = 0; k < 4; k++) { 128 | rotate_around_center(center_a, a_angle_cos, a_angle_sin, box_a_corners[k]); 129 | rotate_around_center(center_b, b_angle_cos, b_angle_sin, box_b_corners[k]); 130 | } 131 | 132 | box_a_corners[4] = box_a_corners[0]; 133 | box_b_corners[4] = box_b_corners[0]; 134 | 135 | for (int i = 0; i < 4; i++) { 136 | for (int j = 0; j < 4; j++) { 137 | flag = intersection(box_a_corners[i + 1], box_a_corners[i], 138 | box_b_corners[j + 1], box_b_corners[j], 139 | cross_points[cnt]); 140 | if (flag) { 141 | poly_center = {poly_center.x + cross_points[cnt].x, poly_center.y + cross_points[cnt].y}; 142 | cnt++; 143 | } 144 | } 145 | } 146 | 147 | for (int k = 0; k < 4; k++) { 148 | if (check_box2d(box_a, box_b_corners[k])) { 149 | poly_center = {poly_center.x + box_b_corners[k].x, poly_center.y + box_b_corners[k].y}; 150 | cross_points[cnt] = box_b_corners[k]; 151 | cnt++; 152 | } 153 | if (check_box2d(box_b, box_a_corners[k])) { 154 | poly_center = {poly_center.x + box_a_corners[k].x, poly_center.y + box_a_corners[k].y}; 155 | cross_points[cnt] = box_a_corners[k]; 156 | cnt++; 157 | } 158 | } 159 | 160 | poly_center.x /= cnt; 161 | poly_center.y /= cnt; 162 | 163 | float2 temp; 164 | for (int j = 0; j < cnt - 1; j++) { 165 | for (int i = 0; i < cnt - j - 1; i++) { 166 | if (atan2(cross_points[i].y - poly_center.y, cross_points[i].x - poly_center.x) > 167 | atan2(cross_points[i+1].y - poly_center.y, cross_points[i+1].x - poly_center.x) 168 | ) { 169 | temp = cross_points[i]; 170 | cross_points[i] = cross_points[i + 1]; 171 | cross_points[i + 1] = temp; 172 | } 173 | } 174 | } 175 | 176 | float area = 0; 177 | for (int k = 0; k < cnt - 1; k++) { 178 | float2 a = {cross_points[k].x - cross_points[0].x, 179 | cross_points[k].y - cross_points[0].y}; 180 | float2 b = {cross_points[k + 1].x - cross_points[0].x, 181 | cross_points[k + 1].y - cross_points[0].y}; 182 | area += (a.x * b.y - a.y * b.x); 183 | } 184 | return fabs(area) / 2.0; 185 | } 186 | 187 | int nms_cpu( 188 | std::vector bndboxes, 189 | const float nms_thresh, 190 | std::vector &nms_pred, 191 | const int pre_nms_top_n) 192 | { 193 | std::sort(bndboxes.begin(), bndboxes.end(), 194 | [](Bndbox boxes1, Bndbox boxes2) { return boxes1.score > boxes2.score; }); 195 | std::vector suppressed(std::min(int(bndboxes.size()), pre_nms_top_n), 0); 196 | for (size_t i = 0; i < std::min(int(bndboxes.size()), pre_nms_top_n); i++) { 197 | if (suppressed[i] == 1) { 198 | continue; 199 | } 200 | nms_pred.emplace_back(bndboxes[i]); 201 | for (size_t j = i + 1; j < std::min(int(bndboxes.size()), pre_nms_top_n); j++) { 202 | if (suppressed[j] == 1) { 203 | continue; 204 | } 205 | float sa = bndboxes[i].l * bndboxes[i].w; 206 | float sb = bndboxes[j].l * bndboxes[j].w; 207 | float s_overlap = box_overlap(bndboxes[i], bndboxes[j]); 208 | float iou = s_overlap / fmaxf(sa + sb - s_overlap, ThresHold); 209 | 210 | if (iou >= nms_thresh) { 211 | suppressed[j] = 1; 212 | } 213 | } 214 | } 215 | return 0; 216 | } 217 | -------------------------------------------------------------------------------- /tao_object_dection/yolov4/specs/yolov4_416_coco14.txt: 
-------------------------------------------------------------------------------- 1 | random_seed: 42 2 | yolov4_config { 3 | big_anchor_shape: "[(87.73, 65.44),(115.02, 177.14),(288.61, 296.34)]" 4 | mid_anchor_shape: "[(20.78, 55.42),(41.95, 33.66),(43.95, 111.22)]" 5 | small_anchor_shape: "[(5.84, 9.60),(10.24, 27.14),(21.27, 15.56)]" 6 | box_matching_iou: 0.25 7 | matching_neutral_box_iou: 0.5 8 | arch: "cspdarknet" 9 | nlayers: 53 10 | arch_conv_blocks: 2 11 | loss_loc_weight: 1.0 12 | loss_neg_obj_weights: 1.0 13 | loss_class_weights: 1.0 14 | label_smoothing: 0.0 15 | big_grid_xy_extend: 0.05 16 | mid_grid_xy_extend: 0.1 17 | small_grid_xy_extend: 0.2 18 | freeze_bn: false 19 | } 20 | 21 | training_config { 22 | batch_size_per_gpu: 8 23 | num_epochs: 300 24 | enable_qat: false 25 | checkpoint_interval: 1 26 | learning_rate { 27 | soft_start_cosine_annealing_schedule { 28 | min_learning_rate: 1e-5 29 | max_learning_rate: 0.000125 #0.00032625 #0.000435 30 | soft_start: 0.001 31 | } 32 | } 33 | regularizer { 34 | type: L2 35 | weight: 3e-5 36 | } 37 | optimizer { 38 | adam { 39 | epsilon: 1e-7 40 | beta1: 0.9 41 | beta2: 0.999 42 | amsgrad: false 43 | } 44 | } 45 | 46 | n_workers: 8 47 | use_multiprocessing: false 48 | pretrain_model_path: "/workspace_tz/tao_yolov4/cspdarknet_199.tlt" 49 | } 50 | eval_config { 51 | average_precision_mode: INTEGRATE 52 | batch_size: 8 53 | matching_iou_threshold: 0.5 54 | } 55 | nms_config { 56 | confidence_threshold: 0.001 57 | clustering_iou_threshold: 0.6 58 | top_k: 300 59 | force_on_cpu: True 60 | } 61 | augmentation_config { 62 | hue: 0.1 63 | saturation: 1.5 64 | exposure:1.5 65 | vertical_flip:0 66 | horizontal_flip: 0.5 67 | jitter: 0.3 68 | output_width: 416 69 | output_height: 416 70 | randomize_input_shape_period: 10 71 | output_channel: 3 72 | mosaic_prob: 0.5 73 | mosaic_min_ratio:0.2 74 | image_mean { 75 | key: 'b' 76 | value: 103.9 77 | } 78 | image_mean { 79 | key: 'g' 80 | value: 116.8 81 | } 82 | image_mean { 83 | key: 'r' 84 | value: 123.7 85 | } 86 | } 87 | dataset_config { 88 | data_sources: { 89 | label_directory_path: "/raid/KITTI/trainval2014" 90 | image_directory_path: "/raid/images/trainval2014" 91 | } 92 | target_class_mapping { 93 | key: "apple" 94 | value: "apple" 95 | } 96 | target_class_mapping { 97 | key: "book" 98 | value: "book" 99 | } 100 | target_class_mapping { 101 | key: "handbag" 102 | value: "handbag" 103 | } 104 | target_class_mapping { 105 | key: "car" 106 | value: "car" 107 | } 108 | target_class_mapping { 109 | key: "pottedplant" 110 | value: "pottedplant" 111 | } 112 | target_class_mapping { 113 | key: "backpack" 114 | value: "backpack" 115 | } 116 | target_class_mapping { 117 | key: "clock" 118 | value: "clock" 119 | } 120 | target_class_mapping { 121 | key: "truck" 122 | value: "truck" 123 | } 124 | target_class_mapping { 125 | key: "knife" 126 | value: "knife" 127 | } 128 | target_class_mapping { 129 | key: "cup" 130 | value: "cup" 131 | } 132 | target_class_mapping { 133 | key: "snowboard" 134 | value: "snowboard" 135 | } 136 | target_class_mapping { 137 | key: "suitcase" 138 | value: "suitcase" 139 | } 140 | target_class_mapping { 141 | key: "umbrella" 142 | value: "umbrella" 143 | } 144 | target_class_mapping { 145 | key: "bowl" 146 | value: "bowl" 147 | } 148 | target_class_mapping { 149 | key: "carrot" 150 | value: "carrot" 151 | } 152 | target_class_mapping { 153 | key: "person" 154 | value: "person" 155 | } 156 | target_class_mapping { 157 | key: "fork" 158 | value: "fork" 159 | } 160 | 
target_class_mapping { 161 | key: "train" 162 | value: "train" 163 | } 164 | target_class_mapping { 165 | key: "pizza" 166 | value: "pizza" 167 | } 168 | target_class_mapping { 169 | key: "couch" 170 | value: "couch" 171 | } 172 | target_class_mapping { 173 | key: "bus" 174 | value: "bus" 175 | } 176 | target_class_mapping { 177 | key: "skis" 178 | value: "skis" 179 | } 180 | target_class_mapping { 181 | key: "keyboard" 182 | value: "keyboard" 183 | } 184 | target_class_mapping { 185 | key: "firehydrant" 186 | value: "firehydrant" 187 | } 188 | target_class_mapping { 189 | key: "tennisracket" 190 | value: "tennisracket" 191 | } 192 | target_class_mapping { 193 | key: "sandwich" 194 | value: "sandwich" 195 | } 196 | target_class_mapping { 197 | key: "toothbrush" 198 | value: "toothbrush" 199 | } 200 | target_class_mapping { 201 | key: "motorcycle" 202 | value: "motorcycle" 203 | } 204 | target_class_mapping { 205 | key: "remote" 206 | value: "remote" 207 | } 208 | target_class_mapping { 209 | key: "frisbee" 210 | value: "frisbee" 211 | } 212 | target_class_mapping { 213 | key: "mouse" 214 | value: "mouse" 215 | } 216 | target_class_mapping { 217 | key: "trafficlight" 218 | value: "trafficlight" 219 | } 220 | target_class_mapping { 221 | key: "oven" 222 | value: "oven" 223 | } 224 | target_class_mapping { 225 | key: "scissors" 226 | value: "scissors" 227 | } 228 | target_class_mapping { 229 | key: "airplane" 230 | value: "airplane" 231 | } 232 | target_class_mapping { 233 | key: "teddybear" 234 | value: "teddybear" 235 | } 236 | target_class_mapping { 237 | key: "refrigerator" 238 | value: "refrigerator" 239 | } 240 | target_class_mapping { 241 | key: "stopsign" 242 | value: "stopsign" 243 | } 244 | target_class_mapping { 245 | key: "bed" 246 | value: "bed" 247 | } 248 | target_class_mapping { 249 | key: "orange" 250 | value: "orange" 251 | } 252 | target_class_mapping { 253 | key: "bottle" 254 | value: "bottle" 255 | } 256 | target_class_mapping { 257 | key: "sink" 258 | value: "sink" 259 | } 260 | target_class_mapping { 261 | key: "chair" 262 | value: "chair" 263 | } 264 | target_class_mapping { 265 | key: "broccoli" 266 | value: "broccoli" 267 | } 268 | target_class_mapping { 269 | key: "horse" 270 | value: "horse" 271 | } 272 | target_class_mapping { 273 | key: "elephant" 274 | value: "elephant" 275 | } 276 | target_class_mapping { 277 | key: "tie" 278 | value: "tie" 279 | } 280 | target_class_mapping { 281 | key: "banana" 282 | value: "banana" 283 | } 284 | target_class_mapping { 285 | key: "donut" 286 | value: "donut" 287 | } 288 | target_class_mapping { 289 | key: "baseballglove" 290 | value: "baseballglove" 291 | } 292 | target_class_mapping { 293 | key: "surfboard" 294 | value: "surfboard" 295 | } 296 | target_class_mapping { 297 | key: "hotdog" 298 | value: "hotdog" 299 | } 300 | target_class_mapping { 301 | key: "skateboard" 302 | value: "skateboard" 303 | } 304 | target_class_mapping { 305 | key: "zebra" 306 | value: "zebra" 307 | } 308 | target_class_mapping { 309 | key: "boat" 310 | value: "boat" 311 | } 312 | target_class_mapping { 313 | key: "vase" 314 | value: "vase" 315 | } 316 | target_class_mapping { 317 | key: "baseballbat" 318 | value: "baseballbat" 319 | } 320 | target_class_mapping { 321 | key: "hairdrier" 322 | value: "hairdrier" 323 | } 324 | target_class_mapping { 325 | key: "cake" 326 | value: "cake" 327 | } 328 | target_class_mapping { 329 | key: "diningtable" 330 | value: "diningtable" 331 | } 332 | target_class_mapping { 333 | key: "bicycle" 334 | value: 
"bicycle" 335 | } 336 | target_class_mapping { 337 | key: "laptop" 338 | value: "laptop" 339 | } 340 | target_class_mapping { 341 | key: "wineglass" 342 | value: "wineglass" 343 | } 344 | target_class_mapping { 345 | key: "bear" 346 | value: "bear" 347 | } 348 | target_class_mapping { 349 | key: "parkingmeter" 350 | value: "parkingmeter" 351 | } 352 | target_class_mapping { 353 | key: "tv" 354 | value: "tv" 355 | } 356 | target_class_mapping { 357 | key: "cat" 358 | value: "cat" 359 | } 360 | target_class_mapping { 361 | key: "bird" 362 | value: "bird" 363 | } 364 | target_class_mapping { 365 | key: "toilet" 366 | value: "toilet" 367 | } 368 | target_class_mapping { 369 | key: "sportsball" 370 | value: "sportsball" 371 | } 372 | target_class_mapping { 373 | key: "sheep" 374 | value: "sheep" 375 | } 376 | target_class_mapping { 377 | key: "microwave" 378 | value: "microwave" 379 | } 380 | target_class_mapping { 381 | key: "cow" 382 | value: "cow" 383 | } 384 | target_class_mapping { 385 | key: "bench" 386 | value: "bench" 387 | } 388 | target_class_mapping { 389 | key: "giraffe" 390 | value: "giraffe" 391 | } 392 | target_class_mapping { 393 | key: "spoon" 394 | value: "spoon" 395 | } 396 | target_class_mapping { 397 | key: "dog" 398 | value: "dog" 399 | } 400 | target_class_mapping { 401 | key: "toaster" 402 | value: "toaster" 403 | } 404 | target_class_mapping { 405 | key: "cellphone" 406 | value: "cellphone" 407 | } 408 | target_class_mapping { 409 | key: "kite" 410 | value: "kite" 411 | } 412 | image_extension: "jpg" 413 | validation_data_sources: { 414 | label_directory_path: "/raid/KITTI/val_5k" 415 | image_directory_path: "/raid/images/val_5k" 416 | } 417 | } 418 | -------------------------------------------------------------------------------- /tao_pointpillars/tensorrt_sample/test/main.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 3 | * SPDX-License-Identifier: Apache-2.0 4 | * 5 | * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * you may not use this file except in compliance with the License. 7 | * You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 | #include "cuda_runtime.h" 25 | #include "./pointpillar.h" 26 | 27 | #include 28 | 29 | #define checkCudaErrors(status) \ 30 | { \ 31 | if (status != 0) \ 32 | { \ 33 | std::cout << "Cuda failure: " << cudaGetErrorString(status) \ 34 | << " at line " << __LINE__ \ 35 | << " in file " << __FILE__ \ 36 | << " error status: " << status \ 37 | << std::endl; \ 38 | abort(); \ 39 | } \ 40 | } 41 | 42 | int loadData(const char *file, void **data, unsigned int *length) 43 | { 44 | std::fstream dataFile(file, std::ifstream::in); 45 | 46 | if (!dataFile.is_open()) 47 | { 48 | std::cout << "Can't open files: "<< file<& ret, // NOLINT(runtime/references) 78 | char del = ',') { 79 | int idx = 0; 80 | auto p = std::string(s + idx).find(std::string(1, del)); 81 | while (std::string::npos != p) { 82 | auto s_tmp = std::string(s + idx).substr(0, p); 83 | ret.push_back(s_tmp); 84 | idx += (p + 1); 85 | p = std::string(s + idx).find(std::string(1, del)); 86 | } 87 | if (s[idx] != 0) { 88 | ret.push_back(std::string(s + idx)); 89 | } 90 | } 91 | 92 | void parse_args( 93 | int argc, char**argv, 94 | std::vector& class_names, 95 | float& nms_iou_thresh, 96 | int& pre_nms_top_n, 97 | bool& do_profile, 98 | std::string& model_path, 99 | std::string& engine_path, 100 | std::string& data_path, 101 | std::string& data_type, 102 | std::string& output_path 103 | ) { 104 | int c; 105 | while ((c = getopt(argc, argv, "c:n:t:m:l:d:e:o:ph")) != -1) { 106 | switch (c) { 107 | case 't': 108 | { 109 | nms_iou_thresh = atof(optarg); 110 | break; 111 | } 112 | case 'n': 113 | { 114 | pre_nms_top_n = atoi(optarg); 115 | break; 116 | } 117 | case 'c': 118 | { 119 | split_str(optarg, class_names); 120 | break; 121 | } 122 | case 'm': 123 | { 124 | model_path = std::string(optarg); 125 | break; 126 | } 127 | case 'e': 128 | { 129 | engine_path = std::string(optarg); 130 | break; 131 | } 132 | case 'l': 133 | { 134 | data_path = std::string(optarg); 135 | break; 136 | } 137 | case 'o': 138 | { 139 | output_path = std::string(optarg); 140 | break; 141 | } 142 | case 'd': 143 | { 144 | data_type = std::string(optarg); 145 | break; 146 | } 147 | case 'p': 148 | { 149 | do_profile = true; 150 | break; 151 | } 152 | case 'h': 153 | { 154 | std::cout << "Usage: " << std::endl; 155 | std::cout << argv[0] << " -t " << 156 | " -c -n " << 157 | " -l -m " << 158 | " -e -d -o -p -h" << 159 | std::endl; 160 | exit(1); 161 | } 162 | default: 163 | { 164 | std::cerr << "Unrecognized argument" << std::endl; 165 | abort(); 166 | } 167 | } 168 | } 169 | } 170 | 171 | std::vector class_names; 172 | float nms_iou_thresh; 173 | int pre_nms_top_n; 174 | bool do_profile{false}; 175 | std::string model_path; 176 | std::string engine_path; 177 | std::string data_path; 178 | std::string data_type{"fp32"}; 179 | std::string output_path; 180 | 181 | 182 | void SaveBoxPred(std::vector boxes, std::string file_name) 183 | { 184 | std::ofstream ofs; 185 | ofs.open(file_name, std::ios::out); 186 | if (ofs.is_open()) { 187 | for (const auto box : boxes) { 188 | ofs << box.x << " "; 189 | ofs << box.y << " "; 190 | ofs << box.z << " "; 191 | ofs << box.w << " "; 192 | ofs << box.l << " "; 193 | ofs << box.h << " "; 194 | ofs << box.rt << " "; 195 | ofs << box.id << " "; 196 | ofs << box.score << " "; 197 | ofs << "\n"; 198 | } 199 | } 200 | else { 201 | std::cerr << "Output file cannot be opened!" 
<< std::endl; 202 | } 203 | ofs.close(); 204 | std::cout << "Saved prediction in: " << file_name << std::endl; 205 | return; 206 | }; 207 | 208 | 209 | int main(int argc, char **argv) 210 | { 211 | parse_args( 212 | argc, argv, 213 | class_names, 214 | nms_iou_thresh, 215 | pre_nms_top_n, 216 | do_profile, 217 | model_path, 218 | engine_path, 219 | data_path, 220 | data_type, 221 | output_path 222 | ); 223 | assert(data_type == "fp32" || data_type == "fp16"); 224 | std::cout << "Loading Data: " << data_path << std::endl; 225 | cudaEvent_t start, stop; 226 | float elapsedTime = 0.0f; 227 | cudaStream_t stream = NULL; 228 | 229 | checkCudaErrors(cudaEventCreate(&start)); 230 | checkCudaErrors(cudaEventCreate(&stop)); 231 | checkCudaErrors(cudaStreamCreate(&stream)); 232 | 233 | std::vector nms_pred; 234 | nms_pred.reserve(100); 235 | 236 | PointPillar pointpillar(model_path, engine_path, stream, data_type); 237 | 238 | 239 | std::string dataFile = data_path; 240 | //load points cloud 241 | unsigned int length = 0; 242 | void *data = NULL; 243 | std::shared_ptr buffer((char *)data, std::default_delete()); 244 | loadData(dataFile.data(), &data, &length); 245 | buffer.reset((char *)data); 246 | 247 | float* points = (float*)buffer.get(); 248 | unsigned int num_point_values = pointpillar.getPointSize(); 249 | unsigned int points_size = length/sizeof(float)/num_point_values; 250 | 251 | float *points_data = nullptr; 252 | unsigned int *points_num = nullptr; 253 | unsigned int points_data_size = points_size * num_point_values * sizeof(float); 254 | 255 | checkCudaErrors(cudaMallocManaged((void **)&points_data, points_data_size)); 256 | checkCudaErrors(cudaMallocManaged((void **)&points_num, sizeof(unsigned int))); 257 | checkCudaErrors(cudaMemcpy(points_data, points, points_data_size, cudaMemcpyDefault)); 258 | checkCudaErrors(cudaMemcpy(points_num, &points_size, sizeof(unsigned int), cudaMemcpyDefault)); 259 | checkCudaErrors(cudaDeviceSynchronize()); 260 | 261 | cudaEventRecord(start, stream); 262 | 263 | pointpillar.doinfer( 264 | points_data, points_num, nms_pred, 265 | nms_iou_thresh, 266 | pre_nms_top_n, 267 | class_names, 268 | do_profile 269 | ); 270 | cudaEventRecord(stop, stream); 271 | cudaEventSynchronize(stop); 272 | cudaEventElapsedTime(&elapsedTime, start, stop); 273 | std::cout<<"TIME: pointpillar: "<< elapsedTime <<" ms." 
<>>>>>>>>>>" < 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 | #include "cuda_runtime.h" 25 | #include "NvInfer.h" 26 | #include "NvOnnxConfig.h" 27 | #include "NvOnnxParser.h" 28 | #include "NvInferRuntime.h" 29 | #include "NvInferPlugin.h" 30 | #include "pointpillar.h" 31 | 32 | #define checkCudaErrors(status) \ 33 | { \ 34 | if (status != 0) \ 35 | { \ 36 | std::cout << "Cuda failure: " << cudaGetErrorString(status) \ 37 | << " at line " << __LINE__ \ 38 | << " in file " << __FILE__ \ 39 | << " error status: " << status \ 40 | << std::endl; \ 41 | abort(); \ 42 | } \ 43 | } 44 | 45 | 46 | struct SimpleProfiler : public nvinfer1::IProfiler 47 | { 48 | struct Record 49 | { 50 | float time{0}; 51 | int count{0}; 52 | }; 53 | 54 | virtual void reportLayerTime(const char* layerName, float ms) noexcept 55 | { 56 | mProfile[layerName].count++; 57 | mProfile[layerName].time += ms; 58 | if (std::find(mLayerNames.begin(), mLayerNames.end(), layerName) == mLayerNames.end()) 59 | { 60 | mLayerNames.push_back(layerName); 61 | } 62 | } 63 | 64 | SimpleProfiler(const char* name, const std::vector& srcProfilers = std::vector()) 65 | : mName(name) 66 | { 67 | for (const auto& srcProfiler : srcProfilers) 68 | { 69 | for (const auto& rec : srcProfiler.mProfile) 70 | { 71 | auto it = mProfile.find(rec.first); 72 | if (it == mProfile.end()) 73 | { 74 | mProfile.insert(rec); 75 | } 76 | else 77 | { 78 | it->second.time += rec.second.time; 79 | it->second.count += rec.second.count; 80 | } 81 | } 82 | } 83 | } 84 | 85 | friend std::ostream& operator<<(std::ostream& out, const SimpleProfiler& value) 86 | { 87 | out << "========== " << value.mName << " profile ==========" << std::endl; 88 | float totalTime = 0; 89 | std::string layerNameStr = "TensorRT layer name"; 90 | int maxLayerNameLength = std::max(static_cast(layerNameStr.size()), 70); 91 | for (const auto& elem : value.mProfile) 92 | { 93 | totalTime += elem.second.time; 94 | maxLayerNameLength = std::max(maxLayerNameLength, static_cast(elem.first.size())); 95 | } 96 | 97 | auto old_settings = out.flags(); 98 | auto old_precision = out.precision(); 99 | // Output header 100 | { 101 | out << std::setw(maxLayerNameLength) << layerNameStr << " "; 102 | out << std::setw(12) << "Runtime, " 103 | << "%" 104 | << " "; 105 | out << std::setw(12) << "Invocations" 106 | << " "; 107 | out << std::setw(12) << "Runtime, ms" << std::endl; 108 | } 109 | for (size_t i = 0; i < value.mLayerNames.size(); i++) 110 | { 111 | const std::string layerName = value.mLayerNames[i]; 112 | auto elem = value.mProfile.at(layerName); 113 | out << std::setw(maxLayerNameLength) << layerName << " "; 114 | out << std::setw(12) << std::fixed << std::setprecision(1) << (elem.time * 100.0F / totalTime) << "%" 115 | << " "; 116 | out << std::setw(12) << elem.count << " "; 117 | out << std::setw(12) << std::fixed << std::setprecision(2) << elem.time << std::endl; 118 | } 119 | out.flags(old_settings); 120 | out.precision(old_precision); 121 | out << "========== " << value.mName << " total runtime = " << totalTime << " ms ==========" << std::endl; 122 | 123 | return out; 124 | } 125 | 126 | private: 127 | std::string mName; 128 | std::vector mLayerNames; 129 | std::map mProfile; 130 | }; 131 | 132 | 133 | TRT::~TRT(void) 134 | { 135 | context->destroy(); 136 | engine->destroy(); 137 | checkCudaErrors(cudaEventDestroy(start)); 138 | checkCudaErrors(cudaEventDestroy(stop)); 139 | } 140 | 141 | TRT::TRT( 142 | std::string modelFile, 143 | std::string modelCache, 144 | 
cudaStream_t stream, 145 | const std::string& data_type 146 | ):stream_(stream) 147 | { 148 | initLibNvInferPlugins(&gLogger_, ""); 149 | std::fstream trtCache(modelCache, std::ifstream::in); 150 | checkCudaErrors(cudaEventCreate(&start)); 151 | checkCudaErrors(cudaEventCreate(&stop)); 152 | if (!trtCache.is_open()) 153 | { 154 | std::cout << "Loading Model: " << modelFile << std::endl; 155 | std::cout << "Building TRT engine from the model."<(nvinfer1::NetworkDefinitionCreationFlag::kEXPLICIT_BATCH); 161 | auto network = (builder->createNetworkV2(explicitBatch)); 162 | 163 | // define onnxparser 164 | auto parser = (nvonnxparser::createParser(*network, gLogger_)); 165 | if (!parser->parseFromFile(modelFile.data(), static_cast(nvinfer1::ILogger::Severity::kWARNING))) 166 | { 167 | std::cerr << ": Failed to parse onnx model file, please check the onnx version and trt support op!" 168 | << std::endl; 169 | exit(-1); 170 | } 171 | // dynamic shape 172 | nvinfer1::IOptimizationProfile* profile = builder->createOptimizationProfile(); 173 | // define config 174 | auto networkConfig = builder->createBuilderConfig(); 175 | if(data_type == "fp16") { 176 | networkConfig->setFlag(nvinfer1::BuilderFlag::kFP16); 177 | std::cout << "Enabled FP16 data type!" << std::endl; 178 | } 179 | nvinfer1::Dims dims{}; 180 | dims.nbDims = 3; 181 | dims.d[0] = 1; 182 | auto input0_dims = network->getInput(0)->getDimensions(); 183 | dims.d[1] = input0_dims.d[1]; 184 | dims.d[2] = 4; 185 | profile->setDimensions("points", nvinfer1::OptProfileSelector::kMIN, dims); 186 | profile->setDimensions("points", nvinfer1::OptProfileSelector::kOPT, dims); 187 | profile->setDimensions("points", nvinfer1::OptProfileSelector::kMAX, dims); 188 | dims.nbDims = 1; 189 | dims.d[0] = 1; 190 | profile->setDimensions("num_points", nvinfer1::OptProfileSelector::kMIN, dims); 191 | profile->setDimensions("num_points", nvinfer1::OptProfileSelector::kOPT, dims); 192 | profile->setDimensions("num_points", nvinfer1::OptProfileSelector::kMAX, dims); 193 | networkConfig->addOptimizationProfile(profile); 194 | // set max workspace 195 | networkConfig->setMaxWorkspaceSize(size_t(1) << 30); 196 | 197 | engine = (builder->buildEngineWithConfig(*network, *networkConfig)); 198 | 199 | if (engine == nullptr) 200 | { 201 | std::cerr << ": engine init null!" 
<< std::endl; 202 | exit(-1); 203 | } 204 | 205 | // serialize the engine, then close everything down 206 | auto trtModelStream = (engine->serialize()); 207 | std::string modelCacheSave = modelFile + ".cache"; 208 | std::fstream trtOut(modelCacheSave, std::ifstream::out); 209 | if (!trtOut.is_open()) 210 | { 211 | std::cout << "Can't store trt cache.\n"; 212 | exit(-1); 213 | } 214 | trtOut.write((char*)trtModelStream->data(), trtModelStream->size()); 215 | trtOut.close(); 216 | trtModelStream->destroy(); 217 | 218 | networkConfig->destroy(); 219 | parser->destroy(); 220 | network->destroy(); 221 | builder->destroy(); 222 | } else { 223 | std::cout << "Loading existing TRT Engine: " 224 | << modelCache 225 | << std::endl; 226 | char *data; 227 | unsigned int length; 228 | // get length of file: 229 | trtCache.seekg(0, trtCache.end); 230 | length = trtCache.tellg(); 231 | trtCache.seekg(0, trtCache.beg); 232 | data = (char *)malloc(length); 233 | if (data == NULL ) { 234 | std::cout << "Can't malloc data.\n"; 235 | exit(-1); 236 | } 237 | trtCache.read(data, length); 238 | // create context 239 | auto runtime = nvinfer1::createInferRuntime(gLogger_); 240 | if (runtime == nullptr) { 241 | std::cerr << ": runtime null!" << std::endl; 242 | exit(-1); 243 | } 244 | engine = (runtime->deserializeCudaEngine(data, length, 0)); 245 | if (engine == nullptr) { 246 | std::cerr << ": engine null!" << std::endl; 247 | exit(-1); 248 | } 249 | free(data); 250 | trtCache.close(); 251 | } 252 | 253 | context = engine->createExecutionContext(); 254 | 255 | } 256 | 257 | int TRT::doinfer(void**buffers, bool do_profile) 258 | { 259 | int status; 260 | SimpleProfiler profiler("perf"); 261 | if(do_profile) 262 | context->setProfiler(&profiler); 263 | status = context->enqueueV2(buffers, stream_, &start); 264 | if(do_profile) 265 | std::cout << profiler; 266 | if (!status) 267 | { 268 | return false; 269 | } 270 | return true; 271 | } 272 | 273 | nvinfer1::Dims TRT::get_binding_shape(int index) 274 | { 275 | return context->getBindingDimensions(index); 276 | } 277 | 278 | int TRT::getPointSize() { 279 | return context->getBindingDimensions(0).d[2]; 280 | } 281 | 282 | PointPillar::PointPillar( 283 | std::string modelFile, 284 | std::string engineFile, 285 | cudaStream_t stream, 286 | const std::string& data_type 287 | ):stream_(stream) 288 | { 289 | 290 | checkCudaErrors(cudaEventCreate(&start)); 291 | checkCudaErrors(cudaEventCreate(&stop)); 292 | 293 | trt_.reset(new TRT(modelFile, engineFile, stream_, data_type)); 294 | 295 | //output of TRT 296 | box_size = (trt_->get_binding_shape(2).d[1]) * 9 * sizeof(float); 297 | checkCudaErrors(cudaMallocManaged((void **)&box_output, box_size)); 298 | checkCudaErrors(cudaMallocManaged((void **)&box_num, sizeof(int))); 299 | res.reserve(100); 300 | } 301 | 302 | PointPillar::~PointPillar(void) 303 | { 304 | trt_.reset(); 305 | 306 | checkCudaErrors(cudaFree(box_output)); 307 | checkCudaErrors(cudaFree(box_num)); 308 | checkCudaErrors(cudaEventDestroy(start)); 309 | checkCudaErrors(cudaEventDestroy(stop)); 310 | } 311 | 312 | int PointPillar::getPointSize() { 313 | return trt_->getPointSize(); 314 | } 315 | 316 | int PointPillar::doinfer( 317 | void*points_data, 318 | unsigned int* points_size, 319 | std::vector &nms_pred, 320 | float nms_iou_thresh, 321 | int pre_nms_top_n, 322 | std::vector& class_names, 323 | bool do_profile 324 | ) 325 | { 326 | #if PERFORMANCE_LOG 327 | float doinferTime = 0.0f; 328 | cudaEventRecord(start, stream_); 329 | #endif 330 | void *buffers[] 
= {points_data, points_size, box_output, box_num}; 331 | 332 | trt_->doinfer(buffers, do_profile); 333 | 334 | #if PERFORMANCE_LOG 335 | checkCudaErrors(cudaEventRecord(stop, stream_)); 336 | checkCudaErrors(cudaEventSynchronize(stop)); 337 | checkCudaErrors(cudaEventElapsedTime(&doinferTime, start, stop)); 338 | std::cout<<"TIME: doinfer: "<< doinferTime <<" ms." < 30 | #if CV_MAJOR_VERSION >= 3 31 | # include 32 | # include 33 | #else 34 | # include 35 | #endif 36 | 37 | #include 38 | #include 39 | 40 | #include 41 | #include 42 | #include 43 | #include 44 | #include 45 | #include 46 | #include 47 | #include 48 | 49 | #include 50 | #include 51 | #include 52 | #include 53 | 54 | #include 55 | #include 56 | #include 57 | #include 58 | #include 59 | 60 | #define TAG_STRING "PIEH" // use this when WRITING the file 61 | 62 | #define CHECK_STATUS(STMT) \ 63 | do \ 64 | { \ 65 | VPIStatus status = (STMT); \ 66 | if (status != VPI_SUCCESS) \ 67 | { \ 68 | char buffer[VPI_MAX_STATUS_MESSAGE_LENGTH]; \ 69 | vpiGetLastStatusMessage(buffer, sizeof(buffer)); \ 70 | std::ostringstream ss; \ 71 | ss << vpiStatusGetName(status) << ": " << buffer; \ 72 | throw std::runtime_error(ss.str()); \ 73 | } \ 74 | } while (0); 75 | 76 | static std::string generateRegexPattern(const std::string& imageNamePattern) 77 | { 78 | std::string regex_pat; 79 | std::string image; 80 | std::string temp; 81 | 82 | for (auto it = imageNamePattern.cbegin(); it != imageNamePattern.cend(); ++it) 83 | { 84 | if (*it == '*') 85 | { 86 | image.append(".*"); 87 | } 88 | else if (*it == '?') 89 | { 90 | image.append("."); 91 | } 92 | else 93 | { 94 | image.append(1, *it); 95 | } 96 | } 97 | 98 | size_t pos = image.find_first_of("%"); 99 | if (pos != std::string::npos) 100 | { 101 | if (pos > 0) 102 | { 103 | regex_pat.append(image.substr(0, pos)); 104 | } 105 | temp = image.substr(pos + 1); 106 | pos = temp.find_first_of("d"); 107 | if (pos != std::string::npos) 108 | { 109 | if (pos > 0) 110 | { 111 | auto nd = atoi(temp.substr(0, pos).c_str()); 112 | std::ostringstream ss; 113 | ss << "([0-9]){" << nd << ",}"; 114 | regex_pat.append(ss.str()); 115 | } 116 | else 117 | { 118 | regex_pat.append("([0 - 9]){1,}"); 119 | } 120 | regex_pat.append(temp.substr(pos + 1)); 121 | } 122 | } 123 | else 124 | { 125 | regex_pat.append(image); 126 | } 127 | return regex_pat; 128 | } 129 | 130 | static std::vector> ReadDirectory(const std::string& path) 131 | { 132 | std::vector> files; 133 | DIR* d; 134 | struct dirent* dir; 135 | d = opendir(path.c_str()); 136 | if (d) 137 | { 138 | while ((dir = readdir(d)) != NULL) 139 | { 140 | const char* name = dir->d_name; 141 | if ((name[0] == 0) || 142 | (name[0] == '.' && name[1] == 0) || 143 | (name[0] == '.' && name[1] == '.' 
&& name[2] == 0)) 144 | continue; 145 | 146 | struct stat buf; 147 | if ((stat(name, &buf) == 0) && 148 | S_ISDIR(buf.st_mode)) 149 | continue; 150 | 151 | files.push_back(std::make_pair(path + "/" + std::string(name), std::string(name))); 152 | } 153 | 154 | closedir(d); 155 | } 156 | 157 | return files; 158 | } 159 | 160 | static void glob(const std::string& image, std::vector& result) 161 | { 162 | const char dir_separators[] = "/\\"; 163 | std::string wildchart; 164 | std::string path; 165 | size_t pos = image.find_last_of(dir_separators); 166 | if (pos == std::string::npos) 167 | { 168 | wildchart = image; 169 | path = "."; 170 | } 171 | else 172 | { 173 | path = image.substr(0, pos); 174 | wildchart = image.substr(pos + 1); 175 | } 176 | std::string regex_str = generateRegexPattern(wildchart); 177 | std::regex regex_pat{ regex_str }; 178 | #ifndef NDEBUG 179 | std::cout << "Input file directory path : " << path << std::endl; 180 | std::cout << "Input file pattern : " << wildchart << std::endl; 181 | #endif 182 | std::vector> fileNames = ReadDirectory(path); 183 | for (const auto & p : fileNames) 184 | { 185 | if (!p.first.empty() && !p.second.empty()) 186 | { 187 | auto fileName = p.second; 188 | if (!wildchart.empty()) 189 | { 190 | if (regex_match(fileName, regex_pat)) 191 | { 192 | result.push_back(p.first); 193 | } 194 | } 195 | } 196 | } 197 | 198 | if (!result.empty()) 199 | { 200 | std::sort(result.begin(), result.end()); 201 | } 202 | } 203 | 204 | static void ProcessMotionVector(VPIImage mvImg, cv::Mat &outputImage) 205 | { 206 | // Lock the input image to access it from CPU 207 | VPIImageData mvData; 208 | CHECK_STATUS(vpiImageLock(mvImg, VPI_LOCK_READ, &mvData)); 209 | 210 | // Create a cv::Mat that points to the input image data 211 | cv::Mat mvImage; 212 | CHECK_STATUS(vpiImageDataExportOpenCVMat(mvData, &mvImage)); 213 | 214 | // Convert S10.5 format to float 215 | cv::Mat flow(mvImage.size(), CV_32FC2); 216 | mvImage.convertTo(flow, CV_32F, 1.0f / (1 << 5)); 217 | 218 | // Image not needed anymore, we can unlock it. 219 | CHECK_STATUS(vpiImageUnlock(mvImg)); 220 | 221 | outputImage = flow; 222 | 223 | } 224 | 225 | static void WriteFlowVectors(const std::string& outputFilePattern, 226 | const int frameIdx, 227 | const cv::Mat& outputImage, 228 | const int mvWidth, 229 | const int mvHeight) 230 | { 231 | std::ostringstream fileName; 232 | fileName << outputFilePattern << "_"; 233 | fileName << std::setw(5) << std::setfill('0') << frameIdx << std::string("_middlebury.flo") ; 234 | 235 | std::ofstream fpOut(fileName.str(), std::ios::out | std::ios::binary); 236 | 237 | fpOut << TAG_STRING; 238 | 239 | fpOut.write((char*)(&mvWidth), sizeof(uint32_t)); 240 | fpOut.write((char*)(&mvHeight), sizeof(uint32_t)); 241 | fpOut.write((char*)outputImage.data, sizeof(float) * mvWidth * mvHeight * 2); 242 | fpOut.close(); 243 | } 244 | 245 | int main(int argc, char *argv[]) 246 | { 247 | // OpenCV image that will be wrapped by a VPIImage. 
248 | // Define it here so that it's destroyed *after* wrapper is destroyed 249 | cv::Mat cvPrevFrame, cvCurFrame; 250 | 251 | // VPI objects that will be used 252 | VPIStream stream = NULL; 253 | VPIImage imgPrevFramePL = NULL; 254 | VPIImage imgPrevFrameTmp = NULL; 255 | VPIImage imgPrevFrameBL = NULL; 256 | VPIImage imgCurFramePL = NULL; 257 | VPIImage imgCurFrameTmp = NULL; 258 | VPIImage imgCurFrameBL = NULL; 259 | VPIImage imgMotionVecBL = NULL; 260 | VPIPayload payload = NULL; 261 | 262 | int retval = 0; 263 | 264 | try 265 | { 266 | if (argc != 4) 267 | { 268 | std::cout< "); 270 | } 271 | 272 | // Parse input parameters 273 | std::string strInputFilesPattern = argv[1]; 274 | std::string strOuputFilesPattern = argv[2]; 275 | std::string strQuality = argv[3]; 276 | 277 | VPIOpticalFlowQuality quality; 278 | if (strQuality == "low") 279 | { 280 | quality = VPI_OPTICAL_FLOW_QUALITY_LOW; 281 | } 282 | else if (strQuality == "medium") 283 | { 284 | quality = VPI_OPTICAL_FLOW_QUALITY_MEDIUM; 285 | } 286 | else if (strQuality == "high") 287 | { 288 | quality = VPI_OPTICAL_FLOW_QUALITY_HIGH; 289 | } 290 | else 291 | { 292 | throw std::runtime_error("Unknown quality provided"); 293 | } 294 | 295 | VPIBackend backend; 296 | backend = VPI_BACKEND_NVENC; 297 | // Load the files list 298 | std::vector inputFilesList; 299 | glob(strInputFilesPattern, inputFilesList); 300 | 301 | // Create the stream where processing will happen. We'll use user-provided backend 302 | // for Optical Flow, and CUDA/VIC for image format conversions. 303 | CHECK_STATUS(vpiStreamCreate(backend | VPI_BACKEND_CUDA | VPI_BACKEND_VIC, &stream)); 304 | 305 | cvPrevFrame = cv::imread(inputFilesList[0]); 306 | 307 | // Create the previous and current frame wrapper using the first frame. This wrapper will 308 | // be set to point to every new frame in the main loop. 309 | CHECK_STATUS(vpiImageCreateOpenCVMatWrapper(cvPrevFrame, 0, &imgPrevFramePL)); 310 | CHECK_STATUS(vpiImageCreateOpenCVMatWrapper(cvPrevFrame, 0, &imgCurFramePL)); 311 | 312 | // Define the image formats we'll use throughout this sample. 313 | VPIImageFormat imgFmt = VPI_IMAGE_FORMAT_NV12_ER; 314 | VPIImageFormat imgFmtBL = VPI_IMAGE_FORMAT_NV12_ER_BL; 315 | 316 | int32_t width = cvPrevFrame.cols; 317 | int32_t height = cvPrevFrame.rows; 318 | 319 | // Create Dense Optical Flow payload to be executed on the given backend 320 | CHECK_STATUS(vpiCreateOpticalFlowDense(backend, width, height, imgFmtBL, quality, &payload)); 321 | 322 | // The Dense Optical Flow on NVENC backend expects input to be in block-linear format. 323 | // Since Convert Image Format algorithm doesn't currently support direct BGR 324 | // pitch-linear (from OpenCV) to NV12 block-linear conversion, it must be done in two 325 | // passes, first from BGR/PL to NV12/PL using CUDA, then from NV12/PL to NV12/BL using VIC. 326 | // The temporary image buffer below will store the intermediate NV12/PL representation. 327 | CHECK_STATUS(vpiImageCreate(width, height, imgFmt, 0, &imgPrevFrameTmp)); 328 | CHECK_STATUS(vpiImageCreate(width, height, imgFmt, 0, &imgCurFrameTmp)); 329 | 330 | // Now create the final block-linear buffer that'll be used as input to the 331 | // algorithm. 
332 | CHECK_STATUS(vpiImageCreate(width, height, imgFmtBL, 0, &imgPrevFrameBL)); 333 | CHECK_STATUS(vpiImageCreate(width, height, imgFmtBL, 0, &imgCurFrameBL)); 334 | 335 | // Motion vector image width and height, align to be multiple of 4 336 | int32_t mvWidth = (width + 3) / 4; 337 | int32_t mvHeight = (height + 3) / 4; 338 | 339 | 340 | // Create the output motion vector buffer 341 | CHECK_STATUS(vpiImageCreate(mvWidth, mvHeight, VPI_IMAGE_FORMAT_2S16_BL, 0, &imgMotionVecBL)); 342 | 343 | // First convert the first frame to NV12_BL. It'll be used as previous frame when the algorithm is called. 344 | CHECK_STATUS(vpiSubmitConvertImageFormat(stream, VPI_BACKEND_CUDA, imgPrevFramePL, imgPrevFrameTmp, nullptr)); 345 | CHECK_STATUS(vpiSubmitConvertImageFormat(stream, VPI_BACKEND_VIC, imgPrevFrameTmp, imgPrevFrameBL, nullptr)); 346 | 347 | // Create a output image which holds the rendered motion vector image. 348 | cv::Mat mvOutputImage; 349 | 350 | // Fetch a new frame until video ends 351 | int idxFrame = 1; 352 | int outIdxFrame = 0; 353 | for(idxFrame = 1; idxFrame < inputFilesList.size(); idxFrame++) 354 | { 355 | printf("Processing frame %d\n", idxFrame); 356 | cvCurFrame = cv::imread(inputFilesList[idxFrame]); 357 | // Wrap frame into a VPIImage, reusing the existing imgCurFramePL. 358 | CHECK_STATUS(vpiImageSetWrappedOpenCVMat(imgCurFramePL, cvCurFrame)); 359 | 360 | // Convert current frame to NV12_BL format 361 | CHECK_STATUS(vpiSubmitConvertImageFormat(stream, VPI_BACKEND_CUDA, imgCurFramePL, imgCurFrameTmp, nullptr)); 362 | CHECK_STATUS(vpiSubmitConvertImageFormat(stream, VPI_BACKEND_VIC, imgCurFrameTmp, imgCurFrameBL, nullptr)); 363 | 364 | CHECK_STATUS( 365 | vpiSubmitOpticalFlowDense(stream, backend, payload, imgPrevFrameBL, imgCurFrameBL, imgMotionVecBL)); 366 | 367 | // Wait for processing to finish. 368 | CHECK_STATUS(vpiStreamSync(stream)); 369 | 370 | // Render the resulting motion vector in the output image 371 | ProcessMotionVector(imgMotionVecBL, mvOutputImage); 372 | 373 | // Save to output files: 374 | WriteFlowVectors(strOuputFilesPattern, outIdxFrame++, mvOutputImage, mvWidth, mvHeight); 375 | 376 | // Swap previous frame and next frame 377 | std::swap(cvPrevFrame, cvCurFrame); 378 | std::swap(imgPrevFramePL, imgCurFramePL); 379 | std::swap(imgPrevFrameBL, imgCurFrameBL); 380 | } 381 | } 382 | catch (std::exception &e) 383 | { 384 | std::cerr << e.what() << std::endl; 385 | retval = 1; 386 | } 387 | 388 | // Destroy all resources used 389 | vpiStreamDestroy(stream); 390 | vpiPayloadDestroy(payload); 391 | 392 | vpiImageDestroy(imgPrevFramePL); 393 | vpiImageDestroy(imgPrevFrameTmp); 394 | vpiImageDestroy(imgPrevFrameBL); 395 | vpiImageDestroy(imgCurFramePL); 396 | vpiImageDestroy(imgCurFrameTmp); 397 | vpiImageDestroy(imgCurFrameBL); 398 | vpiImageDestroy(imgMotionVecBL); 399 | 400 | return retval; 401 | } 402 | 403 | // vim: ts=8:sw=4:sts=4:et:ai 404 | --------------------------------------------------------------------------------
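The VPI sample above stores each frame's motion vectors with `WriteFlowVectors()`: the 4-byte Middlebury tag `PIEH`, the motion-vector width and height as 32-bit integers, then the interleaved (u, v) float32 data at quarter resolution. Below is a minimal reader sketch for those files; the function name `read_flo`, the NumPy dependency, and the example path are illustrative and not part of the repository.

```python
import numpy as np

def read_flo(path):
    """Read one Middlebury .flo file produced by WriteFlowVectors() above."""
    with open(path, "rb") as f:
        tag = f.read(4)
        if tag != b"PIEH":
            raise ValueError("{}: not a Middlebury .flo file".format(path))
        # Width/height of the motion-vector grid ((input_size + 3) / 4 in the sample).
        width, height = np.fromfile(f, dtype=np.int32, count=2)
        data = np.fromfile(f, dtype=np.float32, count=int(width) * int(height) * 2)
    # The CV_32FC2 buffer is row-major with interleaved channels: H x W x (u, v).
    return data.reshape(int(height), int(width), 2)

# Example with a placeholder output pattern "out":
# flow = read_flo("out_00000_middlebury.flo")
```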